Example #1
def do(TheAnalyzer, CompressionType, AvailableStateIndexSet):
    """Starting point of the search for single character traces in the 
    state machine (TheAnalyzer). For each state in the state machine
    try to find branches of paths. 
    
    States which are closer to the init state are searched first. This 
    way quickly a set can be build of longest paths, which make searches
    from follower states unnecessary.
    """
    # depth_db: state_index ---> distance from init state.
    # We first search for the longest paths, so that searches for sub-paths
    # become unnecessary. This reduces computation time.
    depth_db = TheAnalyzer.get_depth_db()

    iterable_state_indices = (
        i for i in TheAnalyzer.state_db.iterkeys()
        if i in AvailableStateIndexSet and i != TheAnalyzer.init_state_index
    )

    path_list = flatten_list_of_lists(
        CharacterPathList_find(TheAnalyzer, state_index, CompressionType,
                               AvailableStateIndexSet)
        for state_index in sorted(iterable_state_indices,
                                  key=lambda i: depth_db[i]))

    return path_list
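
All examples on this page use the helper 'flatten_list_of_lists'. Its definition is not shown here, but every call site treats it as "concatenate an iterable of iterables into one flat list". A minimal sketch consistent with that usage (the body is an assumption; only the name and call pattern come from the examples):

from itertools import chain

def flatten_list_of_lists(iterable_of_iterables):
    # Concatenate all inner iterables into a single flat list,
    # e.g. [[1], [2, 3]] -> [1, 2, 3]. Inner generators work as well.
    return list(chain.from_iterable(iterable_of_iterables))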
Example #2
def print_snapshot_map_scheme(info, Prefix=""):
    print Prefix + "Snapshot Map Schemes:"

    all_set = set(
        flatten_list_of_lists(
            entry_recipe.snapshot_map.keys()
            for entry_recipe in info.entry_recipe_db.values()))
    if not all_set:
        print
        return

    L = max(len("%s" % repr(x)) for x in all_set)
    predecessor_list = sorted(info.entry_recipe_db.iterkeys())
    print Prefix + "   %s      %s" % (" " * L, "".join(
        "%-10s" % si for si in predecessor_list))

    for variable_id in sorted(list(all_set)):
        scheme = [
            info.entry_recipe_db[si].snapshot_map.get(variable_id)
            for si in predecessor_list
        ]
        name = "%s" % repr(variable_id)
        space = " " * (L - len(name))
        print Prefix + "   %s:%s %s" % (name, space, "".join(
            "%8s, " % x if x is not None else "          " for x in scheme))
    print
Example #3
def do_from_leaf_to_root(TheState,
                         OpTree,
                         LeafDoorId,
                         done_set,
                         GlobalEntryF=False):
    """Code the sequence from a leaf of the command tree to its root. This
    avoids unnecessary gotos from outer nodes to their parents. It stops,
    whenever a parent is already implemented.  Then, the function 'code()'
    automatically inserts a 'goto parent' at the end of the node.

    RETURNS: list of strings 
    
    The list of string implements nodes from a command tree leaf over all of
    its parents to the root, or the first already implemented parent.
    """
    txt = []
    if not GlobalEntryF:
        # When the entry is a global entry into the analyzer, it is entered
        # directly at function begin. => no 'assert unreachable'! Else, yes!
        txt.append("\n\n    %s\n" % Lng.UNREACHABLE)

    txt.extend(
        flatten_list_of_lists(
            __code(node, TheState, done_set, GlobalEntryF)
            for node in OpTree.iterable_to_root(LeafDoorId, done_set)))
    return txt
Example #4
def print_ip_offset_scheme(info, Prefix=""):
    print Prefix + "Input Pointer Offset Schemes:"

    all_set = set(
        flatten_list_of_lists(
            entry_recipe.ip_offset_db.keys()
            for entry_recipe in info.entry_recipe_db.values()))
    if not all_set:
        print
        return

    L = max(len("%s" % x) for x in all_set)
    predecessor_list = sorted(info.entry_recipe_db.iterkeys())
    print Prefix + "   %s      %s" % (" " * L, "".join(
        "%-10s" % si for si in predecessor_list))

    for position_register in sorted(list(all_set)):
        scheme = [
            info.entry_recipe_db[si].ip_offset_db.get(position_register)
            for si in predecessor_list
        ]
        name = "%s" % position_register
        space = " " * (L - len(name))
        print Prefix + "   %s:%s %s" % (name, space, "".join(
            "%8s, " % x if x is not None else "<irrelv>, " for x in scheme))
    print
Example #5
    def _branch_table_core(self, Selector, CaseList, get_case, DefaultConsequence=None):
        def get_content(C):
            if type(C) == list: return "".join(C)
            else:               return C

        def iterable(CaseList, DefaultConsequence):
            item, effect = CaseList[0]
            for item_ahead, effect_ahead in CaseList[1:]:
                if effect_ahead == effect: 
                    yield item, ""
                else:
                    yield item, effect
                item   = item_ahead
                effect = effect_ahead
            yield item, effect
            if DefaultConsequence is not None:
                yield None, DefaultConsequence

        txt = [ "switch( %s ) {\n" % Selector ]
        txt.extend(
            flatten_list_of_lists(
                get_case(item, text, get_content)
                for item, text in iterable(CaseList, DefaultConsequence)
            )
        )
        txt.append("}\n")
        return txt
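
The inner generator 'iterable()' above prepares C-style fall-through: a case whose consequence equals that of the following case is emitted with an empty body, so control falls through to the shared code. A standalone sketch of that merge step (hypothetical helper name, not part of the original class):

def merge_cases_for_fall_through(case_list, default=None):
    # Yield (item, effect); an empty effect means "fall through".
    item, effect = case_list[0]
    for item_ahead, effect_ahead in case_list[1:]:
        # Same consequence as the next case => empty body (fall-through).
        yield item, "" if effect_ahead == effect else effect
        item, effect = item_ahead, effect_ahead
    yield item, effect
    if default is not None:
        yield None, default

# list(merge_cases_for_fall_through([(1, "A;"), (2, "A;"), (3, "B;")]))
# -> [(1, ""), (2, "A;"), (3, "B;")]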
Example #6
 def do_NumberSet(self, NSet):
     """RETURNS: List of interval sequences that implement the number set.
     """
     return flatten_list_of_lists(
         self.get_interval_sequences(interval)
         for interval in NSet.get_intervals(PromiseToTreatWellF=True)
     )
Example #7
 def get_number_list(self):
     """RETURNS: -- List of all numbers which are contained in the number set. 
                 -- None, if one border is 'sys.maxint'. The list would be too big.
     """
     return flatten_list_of_lists(
         xrange(interval.begin, interval.end)
         for interval in self.__intervals)
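
Example #7 expands half-open intervals into the explicit numbers they contain; under the sketch of 'flatten_list_of_lists' above, ranges are accepted just as well as lists. The same pattern on plain (begin, end) tuples ('xrange' is Python 2; on Python 3 use 'range'):

intervals = [(3, 6), (10, 12)]
numbers = flatten_list_of_lists(
    range(begin, end) for begin, end in intervals)
# numbers == [3, 4, 5, 10, 11]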
Example #8
def _get_source_code(analyzer_list, terminal_list, ColumnNPerChunk, 
                     AppendixSmExistF):
    """RETURNS: String containing source code for the 'loop'. 

       -- The source code for the (looping) state machine.
       -- The terminals which contain counting actions.

    Also, it requests variable definitions as they are required.
    """
    txt = flatten_list_of_lists(
        generator.do_analyzer(analyzer) for analyzer in analyzer_list
    )
    txt.extend(
        generator.do_terminals(terminal_list, TheAnalyzer=None)
    )
    loop_analyzer = analyzer_list[0]
    if loop_analyzer.engine_type.subject_to_reload():
        txt.extend(
            generator.do_reload_procedure(loop_analyzer)
        )

    if AppendixSmExistF or ColumnNPerChunk is not None:
        variable_db.require("reference_p", 
                            Condition="QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    if Setup.buffer_codec.variable_character_sizes_f():
        variable_db.require("lexatom_begin_p")

    return txt
Example #9
def do(TheAnalyzer, CompressionType, AvailableStateIndexSet):
    """Starting point of the search for single character traces in the 
    state machine (TheAnalyzer). For each state in the state machine
    try to find branches of paths. 
    
    States which are closer to the init state are searched first. This 
    way quickly a set can be build of longest paths, which make searches
    from follower states unnecessary.
    """
    # depth_db: state_index ---> distance from init state.
    # We first search for the longest paths, so that searches for sub-paths
    # become unnecessary. This reduces computation time.
    depth_db = TheAnalyzer.get_depth_db()

    iterable_state_indices = (
        i for i in TheAnalyzer.state_db.iterkeys()
        if i in AvailableStateIndexSet and i != TheAnalyzer.init_state_index
    )

    path_list = flatten_list_of_lists(
        CharacterPathList_find(TheAnalyzer, state_index, CompressionType, AvailableStateIndexSet)
        for state_index in sorted(iterable_state_indices, key=lambda i: depth_db[i])
    )

    return path_list
Example #10
def do_sequence(Sequence, TrafoInfo=None, fh=-1):
    if TrafoInfo is None:
        TrafoInfo = Setup.buffer_codec

    return flatten_list_of_lists(
        do_character(x, TrafoInfo, fh)
        for x in Sequence
    )
Example #11
 def get_number_list(self):
     """RETURNS: -- List of all numbers which are contained in the number set. 
                 -- None, if one border is 'sys.maxint'. The list would be too big.
     """
     return flatten_list_of_lists(
         xrange(interval.begin, interval.end)
         for interval in self.__intervals
     )
Example #12
def _analyzer_functions_get(ModeDB):
    mode_name_list = ModeDB.keys()  

    code = flatten_list_of_lists( 
        engine_generator.do_with_counter(mode, mode_name_list) for mode in ModeDB.itervalues() 
    )

    code.append(
        engine_generator.comment_match_behavior(ModeDB.itervalues())
    )

    # generate frame for analyser code
    return Lng.FRAME_IN_NAMESPACE_MAIN("".join(code))
Example #13
    def value_list(self, Name):
        """The content of a value is a sequence, and the return value of this
        function is a concatenated list of all listed option setting values.
        """
        setting_list = self.__get_setting_list(Name)
        if setting_list is None: return None

        info = mode_option_info_db[Name]
        if info.content_is_list():
            result = flatten_list_of_lists(x.value for x in setting_list)
        else:
            result = [x.value for x in setting_list]

        return result
Example #14
def __collect_files(DirList):
    if DirList is None: dir_list = dir_db.keys() 
    else:               dir_list = DirList

    if not Setup.implement_lib_quex_f:
        dir_list = [ d for d in dir_list if not d.startswith("quex/") ]
    if not Setup.implement_lib_lexeme_f:
        dir_list = [ d for d in dir_list if not d.startswith("lexeme/") ]

    result = set(flatten_list_of_lists(
        dir_db_get_files(d) for d in dir_list
    ))

    if not Setup.token_class_only_f:
        result.update(dir_db[""])
    return result
Example #15
    def value_list(self, Name):
        """The content of a value is a sequence, and the return value of this
        function is a concatenated list of all listed option setting values.
        """
        setting_list = self.__get_setting_list(Name)
        if setting_list is None: return None

        info = mode_option_info_db[Name]
        if info.content_is_list():
            result = flatten_list_of_lists(
                x.value for x in setting_list
            )
        else:
            result = [ x.value for x in setting_list ]

        return result
Example #16
    def get_tree_text(self, CommandAliasDb, Node=None, Depth=0):
        """__dive: indicate recursion. May be solved by 'TreeWalker'.
        """
        if Node is None:
            Node = self.root

        txt = flatten_list_of_lists(
            self.get_tree_text(CommandAliasDb, self.door_db[door_id], Depth + 1)
            for door_id in sorted(Node.child_set))

        txt.extend([
            "    " * (Depth + 1), ".--",
            str(Node.door_id),
            " [%s]\n" % ("".join("%s " % CommandAliasDb[cmd]
                                 for cmd in Node.command_list)).strip()
        ])
        return txt
Example #17
def do_leafs(TheState, OpTree, done_set):
    """Create code starting from the 'leafs' of the command tree. The leafs are 
    the entry points from other states, i.e. the 'doors'.

    RETURNS: List of strings.
    """
    outer_door_id_set = TheState.entry.door_id_set()

    txt_list = []
    for door_id in outer_door_id_set:
        if door_id in done_set: continue
        branch_txt = do_from_leaf_to_root(TheState, OpTree, door_id, done_set)
        txt_list.append(branch_txt)

    # Flatten the list of lists, where the longest list has to come last.
    result = flatten_list_of_lists(sorted(txt_list, key=lambda x: len(x)))
    return result
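
The sort in 'do_leafs' guarantees that the longest branch text is emitted last. A tiny illustration with made-up branch texts:

branches = [["a;\n", "b;\n"], ["x;\n"]]
flat = flatten_list_of_lists(sorted(branches, key=lambda x: len(x)))
# flat == ["x;\n", "a;\n", "b;\n"]   -- the longest branch comes last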
Example #18
def do_leafs(TheState, OpTree, done_set):
    """Create code starting from the 'leafs' of the command tree. The leafs are 
    the entry points from other states, i.e. the 'doors'.

    RETURNS: List of strings.
    """
    outer_door_id_set = TheState.entry.door_id_set()

    txt_list = []
    for door_id in outer_door_id_set:
        if door_id in done_set: continue
        branch_txt = do_from_leaf_to_root(TheState, OpTree, door_id, done_set)
        txt_list.append(branch_txt)

    # Flatten the list of lists, where the longest list has to come last.
    result = flatten_list_of_lists(sorted(txt_list, key=lambda x: len(x)))
    return result
Example #19
    def get_tree_text(self, CommandAliasDb, Node=None, Depth=0):
        """__dive: indicate recursion. May be solved by 'TreeWalker'.
        """
        if Node is None: 
            Node = self.root

        txt = flatten_list_of_lists(
            self.get_tree_text(CommandAliasDb, self.door_db[door_id], Depth+1)
            for door_id in sorted(Node.child_set)
        )

        txt.extend([
            "    " * (Depth + 1), 
            ".--", 
            str(Node.door_id), 
            " [%s]\n" % ("".join("%s " % CommandAliasDb[cmd] for cmd in Node.command_list)).strip()
        ])
        return txt
Example #20
def argv_ufo_detections(Cl):
    """Detects unidentified command line options.
    """
    known_option_list = []
    for info in SETUP_INFO.itervalues():
        if type(info) != list: continue
        known_option_list.extend(info[0])

    ufo_list = Cl.unidentified_options(known_option_list)
    if not ufo_list: return

    pre_filter_flag_info = [info for info in SETUP_INFO.itervalues() if info]
    all_flag_list = flatten_list_of_lists(
        flag_list for flag_list, dummy in pre_filter_flag_info)
    ufo = ufo_list[0]

    error.log_similar(ufo, all_flag_list,
                      "Unknown command line option '%s'" % ufo)
Example #21
    def do_transition(self, from_target_map, FromSi, ToSi, BadLexatomSi):
        """Translates to transition 'FromSi' --> 'ToSi' inside the state
        machine according to the specific coding (see derived class, i.e.
        UTF8 or UTF16).

        'BadLexatomSi' is None => no bad lexatom detection.
                       else, transitions to 'bad lexatom state' are added
                       on invalid code units.

        RETURNS: [0] True if complete, False else.
                 [1] StateDb of newly generated states.
        """
        number_set = from_target_map[ToSi]

        # Check whether a modification is necessary
        if number_set.least_greater_bound() <= self.UnchangedRange:
            # 'UnchangedRange' => No change to numerical values.
            return True, None

        if not self.cut_forbidden_range(number_set):
            # 'number_set' solely contains forbidden elements.
            del from_target_map[ToSi]
            return False, None

        transformed_interval_sequence_list = flatten_list_of_lists(
            self.get_interval_sequences(interval)
            for interval in number_set.get_intervals(PromiseToTreatWellF=True))

        # Second, enter the new transitions.
        new_target_map, \
        new_state_db    = self.plug_interval_sequences(FromSi, ToSi,
                                                       transformed_interval_sequence_list,
                                                       BadLexatomSi)

        # Absorb new transitions into the target map of the 'from state'.
        del from_target_map[ToSi]
        from_target_map.update(new_target_map)

        return True, new_state_db
Example #22
def _get_source_code(analyzer_list, terminal_list, ColumnNPerChunk,
                     AppendixSmExistF):
    """RETURNS: String containing source code for the 'loop'. 

       -- The source code for the (looping) state machine.
       -- The terminals which contain counting actions.

    Also, it requests variable definitions as they are required.
    """
    txt = flatten_list_of_lists(
        generator.do_analyzer(analyzer) for analyzer in analyzer_list)
    txt.extend(generator.do_terminals(terminal_list, TheAnalyzer=None))
    loop_analyzer = analyzer_list[0]
    if loop_analyzer.engine_type.subject_to_reload():
        txt.extend(generator.do_reload_procedure(loop_analyzer))

    if AppendixSmExistF or ColumnNPerChunk is not None:
        variable_db.require("reference_p",
                            Condition="QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    if Setup.buffer_codec.variable_character_sizes_f():
        variable_db.require("lexatom_begin_p")

    return txt
Example #23
def do_from_leaf_to_root(TheState, OpTree, LeafDoorId, done_set, GlobalEntryF=False):
    """Code the sequence from a leaf of the command tree to its root. This
    avoids unnecessary gotos from outer nodes to their parents. It stops,
    whenever a parent is already implemented.  Then, the function 'code()'
    automatically inserts a 'goto parent' at the end of the node.

    RETURNS: list of strings 
    
    The list of string implements nodes from a command tree leaf over all of
    its parents to the root, or the first already implemented parent.
    """
    txt = []
    if not GlobalEntryF:
        # When the entry is a global entry into the analyzer, it is entered
        # directly at function begin. => no 'assert unreachable'! Else, yes!
        txt.append("\n\n    %s\n" % Lng.UNREACHABLE)

    txt.extend( 
        flatten_list_of_lists(
            __code(node, TheState, done_set, GlobalEntryF)
            for node in OpTree.iterable_to_root(LeafDoorId, done_set)
        )
    )
    return txt
Example #24
def print_ip_offset_scheme(info, Prefix=""):
    print Prefix + "Input Pointer Offset Schemes:"

    all_set = set(flatten_list_of_lists(
        entry_recipe.ip_offset_db.keys()
        for entry_recipe in info.entry_recipe_db.values()
    ))
    if not all_set:
        print
        return 

    L = max(len("%s" % x) for x in all_set)
    predecessor_list = sorted(info.entry_recipe_db.iterkeys())
    print Prefix + "   %s      %s" % (" "*L, "".join("%-10s" % si for si in predecessor_list))

    for position_register in sorted(list(all_set)):
        scheme = [
            info.entry_recipe_db[si].ip_offset_db.get(position_register)
            for si in predecessor_list
        ]
        name        = "%s" % position_register
        space       = " " * (L - len(name))
        print Prefix + "   %s:%s %s" % (name, space, "".join("%8s, " % x if x is not None else "<irrelv>, " for x in scheme))
    print
Example #25
def print_snapshot_map_scheme(info, Prefix=""):
    print Prefix + "Snapshot Map Schemes:"

    all_set = set(flatten_list_of_lists(
        entry_recipe.snapshot_map.keys()
        for entry_recipe in info.entry_recipe_db.values()
    ))
    if not all_set:
        print
        return 

    L = max(len("%s" % repr(x)) for x in all_set)
    predecessor_list = sorted(info.entry_recipe_db.iterkeys())
    print Prefix + "   %s      %s" % (" "*L, "".join("%-10s" % si for si in predecessor_list))

    for variable_id in sorted(list(all_set)):
        scheme = [
            info.entry_recipe_db[si].snapshot_map.get(variable_id)
            for si in predecessor_list
        ]
        name  = "%s" % repr(variable_id)
        space = " " * (L - len(name))
        print Prefix + "   %s:%s %s" % (name, space, "".join("%8s, " % x if x is not None else "          " for x in scheme))
    print
Example #26
def TRY_terminal_delimiter_sequence(Mode, UnicodeSequence, UnicodeEndSequencePattern, UponReloadDoneAdr):
    UnicodeEndSequencePattern.prepare_count_info(Mode.counter_db, 
                                                 Setup.buffer_codec)

    # Transform letter by letter.
    sequence = flatten_list_of_lists(
        transformation.do_character(x, Setup.buffer_codec)
        for x in UnicodeSequence
    )

    EndSequenceChunkN = len(sequence)

    # Column and line number count for closing delimiter
    run_time_counting_required_f, counter_txt = \
            counter_for_pattern.get(UnicodeEndSequencePattern, ShiftF=False)
    # The closer delimiter must be a string. As such, it has a pre-determined size.
    assert not run_time_counting_required_f 

    # Column and line number count for 'normal' character.
    tm, column_counter_per_chunk = \
            counter.get_XXX_counter_map(Mode.counter_db, "me->buffer._input_p", 
                                    Trafo=Setup.buffer_codec)

    dummy, character_count_txt, dummy = \
            counter.get_core_step(tm, "me->buffer._input_p")


    txt = []
    for i, x in enumerate(sequence):
        txt.append(i)
        txt.append(Lng.IF_INPUT("==", "0x%X" % x, FirstF=True)) # Opening the 'if'
        txt.append(i+1)
        txt.append("%s\n" % Lng.INPUT_P_INCREMENT())

    Lng.INDENT(counter_txt, i+1)
    if column_counter_per_chunk:
        txt.append(i+1)
        if column_counter_per_chunk == UnicodeEndSequencePattern.count_info().column_n_increment_by_lexeme_length:
            txt += Lng.REEFERENCE_P_COLUMN_ADD("me->buffer._input_p", 
                                              column_counter_per_chunk) 
        else:
            txt += Lng.REEFERENCE_P_COLUMN_ADD("(me->buffer._input_p - %i)" % EndSequenceChunkN, 
                                              column_counter_per_chunk) 
            txt.append(i+1)
            txt.extend(counter_txt)
    txt.append(i+1)
    txt.append("break;\n")

    for i, x in r_enumerate(sequence):
        txt.append(i)
        txt.append("%s"   % Lng.IF_INPUT("==", "0x%X" % Setup.buffer_limit_code, FirstF=False)) # Check BLC
        txt.append(i+1)
        txt.append("%s\n" % Lng.LEXEME_START_SET("me->buffer._input_p - %i" % i))
        txt.append(i+1)
        txt.append("%s\n" % Lng.GOTO_RELOAD(UponReloadDoneAdr, True, engine.FORWARD))  # Reload
        if i == 0: break
        txt.append(i)
        txt.append("%s"   % Lng.ELSE)
        txt.append(i+1)
        txt.append("%s\n" % Lng.INPUT_P_ADD(- i))
        txt.append(i)
        txt.append("%s\n" % Lng.END_IF())

    txt.append(i)
    txt.append("%s\n" % Lng.END_IF())

    txt.extend(character_count_txt)

    # print "##DEBUG:\n%s" % "".join(Lng.GET_PLAIN_STRINGS(txt))
    return txt
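
Note how 'txt' in this example mixes plain integers with strings. Judging from the commented-out debug line (Lng.GET_PLAIN_STRINGS), the integers appear to be indentation markers that a later rendering pass turns into whitespace. A guess at such a pass (entirely hypothetical, not quex's actual implementation):

def render(txt, unit="    "):
    # Integers set the indentation depth for the string pieces that follow.
    out, indent = [], 0
    for piece in txt:
        if isinstance(piece, int):
            indent = piece
        else:
            out.append(unit * indent + piece)
    return "".join(out)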
Example #27
 def accepting_state_index_list(self):
     return flatten_list_of_lists((x.accepting_state_index
                                   for x in acceptance_sequence)
                                  for acceptance_sequence in self.__list)
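
Example #27 nests two generator levels: the outer argument is a generator of generators, and flattening consumes both lazily. A minimal demonstration with made-up data, using the flatten sketch from above:

class Acceptance(object):
    def __init__(self, si):
        self.accepting_state_index = si

sequences = [[Acceptance(1), Acceptance(2)], [Acceptance(7)]]
flat = flatten_list_of_lists(
    (x.accepting_state_index for x in acceptance_sequence)
    for acceptance_sequence in sequences)
# flat == [1, 2, 7]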
Example #28
def do_core(Mode):
    """Produces main code for an analyzer function which can detect patterns given in
    the 'PatternList' and has things to be done mentioned in 'TerminalDb'. 

    RETURN: Code implementing the lexical analyzer.

    The code is not embedded in a function and required definitions are not provided.
    This happens through function 'wrap_up()'.
    """
    # Prepare the combined state machines and terminals 
    TerminalDb         = Mode.terminal_db
    ReloadStateForward = Mode.reload_state_forward
    OnAfterMatchCode   = Mode.incidence_db.get_CodeTerminal(E_IncidenceIDs.AFTER_MATCH)
    dial_db            = Mode.dial_db

    variable_db.require_registers(flatten_list_of_lists(
        terminal.required_register_set()
        for terminal in TerminalDb.itervalues()
    ))

    # (*) Pre Context DFA
    #     (If present: All pre-context combined in single backward analyzer.)
    Lng.debug_unit_name_set("Pre-Context:%s" % Mode.name)
    pre_context,         \
    pre_analyzer         = generator.do_pre_context(Mode.pre_context_sm_to_be_reversed_list,
                                                    Mode.pre_context_sm_id_list,
                                                    dial_db)
    # assert all_isinstance(pre_context, (IfDoorIdReferencedCode, int, str, unicode))

    # (*) Backward input position detection
    #     (Seldom present -- only for Pseudo-Ambiguous Post Contexts)
    Lng.debug_unit_name_set("Backward-Input-Position-Detection:%s" % Mode.name)
    bipd                 = generator.do_backward_read_position_detectors(Mode.bipd_sm_to_be_reversed_db,
                                                                         dial_db)
    # assert all_isinstance(bipd, (IfDoorIdReferencedCode, int, str, unicode))

    # (*) Main DFA -- try to match core patterns
    #     Post-context handling is woven into the main state machine.
    Lng.debug_unit_name_set("Core:%s" % Mode.name)
    main, \
    main_analyzer        = generator.do_main(Mode.core_sm_list, ReloadStateForward, 
                                             dial_db)
    Lng.debug_unit_name_set("Extra:%s" % Mode.name)
    # assert all_isinstance(main, (IfDoorIdReferencedCode, int, str, unicode))
    extra                = generator.do_analyzer_list(Mode.extra_analyzer_list)

    # (*) Terminals
    #     (BEFORE 'Reload procedures' because some terminals may add entries
    #      to the reloader.)
    terminals            = generator.do_terminals(TerminalDb.values(), 
                                                  main_analyzer, 
                                                  dial_db)

    # (*) Reload procedures
    reload_procedure_fw  = generator.do_reload_procedure(main_analyzer)
    reload_procedure_bw  = generator.do_reload_procedure(pre_analyzer)

    # assert all_isinstance(reload_procedures, (IfDoorIdReferencedCode, int, str, unicode))

    # (*) Re-entry preparation
    Lng.debug_unit_name_set("Re-Entry-Preparation:%s" % Mode.name)
    reentry_preparation  = generator.do_reentry_preparation(Mode.pre_context_sm_id_list,
                                                            OnAfterMatchCode, 
                                                            dial_db)

    # (*) State Router
    #     (Something that can goto a state address by a given integer value)
    state_router         = generator.do_state_router(dial_db)
    # assert all_isinstance(state_router, (IfDoorIdReferencedCode, int, str, unicode))

    # (*) Variable Definitions
    #     (Code that defines all required variables for the analyzer)
    variable_db.require_registers(Mode.required_register_set)
    variable_definitions = generator.do_variable_definitions()
    # assert all_isinstance(variable_definitions, (IfDoorIdReferencedCode, int, str, unicode))

    # (*) Putting it all together
    function_body = []
    function_body.extend(pre_context)         # implementation of pre-contexts (if there are some)
    function_body.extend(main)                # main pattern matcher
    function_body.extend(extra)               # extra state machines (from 'Loopers')
    function_body.extend(bipd)                # (seldom != empty; only for pseudo-ambiguous post contexts)
    function_body.extend(terminals)           
    function_body.extend(state_router)        # route to state by index (only if no computed gotos)
    function_body.extend(reload_procedure_fw)
    function_body.extend(reload_procedure_bw)
    function_body.extend(reentry_preparation)   

    return function_body, variable_definitions
Example #29
 def do_sequence(self, Sequence, fh=-1):
     return flatten_list_of_lists(
         self.do_Number(x) for x in Sequence
     )
Example #30
    def get_elementary_trigger_sets(self, StateIdxList, epsilon_closure_db):
        """NOTE: 'epsilon_closure_db' must previously be calculcated by 
                 self.get_epsilon_closure_db(). This has to happen once
                 and for all in order to save computation time.
        
           Considers the trigger dictionary that contains a mapping from target state index 
           to the trigger set that triggers to it: 
     
                   target_state_index   --->   trigger_set 
    
           The trigger sets of different target state indices may intersect. As a result,
           this function produces a list of pairs:
    
                  [ state_index_list, elementary_trigger_set ]
    
           where the elementary trigger set is the set of all triggers that trigger
           at the same time to all states in the state_index_list. The list contains
           only one elementary_trigger_set per state_index_list. All elementary
           trigger sets are disjoint, i.e. they do not intersect.
    
          NOTE: A general solution of this problem would have to consider the 
                inspection of all possible subset combinations. The number of 
                combinations for N trigger sets is 2^N - which potentially blows
                the calculation power of the computer. Excessive optimizations
                would have to be programmed, were it not for the following:
    
          NOTE: Fortunately, we are dealing with one dimensional sets! Thus, there is
                a very effective way to determine the elementary trigger sets. Imagine
                three trigger sets stretching over the range of numbers as follows:

          different targets, e.g. T0, T1, T2 are triggered by different sets of letters
          in the alphabet. 
                                                                    letters of alphabet
                      ---------------------------------------------------->

                  T0  [---------)       [----------)
                  T1          [------)      [-----)
                  T2              [----------------------)    
    
          => elementary sets: 
     
             only T0  [-------)
             T0, T1           [-)
             only T1            [-)
             T1, T2               [--)
             only T2                 [---)          [----)
             T0, T2                      [---)     [)
             T0, T1, T2                      [-----)
        """
        # For Documentation Purposes: The following approach has been proven to be SLOWER
        #                             than the one currently implemented. Maybe, some time
        #                             it can be tweaked to be faster.
        #
        #                             Also, it is not proven to be correct! 
        #
        ##    trigger_list = []
        ##    for state_index in StateIdxList:
        ##        state = self.states[state_index]
        ##        for target_index, trigger_set in state.target_map.get_map().iteritems():
        ##            target_epsilon_closure = epsilon_closure_db[target_index] 
        ##            interval_list          = trigger_set.get_intervals(PromiseToTreatWellF=True)
        ##            trigger_list.extend([x, target_epsilon_closure] for x in interval_list])
        ##
        ##    trigger_list.sort(key=lambda x: x[0].begin)
        ##    for element in trigger_list:
        ##        # ... continue as shown below
        ##                
        ##    return combination_list

        ## Special Case -- Quickly Done: One State, One Target State
        ##        proposal = None
        ##        if len(StateIdxList) == 1:
        ##           state_idx = list(StateIdxList)[0]
        ##            if len(epsilon_closure_db[state_idx]) == 1:
        ##                if len(self.states[state_idx].target_map.get_map()) == 1:
        ##                    target, trigger_set = self.states[state_idx].target_map.get_map().items()[0]
        ##                    proposal = { (target,): NumberSet(trigger_set) }

        # (*) Accumulate the transitions for all states in the state list.
        #     transitions to the same target state are combined by union.
        history = flatten_list_of_lists(
            # -- trigger dictionary:  target_idx --> trigger set that triggers to target
            self.states[state_idx].target_map.get_trigger_set_line_up() 
            # NOTE: Duplicate entries in history are perfectly reasonable at this point,
            #       simply if two states trigger on the same character range to the same 
            #       target state. When ranges are opened/closed via the history items
            #       this algo keeps track of duplicates (see below).
            for state_idx in StateIdxList
        )

        # (*) sort history according to position
        history.sort(key = attrgetter("position")) # lambda a, b: cmp(a.position, b.position))

        # (*) build the elementary subset list 
        combinations           = {}          # use dictionary for uniqueness
        current_interval_begin = None
        current_target_indices = {}          # use dictionary for uniqueness
        current_target_epsilon_closure = []
        for item in history:
            # -- add interval and target index combination to the data
            #    (only build interval when current begin is there, 
            #     when the interval size is not zero, and
            #     when the epsilon closure of target states is not empty)                   
            if current_interval_begin is not None and \
               current_interval_begin != item.position and \
               len(current_target_indices) != 0:

                interval = Interval(current_interval_begin, item.position)

                # current_target_epsilon_closure.sort()             
                key = tuple(sorted(current_target_epsilon_closure))
                ## Caused 3 failures in unit test:
                ## if len(current_target_epsilon_closure) == 1: key = current_target_epsilon_closure[0]  
                ## else:                                        key = tuple(sorted(current_target_epsilon_closure))
                combination = combinations.get(key)
                if combination is None:
                    combinations[key] = NumberSet(interval, ArgumentIsYoursF=True)
                else:
                    combination.unite_with(interval)
           
            # -- BEGIN / END of interval:
            #    add or delete a target state to the set of currently considered target states
            #    NOTE: More than one state can trigger on the same range to the same target state.
            #          Thus, one needs to keep track of the 'opened' target states.
            if item.change == E_Border.BEGIN:
                if current_target_indices.has_key(item.target_idx):
                    current_target_indices[item.target_idx] += 1
                else:
                    current_target_indices[item.target_idx] = 1
            else:        # == E_Border.END
                if current_target_indices[item.target_idx] > 1:
                    current_target_indices[item.target_idx] -= 1
                else:    
                    del current_target_indices[item.target_idx] 
    
            # -- re-compute the epsilon closure of the target states
            current_target_epsilon_closure = \
                self.get_epsilon_closure_of_state_set(current_target_indices.iterkeys(),
                                                      epsilon_closure_db)
            # -- set the begin of interval to come
            current_interval_begin = item.position                      
    
        ## if proposal is not None:
        ##    if    len(proposal)     != len(combinations) \
        ##       or proposal.keys()   != combinations.keys() \
        ##       or not proposal.values()[0].is_equal(combinations.values()[0]):
        ##        print "##proposal:    ", proposal
        ##        print "##combinations:", combinations

        # (*) create the list of pairs [target-index-combination, trigger_set] 
        return combinations
Example #31
    def get_elementary_trigger_sets(self, StateIdxList, epsilon_closure_db):
        """NOTE: 'epsilon_closure_db' must previously be calculcated by 
                 self.get_epsilon_closure_db(). This has to happen once
                 and for all in order to save computation time.
        
           Considers the trigger dictionary that contains a mapping from target state index 
           to the trigger set that triggers to it: 
     
                   target_state_index   --->   trigger_set 
    
           The trigger sets of different target state indices may intersect. As a result,
           this function produces a list of pairs:
    
                  [ state_index_list, elementary_trigger_set ]
    
           where the elementary trigger set is the set of all triggers that trigger
           at the same time to all states in the state_index_list. The list contains
           only one elementary_trigger_set per state_index_list. All elementary
           trigger sets are disjoint, i.e. they do not intersect.
    
          NOTE: A general solution of this problem would have to consider the 
                inspection of all possible subset combinations. The number of 
                combinations for N trigger sets is 2^N - which potentially blows
                the calculation power of the computer. Excessive optimizations
                would have to be programmed, were it not for the following:
    
          NOTE: Fortunately, we are dealing with one dimensional sets! Thus, there is
                a very effective way to determine the elementary trigger sets. Imagine
                three trigger sets stretching over the range of numbers as follows:

          different targets, e.g. T0, T1, T2 are triggered by different sets of letters
          in the alphabet. 
                                                                    letters of alphabet
                      ---------------------------------------------------->

                  T0  [---------)       [----------)
                  T1          [------)      [-----)
                  T2              [----------------------)    
    
          => elementary sets: 
     
             only T0  [-------)
             T0, T1           [-)
             only T1            [-)
             T1, T2               [--)
             only T2                 [---)          [----)
             T0, T2                      [---)     [)
             T0, T1, T2                      [-----)
        """
        # For Documentation Purposes: The following approach has been proven to be SLOWER
        #                             than the one currently implemented. Maybe, some time
        #                             it can be tweaked to be faster.
        #
        #                             Also, it is not proven to be correct!
        #
        ##    trigger_list = []
        ##    for state_index in StateIdxList:
        ##        state = self.states[state_index]
        ##        for target_index, trigger_set in state.target_map.get_map().iteritems():
        ##            target_epsilon_closure = epsilon_closure_db[target_index]
        ##            interval_list          = trigger_set.get_intervals(PromiseToTreatWellF=True)
        ##            trigger_list.extend([x, target_epsilon_closure] for x in interval_list])
        ##
        ##    trigger_list.sort(key=lambda x: x[0].begin)
        ##    for element in trigger_list:
        ##        # ... continue as shown below
        ##
        ##    return combination_list

        ## Special Case -- Quickly Done: One State, One Target State
        ##        proposal = None
        ##        if len(StateIdxList) == 1:
        ##           state_idx = list(StateIdxList)[0]
        ##            if len(epsilon_closure_db[state_idx]) == 1:
        ##                if len(self.states[state_idx].target_map.get_map()) == 1:
        ##                    target, trigger_set = self.states[state_idx].target_map.get_map().items()[0]
        ##                    proposal = { (target,): NumberSet(trigger_set) }

        # (*) Accumulate the transitions for all states in the state list.
        #     transitions to the same target state are combined by union.
        history = flatten_list_of_lists(
            # -- trigger dictionary:  target_idx --> trigger set that triggers to target
            self.states[state_idx].target_map.get_trigger_set_line_up()
            # NOTE: Duplicate entries in history are perfectly reasonable at this point,
            #       simply if two states trigger on the same character range to the same
            #       target state. When ranges are opened/closed via the history items
            #       this algo keeps track of duplicates (see below).
            for state_idx in StateIdxList)

        # (*) sort history according to position
        history.sort(key=attrgetter(
            "position"))  # lambda a, b: cmp(a.position, b.position))

        # (*) build the elementary subset list
        combinations = {}  # use dictionary for uniqueness
        current_interval_begin = None
        current_target_indices = {}  # use dictionary for uniqueness
        current_target_epsilon_closure = []
        for item in history:
            # -- add interval and target index combination to the data
            #    (only build interval when current begin is there,
            #     when the interval size is not zero, and
            #     when the epsilon closure of target states is not empty)
            if current_interval_begin is not None and \
               current_interval_begin != item.position and \
               len(current_target_indices) != 0:

                interval = Interval(current_interval_begin, item.position)

                # current_target_epsilon_closure.sort()
                key = tuple(sorted(current_target_epsilon_closure))
                ## Caused 3 failures in unit test:
                ## if len(current_target_epsilon_closure) == 1: key = current_target_epsilon_closure[0]
                ## else:                                        key = tuple(sorted(current_target_epsilon_closure))
                combination = combinations.get(key)
                if combination is None:
                    combinations[key] = NumberSet(interval,
                                                  ArgumentIsYoursF=True)
                else:
                    combination.unite_with(interval)

            # -- BEGIN / END of interval:
            #    add or delete a target state to the set of currently considered target states
            #    NOTE: More than one state can trigger on the same range to the same target state.
            #          Thus, one needs to keep track of the 'opened' target states.
            if item.change == E_Border.BEGIN:
                if current_target_indices.has_key(item.target_idx):
                    current_target_indices[item.target_idx] += 1
                else:
                    current_target_indices[item.target_idx] = 1
            else:  # == E_Border.END
                if current_target_indices[item.target_idx] > 1:
                    current_target_indices[item.target_idx] -= 1
                else:
                    del current_target_indices[item.target_idx]

            # -- re-compute the epsilon closure of the target states
            current_target_epsilon_closure = \
                self.get_epsilon_closure_of_state_set(current_target_indices.iterkeys(),
                                                      epsilon_closure_db)
            # -- set the begin of interval to come
            current_interval_begin = item.position

        ## if proposal is not None:
        ##    if    len(proposal)     != len(combinations) \
        ##       or proposal.keys()   != combinations.keys() \
        ##       or not proposal.values()[0].is_equal(combinations.values()[0]):
        ##        print "##proposal:    ", proposal
        ##        print "##combinations:", combinations

        # (*) create the list of pairs [target-index-combination, trigger_set]
        return combinations
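
The core of Examples #30/#31 is a sweep over interval borders: each BEGIN/END event updates the set of currently open targets, and every stretch between consecutive event positions yields one elementary, disjoint piece. A stripped-down sketch of that sweep on plain tuples (no epsilon closures, no NumberSet; all names illustrative):

def elementary_trigger_sets(transitions):
    # transitions: list of (target, begin, end) half-open intervals.
    # RETURNS: dict: tuple of open targets -> list of disjoint (begin, end) pieces.
    events = []
    for target, begin, end in transitions:
        events.append((begin, +1, target))   # BEGIN border
        events.append((end,   -1, target))   # END border
    events.sort()

    result, open_count, previous = {}, {}, None
    for position, delta, target in events:
        if previous is not None and previous != position and open_count:
            key = tuple(sorted(open_count))
            result.setdefault(key, []).append((previous, position))
        # Count multiplicity: several states may open the same target.
        open_count[target] = open_count.get(target, 0) + delta
        if open_count[target] == 0:
            del open_count[target]
        previous = position
    return result

# elementary_trigger_sets([("T0", 0, 10), ("T1", 5, 12)])
# -> {("T0",): [(0, 5)], ("T0", "T1"): [(5, 10)], ("T1",): [(10, 12)]}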
Example #32
def TRY_terminal_delimiter_sequence(Mode, UnicodeSequence,
                                    UnicodeEndSequencePattern,
                                    UponReloadDoneAdr):
    UnicodeEndSequencePattern.prepare_count_info(Mode.counter_db,
                                                 Setup.buffer_codec)

    # Transform letter by letter.
    sequence = flatten_list_of_lists(
        transformation.do_character(x, Setup.buffer_codec)
        for x in UnicodeSequence)

    EndSequenceChunkN = len(sequence)

    # Column and line number count for closing delimiter
    run_time_counting_required_f, counter_txt = \
            counter_for_pattern.get(UnicodeEndSequencePattern, ShiftF=False)
    # The closer delimiter must be a string. As such, it has a pre-determined size.
    assert not run_time_counting_required_f

    # Column and line number count for 'normal' character.
    tm, column_counter_per_chunk = \
            counter.get_XXX_counter_map(Mode.counter_db, "me->buffer._input_p",
                                    Trafo=Setup.buffer_codec)

    dummy, character_count_txt, dummy = \
            counter.get_core_step(tm, "me->buffer._input_p")

    txt = []
    for i, x in enumerate(sequence):
        txt.append(i)
        txt.append(Lng.IF_INPUT("==", "0x%X" % x,
                                FirstF=True))  # Opening the 'if'
        txt.append(i + 1)
        txt.append("%s\n" % Lng.INPUT_P_INCREMENT())

    Lng.INDENT(counter_txt, i + 1)
    if column_counter_per_chunk:
        txt.append(i + 1)
        if column_counter_per_chunk == \
           UnicodeEndSequencePattern.count_info().column_n_increment_by_lexeme_length:
            txt += Lng.REEFERENCE_P_COLUMN_ADD("me->buffer._input_p",
                                               column_counter_per_chunk)
        else:
            txt += Lng.REEFERENCE_P_COLUMN_ADD(
                "(me->buffer._input_p - %i)" % EndSequenceChunkN,
                column_counter_per_chunk)
            txt.append(i + 1)
            txt.extend(counter_txt)
    txt.append(i + 1)
    txt.append("break;\n")

    for i, x in r_enumerate(sequence):
        txt.append(i)
        txt.append("%s" % Lng.IF_INPUT(
            "==", "0x%X" % Setup.buffer_limit_code, FirstF=False))  # Check BLC
        txt.append(i + 1)
        txt.append("%s\n" %
                   Lng.LEXEME_START_SET("me->buffer._input_p - %i" % i))
        txt.append(i + 1)
        txt.append(
            "%s\n" %
            Lng.GOTO_RELOAD(UponReloadDoneAdr, True, engine.FORWARD))  # Reload
        if i == 0: break
        txt.append(i)
        txt.append("%s" % Lng.ELSE)
        txt.append(i + 1)
        txt.append("%s\n" % Lng.INPUT_P_ADD(-i))
        txt.append(i)
        txt.append("%s\n" % Lng.END_IF())

    txt.append(i)
    txt.append("%s\n" % Lng.END_IF())

    txt.extend(character_count_txt)

    # print "##DEBUG:\n%s" % "".join(Lng.GET_PLAIN_STRINGS(txt))
    return txt
Example #33
def do(loop_config, CaMap, SmList):
    """Perform separation:
    
         Parallel state machine  ---->    first transition  
                                       +  appendix state machine

         Appendix Sm-Id --> Original Sm-Id
    
    The 'first transition' is mounted on the loop state machine, triggering an
    acceptance that causes a transition to the appendix state machine.

    RETURNS: list of LoopMapEntry-s 
    """
    # ESSENTIAL: Delimiter state machines shall never match on a common lexeme!
    _assert_no_intersections(SmList)
    assert all(sm.get_id() is not None for sm in SmList)

    loop_map_1,     \
    original_iid_db = split_first_transition(SmList)
    # loop_map_1: list of [0] first transition character set
    #                     [1] appendix sm with first transition removed
    #
    # original_iid_db:    appendix sm id --> original sm id

    appendix_cmd_list_db = loop_config.get_appendix_terminal_cmd_list_db(
        CaMap, [sm for cs, sm in loop_map_1], original_iid_db)
    # appendix_cmd_list_db: appendix sm id --> CmdList(count action,
    #                                                  goto original terminal)

    loop_map_2 = \
        split_first_character_set_for_distinct_count_actions(CaMap,
                                                             loop_map_1)
    # loop_map_2: list of [0] character set where all elements
    #                         require same count actions
    #                     [1] count action
    #                     [2] appendix sm

    # For a 'state transition' it is required that all character sets
    # in the list are disjoint. Thus, any intersection must build its
    # own entry, and some entries might have more than one appendix.
    loop_map_3 = combine_intersecting_character_sets(loop_map_2)
    # loop_map_3: list of [0] character set; no character set intersects
    #                         with any other.
    #                     [1] count action
    #                     [2] list of (appendix sm)

    # A transition can only enter one state machine, so all appendix
    # state machines related to the same character set must be combined.
    loop_map_4,                   \
    combined_appendix_sm_list_raw = combine_appendix_sm_lists(loop_map_3)
    # loop_map_4: list of [0] (disjoint) character set
    #                     [1] count action for character set
    #                     [2] state machine id of related combined appendix sm
    # combined_appendix_sm_list_raw: list of all generated (combined) appendix sm-s

    loop_map_5, \
    combined_appendix_sm_list = determine_CmdLists(loop_config, loop_map_4,
                                                   combined_appendix_sm_list_raw,
                                                   original_iid_db)
    # loop_map_5: list of [0] (disjoint) character set
    #                     [1] CmdList = (count action, goto terminal/appendix sm),
    # combined_appendix_sm_list: contains only those combined appendix state machines
    #                            that do have transitions

    loop_map_6 = [
        LoopMapEntry(character_set,
                     IidCoupleTerminal=dial.new_incidence_id(),
                     Code=cmd_list) for character_set, cmd_list in loop_map_5
    ]
    # loop_map_6: list of LoopMapEntry-s

    # There must be a command list for any acceptance in the appendix
    # state machines.
    all_acceptance_id_set = flatten_list_of_lists(
        sm.acceptance_id_set() for sm in combined_appendix_sm_list)
    assert all(iid in appendix_cmd_list_db for iid in all_acceptance_id_set)

    return loop_map_6, combined_appendix_sm_list, appendix_cmd_list_db
Example #34
def _get_plain_line_up(TargetMapList):
    return sorted(
        flatten_list_of_lists(
            target_map.get_trigger_set_line_up(Key=i)
            for i, target_map in enumerate(TargetMapList)),
        key=lambda x: (x.position, x.change, x.target_idx))