Ejemplo n.º 1
0
def get_character_set_skipper(TriggerSet, LanguageDB):
    """Generate the code of a skipper that passes by all characters which
       belong to the given TriggerSet.

       TriggerSet -- must be a non-empty 'NumberSet' (checked by assertion).
       LanguageDB -- maps pseudo-code keys to strings or to callables that
                     produce code.

       RETURNS: 'trigger_set_skipper_template' with its placeholders replaced.
    """
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    index = sm_index.get()

    # One-entry transition map: [ trigger set ] --> the skipper itself.
    # As long as characters of the trigger set appear, the loop goes on.
    loop_map = TransitionMap()
    loop_map.add_transition(TriggerSet, index)
    loop_code = transition_block.do(loop_map.get_trigger_map(), index,
                                    InitStateF=False, DSM=None)

    comment = LanguageDB["$comment"]("Skip any character in "
                                     + TriggerSet.get_utf8_string())

    # Line and column number counting fragments are inserted first.
    template = __set_skipper_lc_counting_replacements(
        trigger_set_skipper_template, TriggerSet)

    # NOTE(review): "$$GOTO_LOOP_START$$" is listed twice with different
    #               targets ("$input" and "$entry"). Which entry takes effect
    #               depends on 'blue_print' -- confirm and drop one of them.
    replacements = [
        ["$$DELIMITER_COMMENT$$", comment],
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$",
         LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$LOOP_START$$", LanguageDB["$label-def"]("$input", index)],
        ["$$GOTO_LOOP_START$$", LanguageDB["$goto"]("$input", index)],
        ["$$LOOP_REENTRANCE$$", LanguageDB["$label-def"]("$entry", index)],
        ["$$RESTART$$", LanguageDB["$label-def"]("$input", index)],
        ["$$DROP_OUT$$", LanguageDB["$label-def"]("$drop-out", index)],
        ["$$DROP_OUT_DIRECT$$",
         LanguageDB["$label-def"]("$drop-out-direct", index)],
        ["$$GOTO_LOOP_START$$", LanguageDB["$goto"]("$entry", index)],
        ["$$SKIPPER_INDEX$$", repr(index)],
        ["$$GOTO_TERMINAL_EOF$$", LanguageDB["$goto"]("$terminal-EOF")],
        ["$$GOTO_REENTRY_PREPARATION$$", LanguageDB["$goto"]("$re-start")],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
        ["$$ON_TRIGGER_SET_TO_LOOP_START$$", loop_code],
    ]
    txt = blue_print(template, replacements)

    # Second pass: '$$GOTO_DROP_OUT$$' is replaced on the result of the
    # first pass.
    drop_out_goto = LanguageDB["$goto"]("$drop-out", index)
    return blue_print(txt, [["$$GOTO_DROP_OUT$$", drop_out_goto]])
Ejemplo n.º 2
0
def __cpp_terminal_states(StateMachineName, sm, action_db, DefaultAction):
    """Fill '__cpp_terminal_state_str' with the terminal state code.

       StateMachineName -- used in the initial state label and replacement.
       sm               -- state machine; its initial state is inspected.
       action_db        -- maps state machine ids to action code strings.
       DefaultAction    -- code string for the default action.

       RETURNS: The template text with its placeholders replaced.
    """
    # -- specific terminal states of patterns (entered from acceptance states)
    specific = []
    for state_machine_id in action_db.keys():
        specific.append("  %s:\n" % get_label("", None, state_machine_id))
        # Every line of the action code is indented by four spaces.
        indented_action = "    " + action_db[state_machine_id].replace("\n", "\n    ")
        specific.append("    QUEX_STREAM_SEEK(last_acceptance_input_position);")
        specific.append(indented_action + "\n")
        specific.append("    // if action code returns from the function, then the following is meaningless\n")
        # A new character is fetched only if the initial state has transitions.
        if sm.states[sm.init_state_index].transitions().is_empty() == False:
            specific.append("    QUEX_STREAM_GET(input);")
        specific.append("    goto QUEX_LABEL_%s_ENTRY_INITIAL_STATE;\n" % StateMachineName)
    specific_terminal_states_str = "".join(specific)

    # -- general terminal state (entered from non-acceptance state):
    #    one 'case' per state machine id that jumps to its terminal label.
    jumps_to_acceptance_states_str = "".join(
        ["     case %s: goto %s;\n"
         % (repr(state_machine_id), get_label("", None, state_machine_id))
         for state_machine_id in action_db.keys()])

    # NOTE(review): the default action replacement puts the spaces in front
    #               of the newline ("        \n"); presumably indentation
    #               after the newline was intended -- confirm before changing.
    replacements = [
        ["$$JUMPS_TO_ACCEPTANCE_STATE$$", jumps_to_acceptance_states_str],
        ["$$SPECIFIC_TERMINAL_STATES$$", specific_terminal_states_str],
        ["$$DEFAULT_ACTION$$", DefaultAction.replace("\n", "        \n")],
        ["$$STATE_MACHINE_NAME$$", StateMachineName],
        ["$$INITIAL_STATE_INDEX_LABEL$$",
         get_label(StateMachineName, sm.init_state_index)],
    ]
    return blue_print(__cpp_terminal_state_str, replacements)
Ejemplo n.º 3
0
def __get_mode_init_call(mode, LexerClassName):
    """Fill 'quex_mode_init_call_str' with the function names of a mode.

       Each name is '<LexerClassName>_<mode name>_<member>' by default. The
       analyser function of an 'inheritable: only' mode and the handlers
       whose code fragment lists are empty are replaced by the corresponding
       'QuexMode_..._function' names.

       RETURNS: The template text with its placeholders replaced.
    """
    prefix = "%s_%s_" % (LexerClassName, mode.name)

    if mode.options["inheritable"] == "only":
        analyser_function = "QuexMode_uncallable_analyser_function"
    else:
        analyser_function = prefix + "analyser_function"

    if mode.on_entry_code_fragments() == []:
        on_entry = "QuexMode_on_entry_exit_null_function"
    else:
        on_entry = prefix + "on_entry"

    if mode.on_exit_code_fragments() == []:
        on_exit = "QuexMode_on_entry_exit_null_function"
    else:
        on_exit = prefix + "on_exit"

    if mode.on_indentation_code_fragments() == []:
        on_indentation = "QuexMode_on_indentation_null_function"
    else:
        on_indentation = prefix + "on_indentation"

    replacements = [
        ["$$MN$$", mode.name],
        ["$analyser_function", analyser_function],
        ["$on_indentation", on_indentation],
        ["$on_entry", on_entry],
        ["$on_exit", on_exit],
        ["$has_base", prefix + "has_base"],
        ["$has_entry_from", prefix + "has_entry_from"],
        ["$has_exit_to", prefix + "has_exit_to"],
    ]
    return blue_print(quex_mode_init_call_str, replacements)
Ejemplo n.º 4
0
def do(sm, LanguageDB, PrintStateMachineF):
    """Fill 'function_str' with the code of a backward detector for 'sm'.

       sm                 -- the state machine to be coded.
       LanguageDB         -- maps pseudo-code keys to strings or callables.
       PrintStateMachineF -- if set, the state machine's string
                             representation is added as a multi-line comment.

       RETURNS: The template text with its placeholders replaced.
    """
    detector_name = "BACKWARD_DETECTOR_" + repr(sm.get_id())
    dsm = StateMachineDecorator(sm, detector_name,
                                PostContextSM_ID_List=[],
                                BackwardLexingF=True,
                                BackwardInputPositionDetectionF=True)

    function_body = state_machine_coder.do(dsm)

    state_machine_comment = "    " + LanguageDB["$comment"]("state machine") + "\n"
    if PrintStateMachineF:
        state_machine_comment += LanguageDB["$ml-comment"](sm.get_string(NormalizeF=False)) + "\n"

    # -- input position detectors simply the next 'catch' and return:
    #    general terminal label, seek to the stored position, increment.
    function_body += (LanguageDB["$label-def"]("$terminal-general", True) + "\n"
                      + LanguageDB["$input/seek_position"]("end_of_core_pattern_position") + "\n"
                      + LanguageDB["$input/increment"] + "\n")

    local_variables = [
        ["QUEX_CHARACTER_TYPE", "input", "(QUEX_CHARACTER_TYPE)(0x0)"],
        ["QUEX_CHARACTER_POSITION_TYPE", "end_of_core_pattern_position",
         "(QUEX_CHARACTER_TYPE*)(0x0)"],
    ]
    variables_txt = LanguageDB["$local-variable-defs"](local_variables)

    # Any 'L' in the repr of the id (e.g. a long integer suffix) is removed.
    id_str = repr(sm.get_id()).replace("L", "")
    return blue_print(function_str, [
        ["$$ID$$", id_str],
        ["$$FUNCTION_BODY$$", function_body],
        ["$$LOCAL_VARIABLES$$", variables_txt],
        ["$$STATE_MACHINE$$", state_machine_comment],
    ])
Ejemplo n.º 5
0
def __get_mode_init_call(mode):
    """Fill 'quex_mode_init_call_str' with the function names of a mode.

       Each name is 'QUEX_NAME(<mode name>_<member>)' by default. The
       analyzer function of an 'inheritable: only' mode and the handlers
       whose code fragment lists are empty are replaced by the corresponding
       'QUEX_NAME(Mode_..._function)' names.

       RETURNS: The template text with its placeholders replaced.
    """
    def member(Suffix):
        # Name of a mode specific function, e.g. QUEX_NAME(X_on_entry).
        return "QUEX_NAME(%s_%s)" % (mode.name, Suffix)

    analyzer_function = member("analyzer_function")
    on_indentation = member("on_indentation")
    on_entry = member("on_entry")
    on_exit = member("on_exit")
    has_base = member("has_base")
    has_entry_from = member("has_entry_from")
    has_exit_to = member("has_exit_to")

    # Replace what the mode does not provide by the predefined functions.
    if mode.options["inheritable"] == "only":
        analyzer_function = "QUEX_NAME(Mode_uncallable_analyzer_function)"
    if mode.get_code_fragment_list("on_entry") == []:
        on_entry = "QUEX_NAME(Mode_on_entry_exit_null_function)"
    if mode.get_code_fragment_list("on_exit") == []:
        on_exit = "QUEX_NAME(Mode_on_entry_exit_null_function)"
    if mode.get_code_fragment_list("on_indentation") == []:
        on_indentation = "QUEX_NAME(Mode_on_indentation_null_function)"

    replacements = [
        ["$$MN$$", mode.name],
        ["$analyzer_function", analyzer_function],
        ["$on_indentation", on_indentation],
        ["$on_entry", on_entry],
        ["$on_exit", on_exit],
        ["$has_base", has_base],
        ["$has_entry_from", has_entry_from],
        ["$has_exit_to", has_exit_to],
    ]
    return blue_print(quex_mode_init_call_str, replacements)
Ejemplo n.º 6
0
def __lc_counting_replacements(code_str, CharacterSet):
    """Insert the line and column number counting fragments into 'code_str'.

         -- a reference pointer is set where the placeholder for the
            column pointer definition stands.
         -- inside the loop, 'line_column_counter_in_loop' is inserted only
            if the CharacterSet contains the newline character; otherwise
            nothing is inserted.
         -- at the end and before a reload, the distance between the input
            pointer and the reference pointer is added to the column count.
         -- after a reload, the reference pointer is set to the input pointer.

       RETURNS: 'code_str' with the '$$LC_COUNT_...$$' placeholders replaced.
    """
    # Newlines need to be counted in the loop only if they can be skipped.
    if CharacterSet.contains(ord("\n")):
        loop_fragment = line_column_counter_in_loop
    else:
        loop_fragment = ""

    replacements = [
        ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$",
         "    __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"],
        ["$$LC_COUNT_IN_LOOP$$", loop_fragment],
        ["$$LC_COUNT_END_PROCEDURE$$",
         "        __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(me->buffer._input_p - reference_p));\n"],
        ["$$LC_COUNT_BEFORE_RELOAD$$",
         "       __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(me->buffer._input_p - reference_p));\n"],
        ["$$LC_COUNT_AFTER_RELOAD$$",
         "           __QUEX_IF_COUNT_COLUMNS(reference_p = me->buffer._input_p);\n"],
    ]
    return blue_print(code_str, replacements)
Ejemplo n.º 7
0
    def get_graphviz_string(self, NormalizeF=False):
        # (*) normalize the state indices
        index_map, inverse_index_map, index_sequence = self.__get_state_index_normalization(NormalizeF)

        # (*) Border of plot block
        frame_txt = """
        digraph state_machine_%i {
	       rankdir=LR;
	       size="8,5"
	       node [shape = doublecircle]; $$ACCEPTANCE_STATES$$;
           node [shape = circle];
           $$TRANSITIONS$$
        }
        """ % self.get_id()

        transition_str       = ""
        acceptance_state_str = ""
        for state_i in index_sequence:
            printed_state_i = index_map[state_i]
            state           = self.states[state_i]
            if state.is_acceptance(): 
                acceptance_state_str += "%i; " % int(printed_state_i)
            transition_str += state.get_graphviz_string(printed_state_i, index_map)

        if acceptance_state_str != "": acceptance_state_str = acceptance_state_str[:-2]
        return blue_print(frame_txt, [["$$ACCEPTANCE_STATES$$", acceptance_state_str],
                                      ["$$TRANSITIONS$$",       transition_str]])
Ejemplo n.º 8
0
def __lc_counting_replacements(code_str, CharacterSet):
    """Replace the '$$LC_COUNT_...$$' placeholders of 'code_str' with the
       fragments for line and column number counting.

         -- pointer definition: sets 'reference_p' to the buffer's current
            memory address.
         -- in loop: 'line_column_counter_in_loop', but only if the newline
            character is an element of CharacterSet; else an empty string.
         -- end procedure / before reload: add the distance from
            'reference_p' to the input pointer to the column count.
         -- after reload: set 'reference_p' to the input pointer.

       RETURNS: The text with the placeholders replaced.
    """
    pointer_definition = "    __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
    at_end = "        __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(me->buffer._input_p - reference_p));\n"
    at_reload_begin = "       __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(me->buffer._input_p - reference_p));\n"
    at_reload_end = "           __QUEX_IF_COUNT_COLUMNS(reference_p = me->buffer._input_p);\n"

    # Without a newline in the character set nothing is counted in the loop.
    inside_loop = ""
    if CharacterSet.contains(ord("\n")):
        inside_loop = line_column_counter_in_loop

    return blue_print(code_str, [
        ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", pointer_definition],
        ["$$LC_COUNT_IN_LOOP$$", inside_loop],
        ["$$LC_COUNT_END_PROCEDURE$$", at_end],
        ["$$LC_COUNT_BEFORE_RELOAD$$", at_reload_begin],
        ["$$LC_COUNT_AFTER_RELOAD$$", at_reload_end],
    ])
Ejemplo n.º 9
0
def do_map_id_to_name_function():
    """Fill 'func_str' with the switch cases and the name string definitions
       for all tokens of 'token_id_db' that are not listed in
       'standard_token_id_list'.

       RETURNS: The template text with its placeholders replaced.
    """
    # Width of the longest token name; used to align the generated columns.
    widest = max(len(name) for name in token_id_db.keys())

    def padding(Name):
        return " " * (widest - len(Name))

    name_definition = "   static const char  token_id_str_%s[]%s = \"%s\";\n"

    # -- define the function for token names
    cases = []
    definitions = []
    for token_name in sorted(token_id_db.keys()):
        if token_name in standard_token_id_list:
            continue

        if len(token_name) > 2 and token_name[:2] == "--":
            # UCS codepoints are coded directly as pure numbers
            token = token_id_db[token_name]
            shown_name = token.name
            cases.append("   case 0x%06X: return token_id_str_%s;\n"
                         % (token.number, shown_name))
        else:
            shown_name = token_name
            cases.append("   case %s%s:%s return token_id_str_%s;\n"
                         % (Setup.token_id_prefix, token_name,
                            padding(token_name), token_name))

        definitions.append(name_definition
                           % (shown_name, padding(shown_name), shown_name))

    return blue_print(func_str, [
        ["$$TOKEN_ID_CASES$$", "".join(cases)],
        ["$$TOKEN_NAMES$$", "".join(definitions)],
    ])
Ejemplo n.º 10
0
def write_mode_class_implementation(Modes, Setup):
    """Write the mode class implementation file.

       Modes -- maps mode names to mode objects.
       Setup -- provides the class names, the file stem for the include
                line and the name of the file to be written
                ('output_code_file').

       The file contains an include line (the derived class' header if one
       is given, else the output file stem), the mode object definitions and
       the member functions from 'mode_classes.do()' inside namespace 'quex'.
       Newlines are converted to 'os.linesep' before writing.
    """
    LexerClassName = Setup.output_engine_name
    TokenClassName = Setup.input_token_class_name
    OutputFilestem = Setup.output_file_stem
    DerivedClassName = Setup.input_derived_class_name
    DerivedClassHeaderFileName = Setup.input_derived_class_file
    ModeClassImplementationFile = Setup.output_code_file

    if DerivedClassHeaderFileName != "":
        txt = "#include<" + DerivedClassHeaderFileName + ">\n"
    else:
        txt = "#include\"" + OutputFilestem + "\"\n"

    # -- mode class member function definitions (on_entry, on_exit, has_base, ...)
    mode_class_member_functions_txt = mode_classes.do(Modes.values())

    mode_objects_txt = ""
    for mode_name in Modes:
        mode_objects_txt += "        QuexMode  $$LEXER_CLASS_NAME$$::%s;\n" % mode_name

    txt += "namespace quex {\n"
    txt += mode_objects_txt
    txt += mode_class_member_functions_txt
    txt += "} // END: namespace quex\n"

    txt = blue_print(txt, [["$$LEXER_CLASS_NAME$$", LexerClassName],
                           ["$$TOKEN_CLASS$$", TokenClassName],
                           ["$$LEXER_DERIVED_CLASS_NAME$$", DerivedClassName]])

    # The file is opened in binary mode, so the platform's line separator
    # has to be inserted by hand.
    if os.linesep != "\n":
        txt = txt.replace("\n", os.linesep)

    fh_out = open(ModeClassImplementationFile, "wb")
    try:
        fh_out.write(txt)
    finally:
        # Close the handle even if writing fails, so it does not leak.
        fh_out.close()
Ejemplo n.º 11
0
def write_mode_class_implementation(Modes, Setup):
    """Write the mode class implementation file.

       Modes -- maps mode names to mode objects.
       Setup -- provides the class names, the file stem for the include
                line and the name of the file to be written
                ('output_code_file').

       The file contains an include line (the derived class' header if one
       is given, else the output file stem), the mode object definitions and
       the member functions from 'mode_classes.do()' inside namespace 'quex'.
       Newlines are converted to 'os.linesep' before writing.
    """
    LexerClassName              = Setup.output_engine_name
    TokenClassName              = Setup.input_token_class_name
    OutputFilestem              = Setup.output_file_stem
    DerivedClassName            = Setup.input_derived_class_name
    DerivedClassHeaderFileName  = Setup.input_derived_class_file
    ModeClassImplementationFile = Setup.output_code_file

    if DerivedClassHeaderFileName != "": txt = "#include<" + DerivedClassHeaderFileName +">\n"
    else:                                txt = "#include\"" + OutputFilestem +"\"\n"

    # -- mode class member function definitions (on_entry, on_exit, has_base, ...)
    mode_class_member_functions_txt = mode_classes.do(Modes.values())

    mode_objects_txt = ""
    for mode_name in Modes:
        mode_objects_txt += "        QuexMode  $$LEXER_CLASS_NAME$$::%s;\n" % mode_name

    txt += "namespace quex {\n"
    txt += mode_objects_txt
    txt += mode_class_member_functions_txt
    txt += "} // END: namespace quex\n"

    txt = blue_print(txt, [["$$LEXER_CLASS_NAME$$",         LexerClassName],
                           ["$$TOKEN_CLASS$$",              TokenClassName],
                           ["$$LEXER_DERIVED_CLASS_NAME$$", DerivedClassName]])

    # The file is opened in binary mode, so the platform's line separator
    # has to be inserted by hand.
    if os.linesep != "\n": txt = txt.replace("\n", os.linesep)

    fh_out = open(ModeClassImplementationFile, "wb")
    try:
        fh_out.write(txt)
    finally:
        # Close the handle even if writing fails, so it does not leak.
        fh_out.close()
Ejemplo n.º 12
0
def do(Modes):
    """Compose the text of the mode implementation.

       Modes -- maps mode names to mode objects.

       The text consists of an include line (the derived class' header if
       one is configured in 'Setup', else the output header file), the
       include of 'C-adaptions.h', and -- between the namespace macros --
       one global mode object per mode that is not 'inheritable: only',
       followed by the member functions from 'write_member_functions()'.

       RETURNS: The text with the class name placeholders replaced.
    """
    LexerClassName = Setup.analyzer_class_name
    TokenClassName = Setup.token_class_name   # read, but not used below
    DerivedClassName = Setup.analyzer_derived_class_name
    DerivedClassHeaderFileName = Setup.analyzer_derived_class_file

    if DerivedClassHeaderFileName != "":
        include_line = "#include <" + get_file_reference(DerivedClassHeaderFileName) + ">\n"
    else:
        include_line = "#include \"" + get_file_reference(Setup.output_header_file) + "\"\n"

    # -- mode class member function definitions (on_entry, on_exit, has_base, ...)
    member_functions_txt = write_member_functions(Modes.values())

    # Modes which are only to be inherited from do not get a mode object.
    mode_objects = [
        "/* Global */QUEX_NAME(Mode)  QUEX_NAME(%s);\n" % mode_name
        for mode_name, mode in Modes.items()
        if mode.options["inheritable"] != "only"
    ]

    txt = "".join([
        include_line,
        "#include <quex/code_base/analyzer/C-adaptions.h>\n",
        "QUEX_NAMESPACE_MAIN_OPEN\n",
        "".join(mode_objects),
        member_functions_txt,
        "QUEX_NAMESPACE_MAIN_CLOSE\n",
    ])

    return blue_print(txt, [["$$LEXER_CLASS_NAME$$", LexerClassName],
                            ["$$LEXER_DERIVED_CLASS_NAME$$", DerivedClassName]])
Ejemplo n.º 13
0
    def get_graphviz_string(self, NormalizeF=False):
        # (*) normalize the state indices
        index_map, inverse_index_map, index_sequence = self.__get_state_index_normalization(
            NormalizeF)

        # (*) Border of plot block
        frame_txt = """
        digraph state_machine_%i {
	       rankdir=LR;
	       size="8,5"
	       node [shape = doublecircle]; $$ACCEPTANCE_STATES$$;
           node [shape = circle];
           $$TRANSITIONS$$
        }
        """ % self.get_id()

        transition_str = ""
        acceptance_state_str = ""
        for state_i in index_sequence:
            printed_state_i = index_map[state_i]
            state = self.states[state_i]
            if state.is_acceptance():
                acceptance_state_str += "%i; " % int(printed_state_i)
            transition_str += state.get_graphviz_string(
                printed_state_i, index_map)

        if acceptance_state_str != "":
            acceptance_state_str = acceptance_state_str[:-2]
        return blue_print(frame_txt,
                          [["$$ACCEPTANCE_STATES$$", acceptance_state_str],
                           ["$$TRANSITIONS$$", transition_str]])
Ejemplo n.º 14
0
def __get_mode_init_call(mode, LexerClassName):
    """Produce the mode initialization call from 'quex_mode_init_call_str'.

       The function names default to '<LexerClassName>_<mode name>_<member>'.
       Predefined 'QuexMode_..._function' names are used instead for the
       analyser function of an 'inheritable: only' mode and for handlers
       whose code fragment lists are empty.

       RETURNS: The template text with its placeholders replaced.
    """
    header_str = "%s_%s_" % (LexerClassName, mode.name)

    def member(Suffix):
        # Default name of a mode specific function.
        return header_str + Suffix

    analyser_function = member("analyser_function")
    on_indentation = member("on_indentation")
    on_entry = member("on_entry")
    on_exit = member("on_exit")

    # Replace what the mode does not provide by the predefined functions.
    if mode.options["inheritable"] == "only":
        analyser_function = "QuexMode_uncallable_analyser_function"
    if mode.on_entry_code_fragments() == []:
        on_entry = "QuexMode_on_entry_exit_null_function"
    if mode.on_exit_code_fragments() == []:
        on_exit = "QuexMode_on_entry_exit_null_function"
    if mode.on_indentation_code_fragments() == []:
        on_indentation = "QuexMode_on_indentation_null_function"

    return blue_print(quex_mode_init_call_str, [
        ["$$MN$$", mode.name],
        ["$analyser_function", analyser_function],
        ["$on_indentation", on_indentation],
        ["$on_entry", on_entry],
        ["$on_exit", on_exit],
        ["$has_base", member("has_base")],
        ["$has_entry_from", member("has_entry_from")],
        ["$has_exit_to", member("has_exit_to")],
    ])
Ejemplo n.º 15
0
def do(Modes):
    """Build the text of the mode implementation.

       Modes -- maps mode names to mode objects.

       An include line is followed by the include of 'C-adaptions.h'. Inside
       the namespace macros a global mode object is defined for each mode
       that is not 'inheritable: only'; then the member functions from
       'write_member_functions()' follow.

       RETURNS: The text with the class name placeholders replaced.
    """
    LexerClassName = Setup.analyzer_class_name
    TokenClassName = Setup.token_class_name   # read, but not used below
    DerivedClassName = Setup.analyzer_derived_class_name
    DerivedClassHeaderFileName = Setup.analyzer_derived_class_file

    # The derived class' header is included if there is one; otherwise the
    # generated header.
    if DerivedClassHeaderFileName != "":
        pieces = ["#include <" + get_file_reference(DerivedClassHeaderFileName) + ">\n"]
    else:
        pieces = ["#include \"" + get_file_reference(Setup.output_header_file) + "\"\n"]
    pieces.append("#include <quex/code_base/analyzer/C-adaptions.h>\n")

    # -- mode class member function definitions (on_entry, on_exit, has_base, ...)
    member_functions_txt = write_member_functions(Modes.values())

    pieces.append("QUEX_NAMESPACE_MAIN_OPEN\n")
    for mode_name, mode in Modes.items():
        if mode.options["inheritable"] != "only":
            pieces.append("/* Global */QUEX_NAME(Mode)  QUEX_NAME(%s);\n" % mode_name)
    pieces.append(member_functions_txt)
    pieces.append("QUEX_NAMESPACE_MAIN_CLOSE\n")

    replacements = [["$$LEXER_CLASS_NAME$$", LexerClassName],
                    ["$$LEXER_DERIVED_CLASS_NAME$$", DerivedClassName]]
    return blue_print("".join(pieces), replacements)
Ejemplo n.º 16
0
def get_on_indentation_handler(Mode):
    """Build the indentation handler code of a mode from 'on_indentation_str'.

       Mode -- provides the code fragment lists of the indentation events.

       For each of the events 'on_indent', 'on_nodent', 'on_dedent' /
       'on_n_dedent' and 'on_indentation_error' the mode's code fragments are
       used if present; otherwise the default code defined below.

       RETURNS: "    return;" if the default indentation handler suffices,
                else the template text with its placeholders replaced.
    """
    has = Mode.has_code_fragment_list

    # 'on_dedent' and 'on_n_dedent' cannot be defined at the same time.
    assert not (has("on_dedent") and has("on_n_dedent"))

    # A mode that deals only with the default indentation handler relies
    # on what is defined in '$QUEX_PATH/analyzer/member/on_indentation.i'
    if Mode.default_indentation_handler_sufficient():
        return "    return;"

    def user_code(EventName):
        # Formatted code of the mode's fragments for the given event.
        code, eol_f = action_code_formatter.get_code(Mode.get_code_fragment_list(EventName))
        return code

    if has("on_indent"):
        indent_code = user_code("on_indent")
    else:
        indent_code = "self_send(__QUEX_SETTING_TOKEN_ID_INDENT);"

    if has("on_nodent"):
        nodent_code = user_code("on_nodent")
    else:
        nodent_code = "self_send(__QUEX_SETTING_TOKEN_ID_NODENT);"

    # Only one of the two dedent procedures receives code.
    dedent_code = ""
    n_dedent_code = ""
    if has("on_dedent"):
        assert not has("on_n_dedent")
        dedent_code = user_code("on_dedent")
    elif has("on_n_dedent"):
        assert not has("on_dedent")
        n_dedent_code = user_code("on_n_dedent")
    else:
        # If no 'on_dedent' and no 'on_n_dedent' is defined ...
        n_dedent_code = "".join([
            "#if defined(QUEX_OPTION_TOKEN_REPETITION_SUPPORT)\n",
            "    self_send_n(ClosedN, __QUEX_SETTING_TOKEN_ID_DEDENT);\n",
            "#else\n",
            "    while( start-- != stack->back ) self_send(__QUEX_SETTING_TOKEN_ID_DEDENT);\n",
            "#endif\n",
        ])

    if has("on_indentation_error"):
        error_code = user_code("on_indentation_error")
    else:
        # Default: Blow the program if there is an indentation error.
        first_line = 'QUEX_ERROR_EXIT("Lexical analyzer mode \'%s\': indentation error detected!\\n"' % Mode.name
        second_line = '                "No \'on_indentation_error\' handler has been specified.\\n");'
        error_code = first_line + second_line

    # Note: 'on_indentation_bad' is applied in code generation for
    #       indentation counter in 'indentation_counter.py'.
    return blue_print(on_indentation_str, [
        ["$$INDENT-PROCEDURE$$", indent_code],
        ["$$NODENT-PROCEDURE$$", nodent_code],
        ["$$DEDENT-PROCEDURE$$", dedent_code],
        ["$$N-DEDENT-PROCEDURE$$", n_dedent_code],
        ["$$INDENTATION-ERROR-PROCEDURE$$", error_code],
    ])
Ejemplo n.º 17
0
def __reload_definitions(InitialStateIndex):
    """Return the list of reload definitions: the forward reload address,
       the forward reload text for the initial state, and the backward
       reload address.
    """
    forward = Address("$reload-FORWARD", None, reload_forward_str)

    init_state_label = get_label("$entry", InitialStateIndex, U=True)
    end_of_stream_label = get_label("$terminal-EOF", U=True)
    init_state_forward = blue_print(reload_init_state_forward_str,
                                    [["$$INIT_STATE$$", init_state_label],
                                     ["$$END_OF_STREAM$$", end_of_stream_label]])

    # Append empty references to make sure that the addresses are implemented.
    backward = Address("$reload-BACKWARD", None, reload_backward_str)

    return [forward, init_state_forward, backward]
Ejemplo n.º 18
0
def get_character_set_skipper(TriggerSet, LanguageDB):
    """Produce the code of a simple skipper, i.e. code that passes by the
       characters which are elements of the TriggerSet.

       TriggerSet -- a non-empty 'NumberSet' (both checked by assertion).
       LanguageDB -- maps pseudo-code keys to strings or to callables that
                     produce code.

       RETURNS: 'trigger_set_skipper_template' with its placeholders replaced.
    """
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    loop_index = sm_index.get()

    # Mini trigger map:  [ trigger set ] --> loop start
    # While characters of the trigger set come in, the loop is re-entered.
    trigger_map_builder = TransitionMap()
    trigger_map_builder.add_transition(TriggerSet, loop_index)
    trigger_map = trigger_map_builder.get_trigger_map()
    on_trigger_code = transition_block.do(trigger_map, loop_index,
                                          InitStateF=False, DSM=None)

    set_description = "Skip any character in " + TriggerSet.get_utf8_string()
    delimiter_comment = LanguageDB["$comment"](set_description)

    # Line and column number counting
    skeleton = __set_skipper_lc_counting_replacements(trigger_set_skipper_template,
                                                      TriggerSet)

    # NOTE(review): "$$GOTO_LOOP_START$$" occurs twice below, once with the
    #               "$input" and once with the "$entry" label. Which one is
    #               applied depends on 'blue_print' -- confirm, remove one.
    first_pass = [
        ["$$DELIMITER_COMMENT$$", delimiter_comment],
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$",
         LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$LOOP_START$$", LanguageDB["$label-def"]("$input", loop_index)],
        ["$$GOTO_LOOP_START$$", LanguageDB["$goto"]("$input", loop_index)],
        ["$$LOOP_REENTRANCE$$", LanguageDB["$label-def"]("$entry", loop_index)],
        ["$$RESTART$$", LanguageDB["$label-def"]("$input", loop_index)],
        ["$$DROP_OUT$$", LanguageDB["$label-def"]("$drop-out", loop_index)],
        ["$$DROP_OUT_DIRECT$$",
         LanguageDB["$label-def"]("$drop-out-direct", loop_index)],
        ["$$GOTO_LOOP_START$$", LanguageDB["$goto"]("$entry", loop_index)],
        ["$$SKIPPER_INDEX$$", repr(loop_index)],
        ["$$GOTO_TERMINAL_EOF$$", LanguageDB["$goto"]("$terminal-EOF")],
        ["$$GOTO_REENTRY_PREPARATION$$", LanguageDB["$goto"]("$re-start")],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
        ["$$ON_TRIGGER_SET_TO_LOOP_START$$", on_trigger_code],
    ]
    txt = blue_print(skeleton, first_pass)

    # The drop-out goto is replaced on the result of the first pass.
    second_pass = [["$$GOTO_DROP_OUT$$",
                    LanguageDB["$goto"]("$drop-out", loop_index)]]
    return blue_print(txt, second_pass)
Ejemplo n.º 19
0
def __reload_definitions(InitialStateIndex):
    """Collect the reload definitions in a list: the address for forward
       reload, the text for forward reload in the initial state, and the
       address for backward reload.
    """
    definitions = [Address("$reload-FORWARD", None, reload_forward_str)]

    replacements = [
        ["$$INIT_STATE$$", get_label("$entry", InitialStateIndex, U=True)],
        ["$$END_OF_STREAM$$", get_label("$terminal-EOF", U=True)],
    ]
    definitions.append(blue_print(reload_init_state_forward_str, replacements))

    # Append empty references to make sure that the addresses are implemented.
    definitions.append(Address("$reload-BACKWARD", None, reload_backward_str))
    return definitions
Ejemplo n.º 20
0
def replace_keywords(program_txt, LanguageDB, NoIndentF):
    """Replaces pseudo-code keywords with keywords of the given language.

       program_txt -- text that contains the pseudo-code keywords.
       LanguageDB  -- maps keywords to their replacements; its items are
                      passed to 'blue_print'.
       NoIndentF   -- if False, the result is indented by four spaces after
                      each newline and ends with exactly one added newline.

       RETURNS: The text with the keywords replaced.
    """
    txt = blue_print(program_txt, LanguageDB.items())

    # The comparison '== False' is kept on purpose: callers passing 'None'
    # keep getting the unindented text.
    if NoIndentF == False:
        # delete the last newline, to prevent additional indentation
        # ('endswith' is safe on an empty text, where 'txt[-1]' would raise)
        if txt.endswith("\n"): txt = txt[:-1]
        # indent by four spaces
        # (if this happens in recursively called functions nested indented blocks
        #  are correctly indented, see NumberSet::get_condition_code() for example)
        txt = txt.replace("\n", "\n    ") + "\n"

    return txt
Ejemplo n.º 21
0
def __set_skipper_lc_counting_replacements(code_str, CharacterSet):
    """Line and Column Number Counting (Character Set Skipper):

         -- definition: the pointer 'column_count_p_$$SKIPPER_INDEX$$' is
            defined and set to the current position in the buffer.
         -- in loop: if the character set contains newline, then the code
            'lc_counter_in_loop' is inserted; otherwise nothing is inserted.
         -- at end of skipping and before reload:
               column_n += current_position - column_count_p
         -- after reload:
               column_count_p = current_position

       code_str     -- template that contains the '$$LC_COUNT_...$$' markers.
       CharacterSet -- set of skipped characters; only 'contains()' is used.

       RETURNS: 'code_str' with the markers replaced. The '$$SKIPPER_INDEX$$'
                inside the inserted code is not replaced here.
    """
    pointer_definition = (
        "#   if defined(QUEX_OPTION_LINE_NUMBER_COUNTING) || defined(QUEX_OPTION_COLUMN_NUMBER_COUNTING)\n"
        "#   ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n"
        "    QUEX_CHARACTER_POSITION_TYPE column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n"
        "#   endif\n"
        "#   endif\n"
    )

    # Newlines need to be counted inside the loop only if they can be skipped.
    if CharacterSet.contains(ord("\n")): newline_counting = lc_counter_in_loop
    else:                                newline_counting = ""

    on_end = (
        "#       ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n"
        "        self.counter._column_number_at_end +=   QuexBuffer_tell_memory_adr(&me->buffer)\n"
        "                                              - column_count_p_$$SKIPPER_INDEX$$;\n"
        "#       endif\n"
    )
    on_before_reload = (
        "#      ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n"
        "       self.counter._column_number_at_end +=  QuexBuffer_tell_memory_adr(&me->buffer)\n"
        "                                            - column_count_p_$$SKIPPER_INDEX$$;\n"
        "#      endif\n"
    )
    on_after_reload = (
        "#          ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n"
        "           column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n"
        "#          endif\n"
    )

    replacements = [
        ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", pointer_definition],
        ["$$LC_COUNT_IN_LOOP$$", newline_counting],
        ["$$LC_COUNT_END_PROCEDURE$$", on_end],
        ["$$LC_COUNT_BEFORE_RELOAD$$", on_before_reload],
        ["$$LC_COUNT_AFTER_RELOAD$$", on_after_reload],
    ]
    return blue_print(code_str, replacements)
Ejemplo n.º 22
0
def replace_keywords(program_txt, LanguageDB, NoIndentF):
    """Replaces pseudo-code keywords with keywords of the given language.

       program_txt -- text in which the pseudo-code keywords appear.
       LanguageDB  -- mapping; 'LanguageDB.items()' is handed to 'blue_print'
                      as the replacement list.
       NoIndentF   -- when equal to 'False', all lines following the first
                      one are indented by four spaces, and the text is
                      terminated by one newline.

       RETURNS: The resulting text.
    """
    txt = blue_print(program_txt, LanguageDB.items())

    # NOTE: '== False' is intentional; 'None' must continue to mean 'do not
    #       indent', which a plain truth test would not preserve.
    if NoIndentF == False:
        # Delete the last newline, to prevent additional indentation.
        # ('txt[-1]' raised an IndexError if the text was empty; 'endswith'
        #  handles the empty text.)
        if txt.endswith("\n"): txt = txt[:-1]
        # Indent by four spaces
        # (if this happens in recursively called functions nested indented blocks
        #  are correctly indented, see NumberSet::get_condition_code() for example)
        txt = txt.replace("\n", "\n    ") + "\n"

    return txt
Ejemplo n.º 23
0
def __set_skipper_lc_counting_replacements(code_str, CharacterSet):
    """Insert the code for line and column number counting into the template
       of a character set skipper.

       Marker                                     Inserted code
       $$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$   definition of the pointer
                                                  'column_count_p_$$SKIPPER_INDEX$$'
       $$LC_COUNT_IN_LOOP$$                       'lc_counter_in_loop', if the
                                                  character set contains newline;
                                                  else the empty string.
       $$LC_COUNT_END_PROCEDURE$$                 column_n += position - column_count_p
       $$LC_COUNT_BEFORE_RELOAD$$                 column_n += position - column_count_p
       $$LC_COUNT_AFTER_RELOAD$$                  column_count_p = position

       code_str     -- template text.
       CharacterSet -- the skipped characters; queried through 'contains()'.

       RETURNS: Template text with the markers above replaced. The marker
                '$$SKIPPER_INDEX$$' is left in place by this function.
    """
    variable_definition = "".join([
        "#   if defined(QUEX_OPTION_LINE_NUMBER_COUNTING) || defined(QUEX_OPTION_COLUMN_NUMBER_COUNTING)\n",
        "#   ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n",
        "    QUEX_CHARACTER_POSITION_TYPE column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n",
        "#   endif\n",
        "#   endif\n",
    ])

    # Does the skipped character set contain a newline?
    in_loop = ""
    if CharacterSet.contains(ord("\n")):
        in_loop = lc_counter_in_loop

    end_procedure = "".join([
        "#       ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n",
        "        self.counter._column_number_at_end +=   QuexBuffer_tell_memory_adr(&me->buffer)\n",
        "                                              - column_count_p_$$SKIPPER_INDEX$$;\n",
        "#       endif\n",
    ])
    before_reload = "".join([
        "#      ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n",
        "       self.counter._column_number_at_end +=  QuexBuffer_tell_memory_adr(&me->buffer)\n",
        "                                            - column_count_p_$$SKIPPER_INDEX$$;\n",
        "#      endif\n",
    ])
    after_reload = "".join([
        "#          ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n",
        "           column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n",
        "#          endif\n",
    ])

    return blue_print(code_str,
                      [["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", variable_definition],
                       ["$$LC_COUNT_IN_LOOP$$",                     in_loop],
                       ["$$LC_COUNT_END_PROCEDURE$$",               end_procedure],
                       ["$$LC_COUNT_BEFORE_RELOAD$$",               before_reload],
                       ["$$LC_COUNT_AFTER_RELOAD$$",                after_reload]])
Ejemplo n.º 24
0
def write_constructor_and_memento_functions(ModeDB):
    """Fill the template 'analyzer/TXT-Cpp.i' of the code base with the
       constructor extension, the converter helper reference, the mode
       database initialization, and the memento pack/unpack extensions.

       ModeDB -- database of modes; its values are passed to
                 'get_constructor_code()'.

       RETURNS: Text of the template with all markers replaced.
    """
    template_file_name = os.path.normpath(QUEX_PATH
                                          + Setup.language_db["$code_base"]
                                          + "/analyzer/TXT-Cpp.i")
    template_txt = get_file_content_or_die(template_file_name)

    replacement_list = [
        ["$$CONSTRUCTOR_EXTENSTION$$",
         lexer_mode.class_constructor_extension.get_code()],
        ["$$CONVERTER_HELPER_I$$",
         get_file_reference(Setup.output_buffer_codec_header_i)],
        ["$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$",
         get_constructor_code(ModeDB.values())],
        ["$$MEMENTO_EXTENSIONS_PACK$$",
         lexer_mode.memento_pack_extension.get_code()],
        ["$$MEMENTO_EXTENSIONS_UNPACK$$",
         lexer_mode.memento_unpack_extension.get_code()],
    ]
    return blue_print(template_txt, replacement_list)
Ejemplo n.º 25
0
def delete_unused_labels(Code):
    """Replace the definition of every unused label in 'Code' by the same
       text wrapped in a multi-line comment.

       The replacements are grouped by the first letter of the label text.
       'blue_print' is then called once per group with that letter as third
       argument.
       NOTE(review): presumably the letter lets 'blue_print' search faster
                     for candidates; confirm against 'blue_print'.

       Code -- text of the generated code.

       RETURNS: The code with the unused labels commented out.
    """
    LanguageDB = Setup.language_db
    label_list = languages.label_db_get_unused_label_list()

    # first letter --> list of [original, replacement]
    replacement_list_db = {}
    for label in label_list:
        original    = LanguageDB["$label-pure"](label)
        replacement = LanguageDB["$ml-comment"](original)
        # 'setdefault' replaces the former 'has_key()' test; 'has_key()'
        # does not exist on dictionaries in Python 3.
        replacement_list_db.setdefault(original[0], []).append([original, replacement])

    code = Code
    for first_letter, replacement_list in replacement_list_db.items():
        code = blue_print(code, replacement_list, first_letter)
    return code
Ejemplo n.º 26
0
def _do(UnicodeTrafoInfo, CodecName):
    """
    PURPOSE: Writes converters for conversion towards UTF8/UTF16/UCS2/UCS4.

    UnicodeTrafoInfo:

       Describes how the character codes of a particular coding relate to
       unicode character codes. The expected form is:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin), 
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin), 
         ... 
       ]

    CodecName:

       Name of the codec. It is made a safe identifier and lower-cased before
       it replaces '$$CODEC$$' in the templates.

    RETURNS: [0] text of the declaration header (template 'TXT-Codec')
             [1] text of the implementation     (template 'TXT-Codec.i')
    """
    def template_file_name(Suffix):
        # Path of a template file below the code base of the current language.
        return os.path.normpath(  QUEX_PATH
                                + Setup.language_db["$code_base"]
                                + Suffix)

    codec_name = make_safe_identifier(CodecName).lower()

    # Each writer delivers a pair; only the UTF8 writer's first element is used.
    utf8_epilog,    utf8_body  = ConverterWriterUTF8().do(UnicodeTrafoInfo)
    unused_prolog,  utf16_body = ConverterWriterUTF16().do(UnicodeTrafoInfo)
    unused_element, utf32_body = ConverterWriterUTF32().do(UnicodeTrafoInfo)

    # Implementation: Provide only the constants which are necessary
    implementation_template = get_file_content_or_die(
        template_file_name("/converter_helper/TXT-Codec.i"))
    replacement_list = [
        ["$$CODEC$$",      codec_name],
        ["$$EPILOG$$",     utf8_epilog],
        ["$$BODY_UTF8$$",  utf8_body],
        ["$$BODY_UTF16$$", utf16_body],
        ["$$BODY_UTF32$$", utf32_body],
    ]
    txt_i = blue_print(implementation_template, replacement_list)

    # Declaration: A separate header is required
    header_template = get_file_content_or_die(
        template_file_name("/converter_helper/TXT-Codec"))
    txt_h = header_template.replace("$$CODEC$$", codec_name)

    return txt_h, txt_i
Ejemplo n.º 27
0
def __cpp_terminal_states(StateMachineName, sm, action_db, DefaultAction):
    """Generate the code for the terminal states of a state machine.

       StateMachineName -- name used in the label of the initial state entry.
       sm               -- state machine; only its init state is inspected,
                           to see whether it has any transitions.
       action_db        -- maps: state machine id --> action code (string).
       DefaultAction    -- action code (string) of the default action.

       RETURNS: '__cpp_terminal_state_str' with its markers replaced by
                  -- the jumps to the specific terminal states,
                  -- the specific terminal states themselves,
                  -- the default action, and
                  -- the names/labels of the state machine.
    """
    # -- specific terminal states of patterns (entered from acceptance states)
    fragment_list = []
    for state_machine_id in action_db.keys():
        # Action code is indented by four spaces on every line.
        action_code = "    " + action_db[state_machine_id].replace("\n", "\n    ")

        fragment_list.append("  %s:\n" % get_label("", None, state_machine_id))
        fragment_list.append("    QUEX_STREAM_SEEK(last_acceptance_input_position);")
        fragment_list.append(action_code + "\n")
        fragment_list.append("    // if action code returns from the function, then the following is meaningless\n")
        # Get the next character only if the init state has transitions.
        if sm.states[sm.init_state_index].transitions().is_empty() == False:
            fragment_list.append("    QUEX_STREAM_GET(input);")
        fragment_list.append("    goto QUEX_LABEL_%s_ENTRY_INITIAL_STATE;\n" % StateMachineName)

    specific_terminal_states_str = "".join(fragment_list)

    #  -- general terminal state (entered from non-acceptance state)
    jumps_to_acceptance_states_str = "".join(
        "     case %s: goto %s;\n" % (repr(state_machine_id),
                                      get_label("", None, state_machine_id))
        for state_machine_id in action_db.keys())

    #     -- execute default pattern action
    #     -- reset character stream to last success
    #     -- goto initial state
    # NOTE: The indentation must FOLLOW the newline. The former replacement
    #       string "        \n" only appended trailing whitespace to each
    #       line and left the subsequent lines without indentation.
    default_action_str = DefaultAction.replace("\n", "\n        ")

    return blue_print(
        __cpp_terminal_state_str,
        [["$$JUMPS_TO_ACCEPTANCE_STATE$$", jumps_to_acceptance_states_str],
         ["$$SPECIFIC_TERMINAL_STATES$$",  specific_terminal_states_str],
         ["$$DEFAULT_ACTION$$",            default_action_str],
         ["$$STATE_MACHINE_NAME$$",        StateMachineName],
         ["$$INITIAL_STATE_INDEX_LABEL$$", get_label(StateMachineName, sm.init_state_index)]])
Ejemplo n.º 28
0
def _do(UnicodeTrafoInfo, CodecName):
    """
    PURPOSE: Writes converters for conversion towards UTF8/UTF16/UCS2/UCS4.

    UnicodeTrafoInfo:

       Relation between the character codes of a particular coding and the
       unicode character codes, given as:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin), 
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin), 
         ... 
       ]

    CodecName:

       Codec's name; converted into a safe, lower-case identifier which
       replaces '$$CODEC$$' in both templates.

    RETURNS: Tuple (header text, implementation text).
    """
    codec_name = make_safe_identifier(CodecName).lower()

    # The writers return pairs. Of the first elements, only the one of the
    # UTF8 writer is inserted into the template.
    utf8_result  = ConverterWriterUTF8().do(UnicodeTrafoInfo)
    utf16_result = ConverterWriterUTF16().do(UnicodeTrafoInfo)
    utf32_result = ConverterWriterUTF32().do(UnicodeTrafoInfo)
    utf8_epilog, utf8_function_body = utf8_result
    _, utf16_function_body = utf16_result
    _, utf32_function_body = utf32_result

    # (*) Implementation file: Provide only the constants which are necessary
    implementation_file = os.path.normpath(
        QUEX_PATH + Setup.language_db["$code_base"] + "/converter_helper/TXT-Codec.i")
    implementation_txt = blue_print(
        get_file_content_or_die(implementation_file),
        [
            ["$$CODEC$$", codec_name],
            ["$$EPILOG$$", utf8_epilog],
            ["$$BODY_UTF8$$", utf8_function_body],
            ["$$BODY_UTF16$$", utf16_function_body],
            ["$$BODY_UTF32$$", utf32_function_body],
        ])

    # (*) Header file: A separate declaration header is required
    header_file = os.path.normpath(
        QUEX_PATH + Setup.language_db["$code_base"] + "/converter_helper/TXT-Codec")
    header_txt = get_file_content_or_die(header_file).replace("$$CODEC$$", codec_name)

    return header_txt, implementation_txt
Ejemplo n.º 29
0
def do(Data):
    """Generate the code of the indentation counter.

       The generated code is very similar to the 'skipper' code. It is to be executed
       as soon as a 'real' newline arrived. Then it skips whitespace until the next 
       non-whitespace (also newline may trigger a 'stop'). 

       Dependent on the setup the indentation is determined.

       Data -- dictionary; only the entry "indentation_setup" is read. It must
               be an object of class 'IndentationSetup'.

       RETURNS: [0] list of code fragments: the prolog, the elements of the
                    transition code, a newline, and the epilog.
                [1] database of local variables; it contains 'reference_p'.
    """
    IndentationSetup = Data["indentation_setup"]
    assert IndentationSetup.__class__.__name__ == "IndentationSetup"


    LanguageDB = Setup.language_db
    # 'Mode' remains 'None' if the setup does not name a containing mode.
    Mode = None
    if IndentationSetup.containing_mode_name() != "":
        Mode = lexer_mode.mode_db[IndentationSetup.containing_mode_name()]

    counter_index = sm_index.get()
    
    # Mini trigger map:  [ trigger set ] --> loop start
    # That means: As long as characters of the trigger set appear, we go to the loop start.

    trigger_map = []
    # If the indentation consists only of spaces, then it is 'uniform' ...
    if IndentationSetup.has_only_single_spaces():
        # Count indentation/column at end of run;
        # simply: current position - reference_p

        # NOTE(review): 'values()[0]' requires 'values()' to return a list
        #               (Python 2 dictionary behavior).
        character_set = IndentationSetup.space_db.values()[0]
        for interval in character_set.get().get_intervals(PromiseToTreatWellF=True):
            trigger_map.append([interval, counter_index])

        # Reference Pointer: Define Variable, Initialize, determine how to subtract.
        end_procedure = \
        "    me->counter._indentation = (size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer) - reference_p);\n" 
    else:
        # Count the indentation/column during the 'run'

        # Add the space counters
        for count, character_set in IndentationSetup.space_db.items():
            for interval in character_set.get().get_intervals(PromiseToTreatWellF=True):
                trigger_map.append([interval, IndentationCounter("space", count, counter_index)])

        # Add the grid counters
        for count, character_set in IndentationSetup.grid_db.items():
            for interval in character_set.get().get_intervals(PromiseToTreatWellF=True):
                trigger_map.append([interval, IndentationCounter("grid", count, counter_index)])

        # Reference Pointer: Not required.
        #                    No subtraction 'current_position - reference_p'.
        #                    (however, we pass 'reference_p' to indentation handler)
        end_procedure = "" 

    # Bad character detection: Each interval of the bad character set
    # triggers an 'IndentationCounter' of type "bad".
    if IndentationSetup.bad_character_set.get().is_empty() == False:
        for interval in IndentationSetup.bad_character_set.get().get_intervals(PromiseToTreatWellF=True):
            trigger_map.append([interval, IndentationCounter("bad", None, counter_index)])

    # Since we do not use a 'TransitionMap', there are some things we need 
    # to do by hand.
    arrange_trigger_map(trigger_map)

    # 'reference_p' is defined in both cases, since it is always passed to
    # the indentation handler (see 'end_procedure' below).
    local_variable_db = { "reference_p" : 
                          Variable("reference_p", 
                                   "QUEX_TYPE_CHARACTER_POSITION", 
                                   None, 
                                   "(QUEX_TYPE_CHARACTER_POSITION)0x0")
    }
    # Code to be inserted at '$$INIT_REFERENCE_POINTER$$' in the prolog.
    init_reference_p  = "    reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer);\n" + \
                        "    me->counter._indentation = (QUEX_TYPE_INDENTATION)0;\n"

    # Transition code for the trigger map; on reload it jumps to the
    # '$reload' label of this counter.
    iteration_code = transition_block.do(trigger_map, 
                                         counter_index, 
                                         DSM=None, 
                                         GotoReload_Str="goto %s;" % get_label("$reload", counter_index))

    comment_str    = LanguageDB["$comment"]("Skip whitespace at line begin; count indentation.")

    # NOTE: Line and column number counting is off
    #       -- No newline can occur
    #       -- column number = indentation at the end of the process

    end_procedure += "    __QUEX_IF_COUNT_COLUMNS_ADD(me->counter._indentation);\n"
    # Call either the general indentation handler, or the one of the mode.
    if Mode == None or Mode.default_indentation_handler_sufficient():
        end_procedure += "    QUEX_NAME(on_indentation)(me, me->counter._indentation, reference_p);\n"
    else:
        # Definition of '%s_on_indentation' in mode_classes.py.
        end_procedure += "    QUEX_NAME(%s_on_indentation)(me, me->counter._indentation, reference_p);\n" \
                         % Mode.name

    # The finishing touch: prolog (code in front of the transition code)
    prolog = blue_print(prolog_txt,
                         [
                           ["$$DELIMITER_COMMENT$$",              comment_str],
                           ["$$INIT_REFERENCE_POINTER$$",         init_reference_p],
                           ["$$COUNTER_INDEX$$",                  repr(counter_index)],
                           ["$$INPUT_GET$$",                      LanguageDB["$input/get"]],
                         ])

    # The finishing touch: epilog (code behind the transition code)
    epilog = blue_print(epilog_txt,
                      [
                       ["$$INPUT_P_INCREMENT$$",              LanguageDB["$input/increment"]],
                       ["$$INPUT_P_DECREMENT$$",              LanguageDB["$input/decrement"]],
                       ["$$IF_INPUT_EQUAL_DELIMITER_0$$",     LanguageDB["$if =="]("SkipDelimiter$$COUNTER_INDEX$$[0]")],
                       ["$$ENDIF$$",                          LanguageDB["$endif"]],
                       ["$$LOOP_REENTRANCE$$",                get_label("$entry",  counter_index)],
                       ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$",  LanguageDB["$BLC"]],
                       ["$$RELOAD$$",                         get_label("$reload", counter_index)],
                       ["$$COUNTER_INDEX$$",                  repr(counter_index)],
                       ["$$GOTO_TERMINAL_EOF$$",              get_label("$terminal-EOF", U=True)],
                       # When things were skipped, no change to acceptance flags or modes has
                       # happened. One can jump immediately to the start without re-entry preparation.
                       ["$$GOTO_START$$",                     get_label("$start", U=True)], 
                       ["$$END_PROCEDURE$$",                  end_procedure],
                       ["$$BAD_CHARACTER_HANDLING$$",         get_bad_character_handler(Mode, IndentationSetup, counter_index)],
                      ])

    # Result: prolog, transition code, newline, epilog -- in this order.
    txt = [prolog]
    txt.extend(iteration_code)
    # txt.append(Address("$drop-out", counter_index))
    txt.append("\n")
    txt.append(epilog)

    return txt, local_variable_db
Ejemplo n.º 30
0
def do(setup, IndentationSupportF):
    """Creates a file of token-ids from a given set of names.
       Creates also a function:

       const string& $$token$$::map_id_to_name().

       setup               -- setup object; supplies the token id prefixes,
                              the language, the token id counter offset, the
                              name of a foreign token id definition file, and
                              the name of the output file.
       IndentationSupportF -- not referenced inside this function.

       Tokens in 'token_id_db' which have no number yet get a number assigned.
       The result is written to 'setup.output_token_id_file'; nothing is
       returned.
    """
    # NOTE: 'file_str' is only read here, never assigned.
    global file_str
    LanguageDB = Setup.language_db

    __propose_implicit_token_definitions()

    # All standard token ids must be present in the database.
    for standard_token_id in standard_token_id_list:
        assert token_id_db.has_key(standard_token_id)

    assert lexer_mode.token_type_definition != None, \
           "Token type has not been defined yet, see $QUEX_PATH/quex/core.py how to\n" + \
           "handle this."

    # (*) Token ID File ________________________________________________________________
    #
    #     The token id file can either be specified as database of
    #     token-id names, or as a file that directly assigns the token-ids
    #     to variables. If the flag '--user-token-id-file' is defined,
    #     then the token-id file is provided by the user. Otherwise, the
    #     token id file is created by the token-id maker.
    #
    #     The token id maker considers the file passed by the option '-t'
    #     as the database file and creates a C++ file with the output filestem
    #     plus the suffix "--token-ids". Note, that the token id file is a
    #     header file.
    #
    if len(token_id_db.keys()) == len(standard_token_id_list):
        token_id_str = "%sTERMINATION and %sUNINITIALIZED" % \
                       (setup.token_id_prefix_plain, setup.token_id_prefix_plain) 
        # TERMINATION + UNINITIALIZED = 2 token ids. If they are the only ones nothing can be done.
        # NOTE(review): presumably 'error_msg' terminates the program unless
        #               'DontExitF=True' is passed -- confirm.
        error_msg("Only token ids %s are defined.\n" % token_id_str + \
                  "Quex refuses to proceed. Please, use the 'token { ... }' section to\n" + \
                  "specify at least one other token id.")

    #______________________________________________________________________________________
    # L = length of the longest token name; 'space()' delivers the padding
    # which aligns the definitions.
    L = max(map(lambda name: len(name), token_id_db.keys()))
    def space(Name):
        return " " * (L - len(Name))

    # -- define values for the token ids
    #    ("#define" for language "C", "const" definition otherwise)
    def define_this(txt, token):
        if setup.language == "C":
            txt.append("#define %s%s %s((QUEX_TYPE_TOKEN_ID)%i)\n" \
                       % (setup.token_id_prefix_plain, token.name, space(token.name), token.number))
        else:
            txt.append("const QUEX_TYPE_TOKEN_ID %s%s%s = ((QUEX_TYPE_TOKEN_ID)%i);\n" \
                       % (setup.token_id_prefix_plain, token.name, space(token.name), token.number))

    if setup.token_id_foreign_definition_file != "":
        # Token ids are defined in a foreign file: only include it.
        token_id_txt = ["#include \"%s\"\n" % get_file_reference(setup.token_id_foreign_definition_file)]

    else:
        # The namespace is opened/closed only if the language is not "C".
        if setup.language == "C": 
            prolog = ""
            epilog = ""
        else:
            prolog = LanguageDB["$namespace-open"](setup.token_id_prefix_name_space)
            epilog = LanguageDB["$namespace-close"](setup.token_id_prefix_name_space)

        token_id_txt = [prolog]

        # Assign values to tokens with no numeric identifier
        # NOTE: This does not have to happen if tokens are defined by the user's provided file.
        i = setup.token_id_counter_offset
        # Take the 'dummy_name' only to have the list sorted by name. The key 'dummy_name' 
        # may contain '--' to indicate a unicode value, so do not use it as name.
        for dummy_name, token in sorted(token_id_db.items()):
            if token.number == None: 
                # Search for the next number which is not yet occupied.
                while __is_token_id_occupied(i):
                    i += 1
                token.number = i; 

            define_this(token_id_txt, token)

        # Double check that no token id appears twice
        # Again, this can only happen, if quex itself produced the numeric values for the token
        # (Each pair of tokens is compared once; both messages are issued
        #  with 'DontExitF=True'.)
        token_list = token_id_db.values()
        for i, x in enumerate(token_list):
            for y in token_list[i+1:]:
                if x.number != y.number: continue
                error_msg("Token id '%s'" % x.name, x.file_name, x.line_n, DontExitF=True)
                error_msg("and token id '%s' have same numeric value '%s'." \
                          % (y.name, x.number), y.file_name, y.line_n, DontExitF=True)
                          
        token_id_txt.append(epilog)

    tc_descr   = lexer_mode.token_type_definition

    # Fill the file template and write the token id file.
    content = blue_print(file_str,
                         [["$$TOKEN_ID_DEFINITIONS$$",        "".join(token_id_txt)],
                          ["$$DATE$$",                        time.asctime()],
                          ["$$TOKEN_CLASS_DEFINITION_FILE$$", get_file_reference(lexer_mode.token_type_definition.get_file_name())],
                          ["$$INCLUDE_GUARD_EXT$$",           get_include_guard_extension(
                                                                  LanguageDB["$namespace-ref"](tc_descr.name_space) 
                                                                  + "__" + tc_descr.class_name)],
                          ["$$TOKEN_PREFIX$$",                setup.token_id_prefix]])

    write_safely_and_close(setup.output_token_id_file, content)
Ejemplo n.º 31
0
def get_implementation_of_mode_functions(mode, Modes):
    """Writes constructors and mode transition functions.

                  void quex::lexer::enter_EXAMPLE_MODE() { ... }

       where EXAMPLE_MODE is a lexer mode from the given lexer-modes, and
       'quex::lexer' is the lexical analysis class.

       mode  -- the mode for which the functions are implemented.
       Modes -- list of all modes; used to look up modes by their name.

       RETURNS: 'mode_function_implementation_str' with the markers for the
                on-entry, on-exit, and on-indentation procedures, and for the
                'has base mode', 'has entry from', and 'has exit to' checks
                replaced.
    """
    def __filter_out_inheritable_only(ModeNameList):
        # Names of those modes which are not 'inheritable: only'. Names for
        # which no mode is found in 'Modes' are dropped.
        result = []
        for name in ModeNameList:
            for candidate in Modes:
                if candidate.name == name:
                    if candidate.options["inheritable"] != "only":
                        result.append(name)
                    break
        return result

    # (*) on enter
    on_entry_str = "#   ifdef QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_entry_str += "    QUEX_NAME(%s).has_entry_from(FromMode);\n" % mode.name
    on_entry_str += "#   endif\n"
    for code_info in mode.get_code_fragment_list("on_entry"):
        on_entry_str += code_info.get_code()
        # A trailing newline is removed after each fragment.
        if on_entry_str.endswith("\n"): on_entry_str = on_entry_str[:-1]

    # (*) on exit
    on_exit_str = "#   ifdef QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_exit_str += "    QUEX_NAME(%s).has_exit_to(ToMode);\n" % mode.name
    on_exit_str += "#   endif\n"
    for code_info in mode.get_code_fragment_list("on_exit"):
        on_exit_str += code_info.get_code()

    # (*) on indentation
    on_indentation_str = get_on_indentation_handler(mode)

    # (*) has base mode
    if mode.has_base_mode():
        base_mode_list = __filter_out_inheritable_only(
            mode.get_base_mode_name_list())
        has_base_mode_str = get_IsOneOfThoseCode(base_mode_list)
    else:
        has_base_mode_str = "    return false;"

    # (*) has entry from
    #     NOTE: Formerly, the call below contained the undefined name 'true'
    #           and a call to '__filter_out_inheritable_only()' with an
    #           unknown keyword argument. The resulting exception was caught
    #           by a bare 'except:', so that ALWAYS the default was taken.
    #           Now, only a missing option results in the default.
    try:
        entry_option = mode.options["entry"]
    except KeyError:
        has_entry_from_str = "    return true; /* default */"
    else:
        # check whether the mode we come from is an allowed mode
        entry_list = __filter_out_inheritable_only(entry_option)
        has_entry_from_str = get_IsOneOfThoseCode(entry_list,
                                                  ConsiderDerivedClassesF=True)

    # (*) has exit to
    #     (same correction: 'True' instead of the undefined name 'true')
    try:
        exit_option = mode.options["exit"]
    except KeyError:
        has_exit_to_str = "    return true; /* default */"
    else:
        exit_list = __filter_out_inheritable_only(exit_option)
        has_exit_to_str = get_IsOneOfThoseCode(exit_list,
                                               ConsiderDerivedClassesF=True)

    txt = blue_print(
        mode_function_implementation_str,
        [
            ["$$ENTER-PROCEDURE$$", on_entry_str],
            ["$$EXIT-PROCEDURE$$", on_exit_str],
            #
            ["$$ON_INDENTATION-PROCEDURE$$", on_indentation_str],
            #
            ["$$HAS_BASE_MODE$$", has_base_mode_str],
            ["$$HAS_ENTRANCE_FROM$$", has_entry_from_str],
            ["$$HAS_EXIT_TO$$", has_exit_to_str],
            #
            ["$$MODE_NAME$$", mode.name],
        ])
    return txt
Ejemplo n.º 32
0
def get_implementation_of_mode_functions(mode, Modes):
    """Writes constructors and mode transition functions.

                  void quex::lexer::enter_EXAMPLE_MODE() { ... }

       where EXAMPLE_MODE is a lexer mode from the given lexer-modes, and
       'quex::lexer' is the lexical analysis class.

       mode  -- the mode for which the functions are implemented.
       Modes -- not referenced inside this function.

       RETURNS: 'mode_function_implementation_str' with the '%%...%%' markers
                replaced by the code for this mode.
    """
    # (*) on enter
    code_fragments = mode.on_entry_code_fragments()
    on_entry_str = "#ifdef __QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_entry_str += "assert(me->%s.has_entry_from(FromMode));\n" % mode.name
    on_entry_str += "#endif\n"
    for code_info in code_fragments:
        on_entry_str += code_info.get_code()
        # A trailing newline is removed after each fragment.
        if on_entry_str.endswith("\n"): on_entry_str = on_entry_str[:-1]

    # (*) on exit
    code_fragments = mode.on_exit_code_fragments()
    on_exit_str = "#ifdef __QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_exit_str += "assert(me->%s.has_exit_to(ToMode));\n" % mode.name
    on_exit_str += "#endif\n"
    for code_info in code_fragments:
        on_exit_str += code_info.get_code()

    # (*) on indentation
    code_fragments = mode.on_indentation_code_fragments()
    on_indentation_str = "assert(Indentation >= 0);"
    for code_info in code_fragments:
        on_indentation_str += code_info.get_code()

    # (*) has base mode
    if mode.get_base_modes() != []:
        has_base_mode_str = get_IsOneOfThoseCode(mode.get_base_modes())
    else:
        has_base_mode_str = "    return false;"

    # (*) has entry from
    #     NOTE: Formerly 'ConsiderDerivedClassesF=true' was passed. 'true' is
    #           not a name in Python; the NameError was caught by a bare
    #           'except:', so that ALWAYS the default was taken. Now, only a
    #           missing option results in the default.
    try:
        entry_list = mode.options["entry"]
    except KeyError:
        has_entry_from_str = "    return true; // default"
    else:
        # check whether the mode we come from is an allowed mode
        has_entry_from_str = get_IsOneOfThoseCode(entry_list,
                                                  ConsiderDerivedClassesF=True)

    # (*) has exit to
    #     (same correction as for 'has entry from')
    try:
        exit_list = mode.options["exit"]
    except KeyError:
        has_exit_to_str = "    return true; // default"
    else:
        has_exit_to_str = get_IsOneOfThoseCode(exit_list,
                                               ConsiderDerivedClassesF=True)

    txt = blue_print(mode_function_implementation_str, [
        ["%%ENTER-PROCEDURE%%", on_entry_str],
        ["%%EXIT-PROCEDURE%%", on_exit_str],
        ["%%INDENTATION-PROCEDURE%%", on_indentation_str],
        ["%%HAS_BASE_MODE%%", has_base_mode_str],
        ["%%HAS_ENTRANCE_FROM%%", has_entry_from_str],
        ["%%HAS_EXIT_TO%%", has_exit_to_str],
        ["%%MODE_NAME%%", mode.name],
    ])
    return txt
Ejemplo n.º 33
0
def get_skipper(OpenerSequence, CloserSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Produce the code of a skipper for a range which is delimited by an
       opener and a closer character sequence.

       OpenerSequence, CloserSequence -- non-empty lists of integers (the
                                         character codes of the delimiters).
                                         The two must differ from each other.
       Mode                           -- handed to the helpers which compare
                                         the closer against the indentation
                                         counter's newline and which provide
                                         the default 'on skip range open'
                                         handler.
       IndentationCounterTerminalID   -- must not be None, if the closer is a
                                         subset of what the 'newline' pattern
                                         of indentation counting triggers.
       OnSkipRangeOpenStr             -- if not empty, it is used instead of
                                         the result of 'get_on_skip_range_open()'.

       RETURNS: [0] the module level 'template_str' with the place holders
                    listed below filled in.
                [1] database of the local variables mentioned by that code.
    """
    # NOTE: A list comprehension is used instead of 'map(type, ...)' so that
    #       the comparison with a list also holds where 'map()' does not
    #       return a list.
    assert OpenerSequence.__class__  == list
    assert len(OpenerSequence)       >= 1
    assert [type(x) for x in OpenerSequence] == [int] * len(OpenerSequence)
    assert CloserSequence.__class__  == list
    assert len(CloserSequence)       >= 1
    assert [type(x) for x in CloserSequence] == [int] * len(CloserSequence)
    assert OpenerSequence != CloserSequence

    LanguageDB    = Setup.language_db

    skipper_index = sm_index.get()

    opener_str, opener_length_str, opener_comment_str = get_character_sequence(OpenerSequence)
    closer_str, closer_length_str, closer_comment_str = get_character_sequence(CloserSequence)

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, CloserSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID is not None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers 
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct", 
                                                                IndentationCounterTerminalID, U=True)

    # An explicitly given handler overrides the one derived from the mode.
    if OnSkipRangeOpenStr != "": on_skip_range_open_str = OnSkipRangeOpenStr
    else:                        on_skip_range_open_str = get_on_skip_range_open(Mode, CloserSequence)

    local_variable_db = { 
        "counter":     Variable("counter", "size_t", None, "0"),
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p": 
                       Variable("reference_p", 
                                "QUEX_TYPE_CHARACTER_POSITION", 
                                None,
                                "(QUEX_TYPE_CHARACTER_POSITION)0x0", 
                                "QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    }

    # Column counting: 'reference_p' is set at the definition point and again
    # after a reload. Before a reload, the distance to 'reference_p' is added.
    reference_p_def = "    __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
    before_reload   = "    __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                      "                                - reference_p));\n" 
    after_reload    = "        __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    if CloserSequence[-1] == ord('\n'):
        # Closer ends with newline: add one line, set the column to 1.
        end_procedure  = "       __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
        end_procedure += "       __QUEX_IF_COUNT_COLUMNS_SET((size_t)1);\n"
    else:
        # Otherwise: add the distance to 'reference_p' to the column count.
        end_procedure = "        __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                        "                                    - reference_p));\n" 

    code_str = blue_print(template_str,
                          [
                           ["$$SKIPPER_INDEX$$",   __nice(skipper_index)],
                           #
                           ["$$OPENER$$",          opener_str],
                           ["$$OPENER_LENGTH$$",   opener_length_str],
                           ["$$OPENER_COMMENT$$",  opener_comment_str],
                           ["$$CLOSER$$",          closer_str],
                           ["$$CLOSER_LENGTH$$",   closer_length_str],
                           ["$$CLOSER_COMMENT$$",  closer_comment_str],
                           # 
                           ["$$INPUT_P_INCREMENT$$",              LanguageDB["$input/increment"]],
                           ["$$INPUT_P_DECREMENT$$",              LanguageDB["$input/decrement"]],
                           ["$$INPUT_GET$$",                      LanguageDB["$input/get"]],
                           ["$$IF_INPUT_EQUAL_DELIMITER_0$$",     LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
                           ["$$ENDIF$$",                          LanguageDB["$endif"]],
                           ["$$ENTRY$$",                          get_label("$entry", skipper_index)],
                           ["$$RELOAD$$",                         get_label("$reload", skipper_index)],
                           ["$$GOTO_AFTER_END_OF_SKIPPING$$",     goto_after_end_of_skipping_str], 
                           ["$$GOTO_RELOAD$$",                    get_label("$reload", skipper_index)],
                           # When things were skipped, no change to acceptance flags or modes has
                           # happened. One can jump immediately to the start without re-entry preparation.
                           ["$$GOTO_ENTRY$$",                     get_label("$entry", skipper_index)],
                           ["$$MARK_LEXEME_START$$",              LanguageDB["$mark-lexeme-start"]],
                           ["$$ON_SKIP_RANGE_OPEN$$",             on_skip_range_open_str],
                           #
                           ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", reference_p_def],
                           ["$$LC_COUNT_IN_LOOP$$",                     line_column_counter_in_loop],
                           ["$$LC_COUNT_END_PROCEDURE$$",               end_procedure],
                           ["$$LC_COUNT_BEFORE_RELOAD$$",               before_reload],
                           ["$$LC_COUNT_AFTER_RELOAD$$",                after_reload],
                          ])

    return code_str, local_variable_db
Ejemplo n.º 34
0
def write_engine_header(Modes, Setup):
    """Write the header file of the lexical analyzer class.

       The template file below 'Setup.QUEX_TEMPLATE_DB_DIR' is read, its
       '$$SWITCH$$' lines are turned into (commented out) '#define's, the
       remaining place holders are filled in, and the result is written to
       the file 'Setup.output_file_stem'.

       Modes -- dictionary: mode name --> mode object.
       Setup -- setup object from which the options are read.

       NOTE: If 'Setup.input_derived_class_name' is empty, it is set to the
             name of the lexer class (side effect on 'Setup').
    """
    QuexClassHeaderFileTemplate = (Setup.QUEX_TEMPLATE_DB_DIR 
                                   + "/template/lexical_analyzer_class").replace("//","/")
    CoreEngineDefinitionsHeader = (Setup.QUEX_TEMPLATE_DB_DIR + "/core_engine/").replace("//","/")
    QuexClassHeaderFileOutput   = Setup.output_file_stem
    LexerClassName              = Setup.output_engine_name
    VersionID                   = Setup.input_application_version_id
    QuexVersionID               = Setup.QUEX_VERSION

    # -- determine character type according to number of bytes per ucs character code point
    #    for the internal engine.
    quex_character_type_str = { 1: "uint8_t ", 2: "uint16_t", 4: "uint32_t", 
                                   "wchar_t": "wchar_t" }[Setup.bytes_per_ucs_code_point]
    quex_lexeme_type_str    = { 1: "char    ", 2: "int16_t",  4: "int32_t",  
                                   "wchar_t": "wchar_t" }[Setup.bytes_per_ucs_code_point]

    #    are the bytes of integers 'little endian' or 'big endian'?
    if Setup.byte_order == "little":
        quex_coding_name_str = { 1: "ASCII", 2: "UCS-2LE", 4: "UCS-4LE", 
                                    "wchar_t": "WCHAR_T" }[Setup.bytes_per_ucs_code_point]
    else:
        quex_coding_name_str = { 1: "ASCII", 2: "UCS-2BE", 4: "UCS-4BE", 
                                    "wchar_t": "WCHAR_T" }[Setup.bytes_per_ucs_code_point]

    # -- determine whether the lexical analyser needs indentation counting
    #    support. If one mode has an indentation handler, then indentation
    #    support must be provided.
    indentation_support_f = False
    for mode in Modes.values():
        if mode.on_indentation.get_code() != "":
            indentation_support_f = True
            break

    # NOTE: First mode-id needs to be '1' for compatibility with flex generated engines
    lex_id_definitions_str = "" 
    for i, name in enumerate(Modes.keys()):
        lex_id_definitions_str += "const int LEX_ID_%s = %i;\n" % (name, i + 1)

    include_guard_extension = get_include_guard_extension(Setup.output_file_stem)

    # -- instances of mode classes as members of the lexer
    mode_object_members_txt,     \
    constructor_txt,             \
    mode_specific_functions_txt, \
    friend_txt =                 \
         get_mode_class_related_code_fragments(Modes.values(), LexerClassName)

    # -- define a pointer that directly has the type of the derived class
    if Setup.input_derived_class_name == "":
        Setup.input_derived_class_name = LexerClassName
        derived_class_type_declaration = ""
    else:
        derived_class_type_declaration = "class %s;" % Setup.input_derived_class_name

    # -- the friends of the class
    friends_str = ""
    for friend in Setup.input_lexer_class_friends:
        friends_str += "    friend class %s;\n" % friend

    # -- the class body extension
    class_body_extension_str = lexer_mode.class_body.get_code()

    # -- the class constructor extension
    class_constructor_extension_str = lexer_mode.class_init.get_code()

    # -- read the template; the handle is closed even if reading fails
    fh = open_file_or_die(QuexClassHeaderFileTemplate)
    try:
        template_code_txt = fh.read()
    finally:
        fh.close()

    # -- check if exit/entry handlers have to be active
    entry_handler_active_f = False
    exit_handler_active_f = False
    for mode in Modes.values():
        if mode.on_entry_code_fragments() != []: entry_handler_active_f = True
        if mode.on_exit_code_fragments() != []:  exit_handler_active_f = True

    txt = template_code_txt
    def set_switch(txt, SwitchF, Name):
        # '$$SWITCH$$ Name' becomes a '#define', or a commented out '#define'.
        if SwitchF: txt = txt.replace("$$SWITCH$$ %s" % Name, "#define    %s" % Name)
        else:       txt = txt.replace("$$SWITCH$$ %s" % Name, "// #define %s" % Name)
        return txt
    
    txt = set_switch(txt, entry_handler_active_f,  "__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT")
    txt = set_switch(txt, exit_handler_active_f,   "__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT")
    txt = set_switch(txt, indentation_support_f,   "__QUEX_OPTION_INDENTATION_TRIGGER_SUPPORT")     
    txt = set_switch(txt, True,                    "__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION")
    txt = set_switch(txt, Setup.enable_iconv_f,    "QUEX_OPTION_ENABLE_ICONV")
    txt = set_switch(txt, not Setup.disable_token_queue_f,        "QUEX_OPTION_TOKEN_SENDING_VIA_QUEUE")
    txt = set_switch(txt, not Setup.disable_string_accumulator_f, "QUEX_OPTION_STRING_ACCUMULATOR")
    txt = set_switch(txt, Setup.post_categorizer_f,               "QUEX_OPTION_POST_CATEGORIZER")
    txt = set_switch(txt, True,                    "QUEX_OPTION_VIRTUAL_FUNCTION_ON_ACTION_ENTRY")      
    txt = set_switch(txt, True,                    "QUEX_OPTION_LINE_NUMBER_COUNTING")      
    txt = set_switch(txt, True,                    "QUEX_OPTION_COLUMN_NUMBER_COUNTING")        
    txt = set_switch(txt, Setup.output_debug_f,    "QUEX_OPTION_DEBUG_TOKEN_SENDING")
    txt = set_switch(txt, Setup.output_debug_f,    "QUEX_OPTION_DEBUG_MODE_TRANSITIONS")
    txt = set_switch(txt, Setup.output_debug_f,    "QUEX_OPTION_DEBUG_QUEX_PATTERN_MATCHES")
    txt = set_switch(txt, True,                    "QUEX_OPTION_INCLUDE_STACK_SUPPORT")
    txt = set_switch(txt, not Setup.no_mode_transition_check_f,           
                               "QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK")

    txt = blue_print(txt,
            [
                ["$$BUFFER_LIMIT_CODE$$",            "0x%X" % Setup.buffer_limit_code],
                ["$$CONSTRUCTOR_EXTENSTION$$",                  class_constructor_extension_str],
                ["$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$", constructor_txt],
                ["$$CORE_ENGINE_DEFINITIONS_HEADER$$",          CoreEngineDefinitionsHeader],
                ["$$CLASS_BODY_EXTENSION$$",         class_body_extension_str],
                ["$$INCLUDE_GUARD_EXTENSION$$",      include_guard_extension],
                ["$$INITIAL_LEXER_MODE_ID$$",        "LEX_ID_" + lexer_mode.initial_mode.get_code()],
                ["$$LEXER_BUILD_DATE$$",             time.asctime()],
                ["$$LEXER_BUILD_VERSION$$",          VersionID],
                ["$$LEXER_CLASS_FRIENDS$$",          friends_str],
                ["$$LEXER_CLASS_NAME$$",             LexerClassName],
                ["$$LEXER_DERIVED_CLASS_DECL$$",     derived_class_type_declaration],
                ["$$LEXER_DERIVED_CLASS_NAME$$",     Setup.input_derived_class_name],
                ["$$LEX_ID_DEFINITIONS$$",           lex_id_definitions_str],
                ["$$MAX_MODE_CLASS_N$$",             repr(len(Modes))],
                ["$$MODE_CLASS_FRIENDS$$",           friend_txt],
                ["$$MODE_OBJECT_MEMBERS$$",              mode_object_members_txt],
                ["$$MODE_SPECIFIC_ANALYSER_FUNCTIONS$$", mode_specific_functions_txt],
                ["$$PRETTY_INDENTATION$$",               "     " + " " * (len(LexerClassName)*2 + 2)],
                ["$$QUEX_TEMPLATE_DIR$$",                Setup.QUEX_TEMPLATE_DB_DIR],
                ["$$QUEX_VERSION$$",                     QuexVersionID],
                ["$$TOKEN_CLASS$$",                      Setup.input_token_class_name],
                ["$$TOKEN_CLASS_DEFINITION_FILE$$",      Setup.input_token_class_file.replace("//","/")],
                ["$$TOKEN_ID_DEFINITION_FILE$$",         Setup.output_token_id_file.replace("//","/")],
                ["$$QUEX_CHARACTER_TYPE$$",              quex_character_type_str],
                ["$$QUEX_LEXEME_TYPE$$",                 quex_lexeme_type_str],
                ["$$CORE_ENGINE_CHARACTER_CODING$$",     quex_coding_name_str],
                ["$$USER_DEFINED_HEADER$$",              lexer_mode.header.get_code() + "\n"],
             ])

    # The file is opened in binary mode, so the line separator of the
    # platform is inserted explicitly.
    if os.linesep != "\n": txt = txt.replace("\n", os.linesep)

    # -- write the result; the handle is closed even if writing fails
    fh_out = open(QuexClassHeaderFileOutput, "wb")
    try:
        fh_out.write(txt)
    finally:
        fh_out.close()
Ejemplo n.º 35
0
def get_skipper(EndSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Produce the code of a skipper which skips until the delimiter given
       by 'EndSequence' has been passed.

       EndSequence                  -- non-empty list of integers (character
                                       codes of the closing delimiter).
       Mode                         -- handed to the helpers which compare the
                                       delimiter against the indentation
                                       counter's newline and which provide the
                                       default 'on skip range open' handler.
       IndentationCounterTerminalID -- must not be None, if the delimiter is a
                                       subset of what the 'newline' pattern of
                                       indentation counting triggers.
       OnSkipRangeOpenStr           -- if not empty, it is used instead of the
                                       result of 'get_on_skip_range_open()'.

       RETURNS: [0] the module level 'template_str' with its place holders
                    filled in.
                [1] database of local variables; it contains 'reference_p'
                    only if the line/column counting replacement reports that
                    it is required.
    """
    # NOTE: A list comprehension is used instead of 'map(type, ...)' so that
    #       the comparison with a list also holds where 'map()' does not
    #       return a list.
    assert type(EndSequence) == list
    assert len(EndSequence) >= 1
    assert [type(x) for x in EndSequence] == [int] * len(EndSequence)

    local_variable_db = {}

    LanguageDB   = Setup.language_db

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$
    delimiter_str,        \
    delimiter_length_str, \
    delimiter_comment_str \
                          = get_character_sequence(EndSequence)

    delimiter_comment_str  = LanguageDB["$comment"]("                         Delimiter: " 
                                                    + delimiter_comment_str)

    # Determine the check for the tail of the delimiter: one comparison for
    # each character behind the first one. A mismatch leads back to the entry
    # of the skipper. (For a delimiter of length one, nothing is generated.)
    delimiter_remainder_test_str = ""
    for i in range(1, len(EndSequence)):
        delimiter_remainder_test_str += "    " + LanguageDB["$input/get-offset"](i-1) + "\n"
        delimiter_remainder_test_str += "    " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i)
        delimiter_remainder_test_str += "         goto %s;" % get_label("$entry", skipper_index, U=True) 
        delimiter_remainder_test_str += "    " + LanguageDB["$endif"]

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, EndSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID is not None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers 
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$terminal-direct", IndentationCounterTerminalID, U=True)

    # An explicitly given handler overrides the one derived from the mode.
    if OnSkipRangeOpenStr != "": on_skip_range_open_str = OnSkipRangeOpenStr
    else:                        on_skip_range_open_str = get_on_skip_range_open(Mode, EndSequence)

    # The main part
    code_str = blue_print(template_str,
                          [["$$DELIMITER$$",                      delimiter_str],
                           ["$$DELIMITER_LENGTH$$",               delimiter_length_str],
                           ["$$DELIMITER_COMMENT$$",              delimiter_comment_str],
                           ["$$WHILE_1_PLUS_1_EQUAL_2$$",         LanguageDB["$loop-start-endless"]],
                           ["$$END_WHILE$$",                      LanguageDB["$loop-end"]],
                           ["$$INPUT_P_INCREMENT$$",              LanguageDB["$input/increment"]],
                           ["$$INPUT_P_DECREMENT$$",              LanguageDB["$input/decrement"]],
                           ["$$INPUT_GET$$",                      LanguageDB["$input/get"]],
                           ["$$IF_INPUT_EQUAL_DELIMITER_0$$",     LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
                           ["$$ENDIF$$",                          LanguageDB["$endif"]],
                           ["$$ENTRY$$",                          get_label("$entry", skipper_index)],
                           ["$$RELOAD$$",                         get_label("$reload", skipper_index)],
                           ["$$GOTO_ENTRY$$",                     get_label("$entry", skipper_index, U=True)],
                           # When things were skipped, no change to acceptance flags or modes has
                           # happened. One can jump immediately to the start without re-entry preparation.
                           ["$$GOTO_AFTER_END_OF_SKIPPING$$",     goto_after_end_of_skipping_str], 
                           ["$$MARK_LEXEME_START$$",              LanguageDB["$mark-lexeme-start"]],
                           ["$$DELIMITER_REMAINDER_TEST$$",       delimiter_remainder_test_str],
                           ["$$ON_SKIP_RANGE_OPEN$$",             on_skip_range_open_str],
                          ])

    # Line and column number counting
    code_str, reference_p_f = __lc_counting_replacements(code_str, EndSequence)

    # The finishing touch: the skipper index is replaced last, because the
    # strings inserted above contain '$$SKIPPER_INDEX$$' themselves.
    code_str = blue_print(code_str,
                          [["$$SKIPPER_INDEX$$", __nice(skipper_index)],
                           ["$$GOTO_RELOAD$$",   get_label("$reload", skipper_index)]])

    if reference_p_f:
        local_variable_db["QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p"] = \
                           Variable("reference_p", 
                                    "QUEX_TYPE_CHARACTER_POSITION", 
                                    None,
                                    "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                                    "QUEX_OPTION_COLUMN_NUMBER_COUNTING")

    return code_str, local_variable_db
Ejemplo n.º 36
0
def __range_skipper_lc_counting_replacements(code_str, EndSequence):
    """Line and Column Number Counting(Range Skipper):
     
         -- in loop if there appears a newline, then do:
            increment line_n
            set position from where to count column_n
         -- at end of skipping do one of the following:
            if end delimiter contains newline:
               column_n = number of letters since last new line in end delimiter
               increment line_n by number of newlines in end delimiter.
               (NOTE: in this case the setting of the position from where to count
                      the column_n can be omitted.)
            else:
               column_n = current_position - position from where to count column number.

       NOTE: On reload we do count the column numbers and reset the column_p.

       code_str    -- text containing the '$$LC_COUNT_...$$' place holders.
       EndSequence -- list of integers (character codes of the end delimiter).

       RETURNS: 'code_str' with the '$$LC_COUNT_...$$' place holders replaced.
    """
    NewlineCode = ord("\n")

    variable_definition = \
      "#   if defined(QUEX_OPTION_LINE_NUMBER_COUNTING) || defined(QUEX_OPTION_COLUMN_NUMBER_COUNTING)\n" + \
      "#   ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
      "    QUEX_CHARACTER_POSITION_TYPE column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n"+\
      "#   endif\n" + \
      "#   endif\n"
    in_loop       = ""
    end_procedure = ""
    exit_loop     = ""
    new_line_detection_in_loop_enabled_f = True

    # Does the end delimiter contain a newline?
    if NewlineCode in EndSequence:
        if EndSequence[0] == NewlineCode:
            # Inside the skipped range, there cannot have been a newline
            new_line_detection_in_loop_enabled_f = False
            exit_loop = "#       ifdef QUEX_OPTION_LINE_NUMBER_COUNTING\n" + \
                        "        ++(self.counter._line_number_at_end); /* First limit character was the newline */\n" \
                        "#       endif" 

        # If the first character in the delimiter is newline, then it was counted already, see above.
        delimiter_newline_n = EndSequence[1:].count(NewlineCode)
        if delimiter_newline_n != 0:
            end_procedure += "#       ifdef QUEX_OPTION_LINE_NUMBER_COUNTING\n" + \
                             "        self.counter._line_number_at_end += %i;\n" % delimiter_newline_n + \
                             "#       endif\n"

        # If delimiter contains newline, then the column number is identical to the distance
        # of the last newline to the end of the delimiter. Searching the reversed
        # delimiter delivers the number of characters behind the last newline;
        # the result plus one is always >= 1.
        delimiter_tail_n = EndSequence[::-1].index(NewlineCode) + 1
        end_procedure += "#       ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                         "        self.counter._column_number_at_end = %i;\n" % delimiter_tail_n + \
                         "#       endif\n"
    else:
        end_procedure = "#       ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                        "        self.counter._column_number_at_end +=   QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                        "                                              - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                        "#   endif\n"
    before_reload  = "#   ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                     "    self.counter._column_number_at_end +=  QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                     "                                         - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                     "#   endif\n"
    after_reload   = "#       ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                     "        column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n" + \
                     "#       endif\n"

    if new_line_detection_in_loop_enabled_f:
        in_loop = lc_counter_in_loop

    return blue_print(code_str,
                     [["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", variable_definition],
                      ["$$LC_COUNT_IN_LOOP$$",                     in_loop],
                      ["$$LC_COUNT_END_PROCEDURE$$",               end_procedure],
                      ["$$LC_COUNT_BEFORE_RELOAD$$",               before_reload],
                      ["$$LC_COUNT_AFTER_RELOAD$$",                after_reload],
                      ["$$LC_COUNT_AT_LOOP_EXIT$$",                exit_loop],
                      ])
Ejemplo n.º 37
0
def _do(Descr):
    """Fill the token class templates with the content of the given token
       type descriptor.

       Descr -- object of class 'TokenTypeDescriptor' (must not be None).

       RETURNS: [0] text based on the '$token_template_file' template.
                [1] text based on the '$token_template_i_file' template.
    """
    # Preconditions which the caller has to ensure
    assert Descr != None
    assert Descr.__class__.__name__ == "TokenTypeDescriptor"
    ## ALLOW: Descr.get_member_db().keys() == []

    # Both templates are located below the code base directory.
    code_base_dir   = QUEX_PATH + Setup.language_db["$code_base"]
    template_path   = code_base_dir + Setup.language_db["$token_template_file"]
    template_i_path = code_base_dir + Setup.language_db["$token_template_i_file"]

    template_str   = open_file_or_die(template_path, Mode="rb").read()
    template_i_str = open_file_or_die(template_i_path, Mode="rb").read()

    # A class which is open for derivation receives a virtual destructor.
    if Descr.open_for_derivation_f: virtual_destructor_str = "virtual "
    else:                           virtual_destructor_str = ""

    # Default copy operation: Plain Copy of token memory; it is replaced, if
    # the descriptor provides copy code.
    copy_str = "__QUEX_STD_memcpy((void*)__this, (void*)__That, sizeof(QUEX_TYPE_TOKEN));\n"
    if Descr.copy.get_pure_code() != "":
        copy_str = Descr.copy.get_code()

    take_text_str = Descr.take_text.get_code()
    if take_text_str == "":
        take_text_str = "return true;\n"

    guard_name = Setup.language_db["$namespace-ref"](Descr.name_space) \
                 + "__" + Descr.class_name
    include_guard_extension_str = get_include_guard_extension(guard_name)

    # In case of plain 'C' the class name must incorporate the namespace (list)
    if Setup.language == "C": token_class_name = Setup.token_class_name_safe
    else:                     token_class_name = Descr.class_name

    # (*) Declaration
    replacements = [
        ["$$BODY$$",                    Descr.body.get_code()],
        ["$$CONSTRUCTOR$$",             Descr.constructor.get_code()],
        ["$$COPY$$",                    copy_str],
        ["$$DESTRUCTOR$$",              Descr.destructor.get_code()],
        ["$$DISTINCT_MEMBERS$$",        get_distinct_members(Descr)],
        ["$$FOOTER$$",                  Descr.footer.get_code()],
        ["$$FUNC_TAKE_TEXT$$",          take_text_str],
        ["$$HEADER$$",                  Descr.header.get_code()],
        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension_str],
        ["$$NAMESPACE_CLOSE$$",         Setup.language_db["$namespace-close"](Descr.name_space)],
        ["$$NAMESPACE_OPEN$$",          Setup.language_db["$namespace-open"](Descr.name_space)],
        ["$$QUICK_SETTERS$$",           get_quick_setters(Descr)],
        ["$$SETTERS_GETTERS$$",         get_setter_getter(Descr)],
        ["$$TOKEN_CLASS$$",             token_class_name],
        ["$$TOKEN_REPETITION_N_GET$$",  Descr.repetition_get.get_code()],
        ["$$TOKEN_REPETITION_N_SET$$",  Descr.repetition_set.get_code()],
        ["$$UNION_MEMBERS$$",           get_union_members(Descr)],
        ["$$VIRTUAL_DESTRUCTOR$$",      virtual_destructor_str],
    ]
    txt = blue_print(template_str, replacements)

    # (*) Implementation
    replacements_i = [
        ["$$CONSTRUCTOR$$",             Descr.constructor.get_code()],
        ["$$COPY$$",                    copy_str],
        ["$$DESTRUCTOR$$",              Descr.destructor.get_code()],
        ["$$FOOTER$$",                  Descr.footer.get_code()],
        ["$$FUNC_TAKE_TEXT$$",          take_text_str],
        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension_str],
        ["$$NAMESPACE_CLOSE$$",         Setup.language_db["$namespace-close"](Descr.name_space)],
        ["$$NAMESPACE_OPEN$$",          Setup.language_db["$namespace-open"](Descr.name_space)],
        ["$$TOKEN_CLASS$$",             token_class_name],
        ["$$TOKEN_REPETITION_N_GET$$",  Descr.repetition_get.get_code()],
        ["$$TOKEN_REPETITION_N_SET$$",  Descr.repetition_set.get_code()],
    ]
    txt_i = blue_print(template_i_str, replacements_i)

    # Return declaration and implementation as two strings
    return txt, txt_i
Ejemplo n.º 38
0
def get_range_skipper(EndSequence, LanguageDB, MissingClosingDelimiterAction=""):
    """Produce the code of a skipper which skips until the delimiter given
       by 'EndSequence' has been passed.

       EndSequence                   -- non-empty list of integers (character
                                        codes of the closing delimiter).
       LanguageDB                    -- database of the code fragments of the
                                        target language.
       MissingClosingDelimiterAction -- text inserted for the place holder
                                        '$$MISSING_CLOSING_DELIMITER$$'.

       RETURNS: 'range_skipper_template' with its place holders filled in.
    """
    # NOTE: A list comprehension is used instead of 'map(type, ...)' so that
    #       the comparison with a list also holds where 'map()' does not
    #       return a list.
    assert EndSequence.__class__  == list
    assert len(EndSequence) >= 1
    assert [type(x) for x in EndSequence] == [int] * len(EndSequence)

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$: hexadecimal codes for the code itself,
    # utf8 characters for the comment.
    delimiter_str = ""
    delimiter_comment_str = "                         Delimiter: "
    for letter in EndSequence:
        delimiter_comment_str += "'%s', " % utf8.map_unicode_to_utf8(letter)
        delimiter_str += "0x%X, " % letter
    delimiter_length_str = "%i" % len(EndSequence)
    delimiter_comment_str = LanguageDB["$comment"](delimiter_comment_str) 

    # Determine the check for the tail of the delimiter: one comparison for
    # each character behind the first one. A mismatch leads back to the entry
    # of the skipper. (For a delimiter of length one, nothing is generated.)
    delimiter_remainder_test_str = ""
    for i in range(1, len(EndSequence)):
        delimiter_remainder_test_str += "    " + LanguageDB["$input/get-offset"](i-1) + "\n"
        delimiter_remainder_test_str += "    " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i)
        delimiter_remainder_test_str += "         " + LanguageDB["$goto"]("$entry", skipper_index) + "\n"
        delimiter_remainder_test_str += "    " + LanguageDB["$endif"]

    # The main part
    code_str = blue_print(range_skipper_template,
                          [["$$DELIMITER$$",                  delimiter_str],
                           ["$$DELIMITER_LENGTH$$",           delimiter_length_str],
                           ["$$DELIMITER_COMMENT$$",          delimiter_comment_str],
                           ["$$WHILE_1_PLUS_1_EQUAL_2$$",     LanguageDB["$loop-start-endless"]],
                           ["$$END_WHILE$$",                  LanguageDB["$loop-end"]],
                           ["$$INPUT_P_INCREMENT$$",          LanguageDB["$input/increment"]],
                           ["$$INPUT_P_DECREMENT$$",          LanguageDB["$input/decrement"]],
                           ["$$INPUT_GET$$",                  LanguageDB["$input/get"]],
                           ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
                           ["$$BREAK$$",                      LanguageDB["$break"]],
                           ["$$ENDIF$$",                      LanguageDB["$endif"]],
                           ["$$ENTRY$$",                      LanguageDB["$label-def"]("$entry", skipper_index)],
                           ["$$DROP_OUT$$",                   LanguageDB["$label-def"]("$drop-out", skipper_index)],
                           ["$$GOTO_ENTRY$$",                 LanguageDB["$goto"]("$entry", skipper_index)],
                           ["$$GOTO_REENTRY_PREPARATION$$",   LanguageDB["$goto"]("$re-start")],
                           ["$$MARK_LEXEME_START$$",          LanguageDB["$mark-lexeme-start"]],
                           ["$$DELIMITER_REMAINDER_TEST$$",   delimiter_remainder_test_str],
                           ["$$SET_INPUT_P_BEHIND_DELIMITER$$", LanguageDB["$input/add"](len(EndSequence)-1)],
                           ["$$MISSING_CLOSING_DELIMITER$$",  MissingClosingDelimiterAction],
                          ])

    # Line and column number counting
    code_str = __range_skipper_lc_counting_replacements(code_str, EndSequence)

    # The finishing touch: the skipper index is replaced last, because the
    # strings inserted above contain '$$SKIPPER_INDEX$$' themselves.
    code_str = blue_print(code_str,
                          [["$$SKIPPER_INDEX$$", __nice(skipper_index)],
                           ["$$GOTO_DROP_OUT$$", LanguageDB["$goto"]("$drop-out", skipper_index)]])

    return code_str
Ejemplo n.º 39
0
def get_skipper(TriggerSet):
    """Produce the code of a skipper which passes by all characters that
       belong to the given set of characters--the TriggerSet.

       TriggerSet -- non-empty object of class 'NumberSet'.

       RETURNS: [0] list of code elements: prolog, the transition code of
                    the skipping loop, epilog.
                [1] database of local variables; it contains 'reference_p'.
    """
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    LanguageDB = Setup.language_db
    index      = sm_index.get()

    # Mini trigger map:  [ trigger set ] --> loop start
    # That means: As long as characters of the trigger set appear, we go to the loop start.
    # (don't worry about 'drop-out-ranges' etc.)
    loop_map = TransitionMap()
    loop_map.add_transition(TriggerSet, index)

    # On buffer limit code, the skipper must transit to a dedicated reloader
    trigger_map     = loop_map.get_trigger_map()
    goto_reload_str = "goto %s;" % get_label("$reload", index)
    iteration_code  = transition_block.do(trigger_map, index,
                                          DSM=None,
                                          GotoReload_Str=goto_reload_str)

    comment_str = LanguageDB["$comment"]("Skip any character in " +
                                         TriggerSet.get_utf8_string())

    # Line and column number counting
    prolog = __lc_counting_replacements(prolog_txt, TriggerSet)
    epilog = __lc_counting_replacements(epilog_txt, TriggerSet)

    prolog_replacements = [
        ["$$DELIMITER_COMMENT$$", comment_str],
        ["$$SKIPPER_INDEX$$",     "%i" % index],
        ["$$INPUT_GET$$",         LanguageDB["$input/get"]],
    ]
    prolog = blue_print(prolog, prolog_replacements)

    epilog_replacements = [
        ["$$INPUT_P_INCREMENT$$",             LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$",             LanguageDB["$input/decrement"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$",    LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$",                         LanguageDB["$endif"]],
        ["$$LOOP_REENTRANCE$$",               get_label("$entry", index)],
        ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$", LanguageDB["$BLC"]],
        ["$$RELOAD$$",                        get_label("$reload", index)],
        ["$$DROP_OUT_DIRECT$$",               get_label("$drop-out", index, U=True)],
        ["$$SKIPPER_INDEX$$",                 "%i" % index],
        ["$$GOTO_TERMINAL_EOF$$",             get_label("$terminal-EOF", U=True)],
        # When things were skipped, no change to acceptance flags or modes has
        # happened. One can jump immediately to the start without re-entry preparation.
        ["$$GOTO_START$$",                    get_label("$start", U=True)],
        ["$$MARK_LEXEME_START$$",             LanguageDB["$mark-lexeme-start"]],
    ]
    epilog = blue_print(epilog, epilog_replacements)

    # The loop's transition code is framed by prolog and epilog.
    code = [prolog] + list(iteration_code) + [epilog]

    local_variable_db = {
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p":
            Variable("reference_p",
                     "QUEX_TYPE_CHARACTER_POSITION",
                     None,
                     "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                     "QUEX_OPTION_COLUMN_NUMBER_COUNTING"),
    }

    return code, local_variable_db
Ejemplo n.º 40
0
def __terminal_states(SMD, action_db, OnFailureAction, EndOfStreamAction,
                      SupportBeginOfLineF, PreConditionIDList, LanguageDB):
    """Collect the code fragments that make up the terminal states.

    Returns a list holding, in this order: the terminal router (an 'Address'
    object), the terminal state prolog, the terminal code of every entry of
    'action_db', the epilog (failure and end-of-stream handling) and the
    re-entry preparation.

    NOTE: During backward-lexing, for a pre-condition, there is no need for
          terminal states, since only the flag 'pre-condition fulfilled' is
          raised.
    """
    assert SMD.__class__.__name__ == "StateMachineDecorator"
    # The three results below are not referenced further down; the calls are
    # retained exactly as they were.
    state_machine         = SMD.sm()
    post_contexted_ids    = SMD.post_contexted_sm_id_list()
    directly_reached_ids  = SMD.directly_reached_terminal_id_list()

    # (*) pattern specific terminals (entered from acceptance states)
    pattern_terminals = []
    for sm_id, action_info in action_db.items():
        pattern_terminals.extend(
            get_terminal_code(sm_id, SMD, action_info, SupportBeginOfLineF, LanguageDB))

    # With at least one terminal present, the 're-start' label is requested
    # with U=True here; the returned text itself is not needed.
    if len(action_db) > 0:
        get_label("$re-start", U=True)

    # (*) re-entry without return: reset every 'pre-condition fulfilled' flag
    flag_reset_code = []
    for pre_context_sm_id in PreConditionIDList:
        flag_name = "pre_context_%s_fulfilled_f" % __nice(pre_context_sm_id)
        flag_reset_code.extend(["    ", LanguageDB["$assignment"](flag_name, 0)])

    # (*) end of stream action
    end_of_stream_code = __adorn_action_code(EndOfStreamAction, SMD, SupportBeginOfLineF)

    # (*) failure action
    #     Normally the on_failure action is simply executed, because
    #     'get_forward()' incremented the 'current' pointer. At end of file,
    #     however, the 'current' pointer has to be reset so that the initial
    #     state can drop out on the buffer limit code and then transit to the
    #     end of file action.
    #     A 'miss' may happen after a chain of characters appeared. In any case
    #     the input pointer must be set up right after the lexeme start, so the
    #     lexer gets a new chance as soon as possible.
    failure_code = __terminal_on_failure_prolog(LanguageDB)
    failure_code.append(__adorn_action_code(OnFailureAction, SMD, SupportBeginOfLineF))

    precondition_involved_f = "0" if PreConditionIDList == [] else "1"

    prolog = __terminal_state_prolog

    # (*) terminal router
    restore_position  = LanguageDB["$input/seek_position"]("last_acceptance_input_position")
    failure_reference = "QUEX_LABEL(%i)" % get_address("$terminal-FAILURE")
    failure_label     = get_label("$terminal-FAILURE")
    router_prolog = blue_print(__terminal_router_prolog_str, [
        ["$$RESTORE_LAST_ACCEPTANCE_POS$$", restore_position],
        ["$$TERMINAL_FAILURE-REF$$",        failure_reference],
        ["$$TERMINAL_FAILURE$$",            failure_label],
    ])
    # DO NOT pass 'U=True' for the state router. This is done automatically
    # if 'goto reload' is used.
    state_router_label = get_label("$state-router")
    router = Address("$terminal-router", None,
                     [router_prolog, state_router_label, ";", __terminal_router_epilog_str])

    # (*) epilog: failure and end-of-stream terminals
    epilog_replacements = [
        ["$$FAILURE_ACTION$$",             "".join(failure_code)],
        ["$$END_OF_STREAM_ACTION$$",       end_of_stream_code],
        ["$$TERMINAL_END_OF_STREAM-DEF$$", get_label("$terminal-EOF")],
        ["$$TERMINAL_FAILURE-DEF$$",       get_label("$terminal-FAILURE")],
        ["$$STATE_MACHINE_NAME$$",         SMD.name()],
        ["$$GOTO_START_PREPARATION$$",     get_label("$re-start", U=True)],
    ]
    epilog = blue_print(__terminal_state_epilog, epilog_replacements)

    # (*) re-entry preparation
    reentry_replacements = [
        ["$$REENTRY_PREPARATION$$",                    get_label("$re-start")],
        ["$$DELETE_PRE_CONDITION_FULLFILLED_FLAGS$$",  "".join(flag_reset_code)],
        ["$$GOTO_START$$",                             get_label("$start", U=True)],
        ["$$COMMENT_ON_POST_CONTEXT_INITIALIZATION$$", comment_on_post_context_position_init_str],
        ["$$TERMINAL_FAILURE-REF$$",                   "QUEX_LABEL(%i)" % get_address("$terminal-FAILURE")],
    ]
    reentry_preparation = blue_print(__on_continue_reentry_preparation_str,
                                     reentry_replacements)

    return [router, prolog] + pattern_terminals + [epilog, reentry_preparation]
Ejemplo n.º 41
0
def __lc_counting_replacements(code_str, EndSequence):
    """Line and Column Number Counting(Range Skipper):

         -- in loop if there appears a newline, then do:
            increment line_n
            set position from where to count column_n
         -- at end of skipping do one of the following:
            if end delimiter contains newline:
               column_n = number of letters since last new line in end delimiter
               increment line_n by number of newlines in end delimiter.
               (NOTE: in this case the setting of the position from where to count
                      the column_n can be omitted.)
            else:
               column_n = current_position - position from where to count column number.

       NOTE: On reload we do count the column numbers and reset the column_p.

       code_str    -- template text containing the '$$LC_...$$' placeholders.
       EndSequence -- list of character codes (integers) of the end delimiter.

       RETURNS: tuple (text, reference_p_required_f)
                text                   -- result of 'blue_print()' applied to
                                          'code_str' with the counting fragments.
                reference_p_required_f -- True, if the generated fragments refer
                                          to the variable 'reference_p'.
    """
    LanguageDB = Setup.language_db
    newline = ord("\n")
    carriage_return = ord("\r")

    def get_character_n_after_last_newline(Sequence):
        """Number of elements behind the last newline in 'Sequence', -1 if
           'Sequence' contains no newline.
        """
        try:
            return list(reversed(Sequence)).index(newline)
        except ValueError:
            # 'index()' signals a missing element by ValueError only. Anything
            # else is a real error and must not be swallowed.
            return -1

    char_n_after_last_newline = get_character_n_after_last_newline(EndSequence)

    reference_p_def = ""

    in_loop = ""
    end_procedure = ""
    before_reload = ""
    after_reload = ""
    exit_loop = "            " + LanguageDB["$break"]
    on_first_delimiter = ""

    reference_p_required_f = False

    # Line/Column Counting:
    newline_number_in_delimiter = EndSequence.count(newline)

    # Compare against list literals: 'map()' returns an iterator on Python 3
    # which never compares equal to a list.
    if EndSequence == [newline] or EndSequence == [carriage_return, newline]:
        #  (1) If the end-delimiter is a newline
        #      => there cannot appear a newline inside the comment
        #      => IN LOOP: no line number increment
        #                  no reference pointer required for column counting
        end_procedure += "        __QUEX_IF_COUNT_COLUMNS_SET((size_t)1);\n"
        end_procedure += "        __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"

    else:
        #  (2) If end-delimiter is NOT newline
        #      => there can appear a newline inside the comment
        if newline_number_in_delimiter == 0:
            # -- no newlines in delimiter => line and column number
            #                                must be counted.
            in_loop = line_column_counter_in_loop
            end_procedure = (
                "        __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n"
                + "                                    - reference_p));\n"
            )
            reference_p_required_f = True
        else:
            # -- newline inside delimiter => line number must be counted
            #                                column number is fixed.
            end_procedure = "        __QUEX_IF_COUNT_COLUMNS_SET((size_t)%i);\n" % (char_n_after_last_newline + 1)

            if EndSequence[0] == newline or (
                len(EndSequence) > 1 and EndSequence[0:2] == [carriage_return, newline]
            ):
                # If the first character in the sequence is newline, then the line
                # counting may be prevented by the loop exit. Now, we need to count.
                on_first_delimiter = (
                    "/* First delimiter char was a newline */\n" + "    __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
                )
                end_procedure += "        __QUEX_IF_COUNT_LINES_ADD((size_t)%i);\n" % (newline_number_in_delimiter - 1)
            else:
                in_loop = line_counter_in_loop
                end_procedure += "        __QUEX_IF_COUNT_LINES_ADD((size_t)%i);\n" % newline_number_in_delimiter

    if reference_p_required_f:
        reference_p_def = "    __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
        before_reload = (
            "    __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n"
            + "                                - reference_p));\n"
        )
        after_reload = (
            "        __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
        )

    if len(EndSequence) > 1:
        # The '$input/add' code for the remaining delimiter length is put in
        # front of the counting code of the end procedure.
        end_procedure = LanguageDB["$input/add"](len(EndSequence) - 1) + "\n" + end_procedure

    return (
        blue_print(
            code_str,
            [
                ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", reference_p_def],
                ["$$LC_COUNT_IN_LOOP$$", in_loop],
                ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
                ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
                ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
                ["$$LC_ON_FIRST_DELIMITER$$", on_first_delimiter],
                ["$$LC_COUNT_LOOP_EXIT$$", exit_loop],
            ],
        ),
        reference_p_required_f,
    )
Ejemplo n.º 42
0
def __range_skipper_lc_counting_replacements(code_str, EndSequence):
    """Line and Column Number Counting(Range Skipper):

         -- in loop if there appears a newline, then do:
            increment line_n
            set position from where to count column_n
         -- at end of skipping do one of the following:
            if end delimiter contains newline:
               column_n = number of letters since last new line in end delimiter
               increment line_n by number of newlines in end delimiter.
               (NOTE: in this case the setting of the position from where to count
                      the column_n can be omitted.)
            else:
               column_n = current_position - position from where to count column number.

       NOTE: On reload we do count the column numbers and reset the column_p.

       code_str    -- template text containing the '$$LC_COUNT_...$$' placeholders.
       EndSequence -- list of character codes (integers) of the end delimiter.

       RETURNS: Result of 'blue_print()' applied to 'code_str' with the counting
                fragments. The fragments still contain '$$SKIPPER_INDEX$$'; it is
                not replaced in this function.
    """
    newline = ord("\n")

    variable_definition = \
      "#   if defined(QUEX_OPTION_LINE_NUMBER_COUNTING) || defined(QUEX_OPTION_COLUMN_NUMBER_COUNTING)\n" + \
      "#   ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
      "    QUEX_CHARACTER_POSITION_TYPE column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n"+\
      "#   endif\n" + \
      "#   endif\n"
    in_loop = ""
    end_procedure = ""
    exit_loop = ""
    new_line_detection_in_loop_enabled_f = True

    # Does the end delimiter contain a newline?
    try:
        index = EndSequence.index(newline)
    except ValueError:
        # Only 'not found' means 'no newline'; other errors must surface.
        index = -1

    if index != -1:
        if index == 0:
            # Inside the skipped range, there cannot have been a newline
            new_line_detection_in_loop_enabled_f = False
            exit_loop = "#       ifdef QUEX_OPTION_LINE_NUMBER_COUNTING\n" + \
                        "        ++(self.counter._line_number_at_end); /* First limit character was the newline */\n" \
                        "#       endif"

        # If the first character in the delimiter is newline, then it was counted already, see above.
        delimiter_newline_n = EndSequence[1:].count(newline)
        if delimiter_newline_n != 0:
            end_procedure += "#       ifdef QUEX_OPTION_LINE_NUMBER_COUNTING\n" + \
                             "        self.counter._line_number_at_end += %i;\n" % delimiter_newline_n + \
                             "#       endif\n"

        # If delimiter contains newline, then the column number is identical to the distance
        # of the last newline to the end of the delimiter. A newline is known to be
        # present here, so the value is always >= 1 and the column is always set.
        delimiter_tail_n = list(reversed(EndSequence)).index(newline) + 1
        end_procedure += "#       ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                         "        self.counter._column_number_at_end = %i;\n" % delimiter_tail_n + \
                         "#       endif\n"
    else:
        end_procedure = "#       ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                        "        self.counter._column_number_at_end +=   QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                        "                                              - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                        "#   endif\n"
    before_reload  = "#   ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                     "    self.counter._column_number_at_end +=  QuexBuffer_tell_memory_adr(&me->buffer)\n" + \
                     "                                         - column_count_p_$$SKIPPER_INDEX$$;\n" + \
                     "#   endif\n"
    after_reload   = "#       ifdef QUEX_OPTION_COLUMN_NUMBER_COUNTING\n" + \
                     "        column_count_p_$$SKIPPER_INDEX$$ = QuexBuffer_tell_memory_adr(&me->buffer);\n" + \
                     "#       endif\n"

    if new_line_detection_in_loop_enabled_f:
        in_loop = lc_counter_in_loop

    return blue_print(code_str, [
        ["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", variable_definition],
        ["$$LC_COUNT_IN_LOOP$$", in_loop],
        ["$$LC_COUNT_END_PROCEDURE$$", end_procedure],
        ["$$LC_COUNT_BEFORE_RELOAD$$", before_reload],
        ["$$LC_COUNT_AFTER_RELOAD$$", after_reload],
        ["$$LC_COUNT_AT_LOOP_EXIT$$", exit_loop],
    ])
Ejemplo n.º 43
0
def get_implementation_of_mode_functions(mode, Modes):
    """Writes constructors and mode transition functions.

                  void quex::lexer::enter_EXAMPLE_MODE() { ... }

       where EXAMPLE_MODE is a lexer mode from the given lexer-modes, and
       'quex::lexer' is the lexical analysis class.

       mode  -- the mode for which the functions are generated.
       Modes -- not referenced in this function.

       RETURNS: Result of 'blue_print()' applied to
                'mode_function_implementation_str'.
    """
    # (*) on enter
    on_entry_str  = "#ifdef __QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_entry_str += "assert(me->%s.has_entry_from(FromMode));\n" % mode.name
    on_entry_str += "#endif\n"
    for code_info in mode.on_entry_code_fragments():
        on_entry_str += code_info.get_code()
        # A trailing newline is cut after each appended fragment.
        if on_entry_str[-1] == "\n": on_entry_str = on_entry_str[:-1]

    # (*) on exit
    on_exit_str  = "#ifdef __QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_exit_str += "assert(me->%s.has_exit_to(ToMode));\n" % mode.name
    on_exit_str += "#endif\n"
    for code_info in mode.on_exit_code_fragments():
        on_exit_str += code_info.get_code()

    # (*) on indentation
    on_indentation_str = "assert(Indentation >= 0);"
    for code_info in mode.on_indentation_code_fragments():
        on_indentation_str += code_info.get_code()

    # (*) has base mode
    base_modes = mode.get_base_modes()
    if base_modes != []:
        has_base_mode_str = get_IsOneOfThoseCode(base_modes)
    else:
        has_base_mode_str = "    return false;"

    # (*) has entry from
    #     Check whether the mode we come from is an allowed mode. Without an
    #     'entry' option every mode is allowed.
    # NOTE(review): this call used to pass the lowercase name 'true'. Unless a
    #     module-level 'true' exists, that raised a NameError which the former
    #     bare 'except:' swallowed, so the default was generated in every case.
    #     Confirm that 'get_IsOneOfThoseCode()' yields what is wanted for an
    #     empty list.
    try:
        entry_list = mode.options["entry"]
    except KeyError:
        has_entry_from_str = "    return true; // default"
    else:
        has_entry_from_str = get_IsOneOfThoseCode(entry_list,
                                                  ConsiderDerivedClassesF=True)

    # (*) has exit to
    try:
        exit_list = mode.options["exit"]
    except KeyError:
        has_exit_to_str = "    return true; // default"
    else:
        has_exit_to_str = get_IsOneOfThoseCode(exit_list,
                                               ConsiderDerivedClassesF=True)

    txt = blue_print(mode_function_implementation_str,
                     [["%%ENTER-PROCEDURE%%",           on_entry_str],
                      ["%%EXIT-PROCEDURE%%",            on_exit_str],
                      ["%%INDENTATION-PROCEDURE%%",     on_indentation_str],
                      ["%%HAS_BASE_MODE%%",             has_base_mode_str],
                      ["%%HAS_ENTRANCE_FROM%%",         has_entry_from_str],
                      ["%%HAS_EXIT_TO%%",               has_exit_to_str],
                      ["%%MODE_NAME%%",                 mode.name],
                      ])
    return txt
Ejemplo n.º 44
0
def get_on_indentation_handler(Mode):
    """Produce the body of the indentation handler for 'Mode'.

    If the mode reports that the default indentation handler is sufficient,
    the body is a plain 'return'. Otherwise, the template 'on_indentation_str'
    is filled with the mode's event handlers, or with default code for those
    events for which the mode has no code fragments.
    """
    has_fragments = Mode.has_code_fragment_list

    def formatted_code(EventName):
        # 'get_code()' delivers a pair; only the code text is of interest.
        code, eol_f = action_code_formatter.get_code(
            Mode.get_code_fragment_list(EventName))
        return code

    # 'on_dedent' and 'on_n_dedent' cannot be defined at the same time.
    assert not (has_fragments("on_dedent") and has_fragments("on_n_dedent"))

    # A mode that deals only with the default indentation handler relies
    # on what is defined in '$QUEX_PATH/analayzer/member/on_indentation.i'
    if Mode.default_indentation_handler_sufficient():
        return "    return;"

    # -- indent
    if has_fragments("on_indent"):
        on_indent_str = formatted_code("on_indent")
    else:
        on_indent_str = "self_send(__QUEX_SETTING_TOKEN_ID_INDENT);"

    # -- nodent
    if has_fragments("on_nodent"):
        on_nodent_str = formatted_code("on_nodent")
    else:
        on_nodent_str = "self_send(__QUEX_SETTING_TOKEN_ID_NODENT);"

    # -- dedent: either 'on_dedent', or 'on_n_dedent', or the default
    if has_fragments("on_dedent"):
        assert not has_fragments("on_n_dedent")
        on_dedent_str = formatted_code("on_dedent")
        on_n_dedent_str = ""
    elif has_fragments("on_n_dedent"):
        assert not has_fragments("on_dedent")
        on_n_dedent_str = formatted_code("on_n_dedent")
        on_dedent_str = ""
    else:
        # Neither of both is defined: send DEDENT tokens by default.
        on_dedent_str = ""
        on_n_dedent_str = (
            "#if defined(QUEX_OPTION_TOKEN_REPETITION_SUPPORT)\n"
            "    self_send_n(ClosedN, __QUEX_SETTING_TOKEN_ID_DEDENT);\n"
            "#else\n"
            "    while( start-- != stack->back ) self_send(__QUEX_SETTING_TOKEN_ID_DEDENT);\n"
            "#endif\n"
        )

    # -- indentation error
    if has_fragments("on_indentation_error"):
        on_indentation_error = formatted_code("on_indentation_error")
    else:
        # Default: Exit the program if there is an indentation error.
        error_head = 'QUEX_ERROR_EXIT("Lexical analyzer mode \'%s\': indentation error detected!\\n"' \
                     % Mode.name
        error_tail = '                "No \'on_indentation_error\' handler has been specified.\\n");'
        on_indentation_error = error_head + error_tail

    # Note: 'on_indentation_bad' is applied in code generation for
    #       indentation counter in 'indentation_counter.py'.
    replacements = [
        ["$$INDENT-PROCEDURE$$", on_indent_str],
        ["$$NODENT-PROCEDURE$$", on_nodent_str],
        ["$$DEDENT-PROCEDURE$$", on_dedent_str],
        ["$$N-DEDENT-PROCEDURE$$", on_n_dedent_str],
        ["$$INDENTATION-ERROR-PROCEDURE$$", on_indentation_error],
    ]
    return blue_print(on_indentation_str, replacements)
Ejemplo n.º 45
0
def __terminal_states(SMD, action_db, DefaultAction, EndOfStreamAction, 
                      SupportBeginOfLineF, PreConditionIDList, LanguageDB):
    """Generate the text of the terminal states and the re-entry preparation.

       SMD                 -- the 'StateMachineDecorator' of the analyzer.
       action_db           -- maps state machine id --> pattern action info.
       DefaultAction       -- action to be adorned as default action.
       EndOfStreamAction   -- action to be adorned as end-of-stream action.
       SupportBeginOfLineF -- passed through to the terminal code generators.
       PreConditionIDList  -- ids of the pre-context state machines whose
                              'fulfilled' flags are reset on re-entry.
       LanguageDB          -- database of code snippets of the target language.

       RETURNS: '__terminal_state_str' filled by 'blue_print()', followed by
                '__on_continue_reentry_preparation_str' filled the same way.

       NOTE: During backward-lexing, for a pre-condition, there is no need for
             terminal states, since only the flag 'pre-condition fulfilled' is
             raised.
    """      
    assert SMD.__class__.__name__ == "StateMachineDecorator"
    sm = SMD.sm()
    # Not referenced below; the calls are kept as they were.
    PostConditionedStateMachineID_List = SMD.post_contexted_sm_id_list()
    DirectlyReachedTerminalID_List     = SMD.directly_reached_terminal_id_list()

    def router_cases(StateDB):
        """One 'case' line for each state of 'StateDB' that has transitions."""
        return ["            " + "case %i: " % int(state_index)
                + LanguageDB["$goto"]("$input", state_index) + "\n"
                for state_index, state in StateDB.items()
                if not state.transitions().is_empty()]

    # (*) specific terminal states of patterns (entered from acceptance states)
    specific_terminal_states_str = "".join(
        [get_terminal_code(state_machine_id, SMD, pattern_action_info,
                           SupportBeginOfLineF, LanguageDB)
         for state_machine_id, pattern_action_info in action_db.items()])

    # (*) general terminal state (entered from non-acceptance state)    
    jump_list = []
    for state_machine_id in action_db:
        # 'repr()' of a Python 2 long ends with 'L'; it is cut out here.
        jump_list.append("            case %s: " % repr(state_machine_id).replace("L", ""))
        jump_list.append(LanguageDB["$goto"]("$terminal", state_machine_id) + "\n")
    jumps_to_acceptance_states_str = "".join(jump_list)

    # (*) preparation of the reentry without return:
    #     delete all pre-condition fulfilled flags
    delete_pre_context_flags_str = "".join(
        ["    " + LanguageDB["$assignment"]("pre_context_%s_fulfilled_f" % __nice(pre_context_sm_id), 0)
         for pre_context_sm_id in PreConditionIDList])

    #  -- execute default pattern action 
    #  -- goto initial state    
    end_of_stream_code_action_str = __adorn_action_code(EndOfStreamAction, SMD, SupportBeginOfLineF,
                                                        IndentationOffset=16)
    # -- DEFAULT ACTION: Under 'normal' circumstances the default action is simply to be executed
    #                    since the 'get_forward()' incremented the 'current' pointer.
    #                    HOWEVER, when end of file has been reached the 'current' pointer has to
    #                    be reset so that the initial state can drop out on the buffer limit code
    #                    and then transit to the end of file action.
    # NOTE: It is possible that 'miss' happens after a chain of characters appeared. In any case the input
    #       pointer must be setup right after the lexeme start. This way, the lexer gets a new chance as
    #       soon as possible.
    default_action_str = "".join([
        "me->buffer._input_p = me->buffer._lexeme_start_p;\n",
        LanguageDB["$if EOF"] + "\n",
        "    " + LanguageDB["$comment"]("Next increment will stop on EOF character.") + "\n",
        LanguageDB["$endif"] + "\n",
        LanguageDB["$else"] + "\n",
        "    " + LanguageDB["$comment"]("Step over nomatching character") + "\n",
        "    " + LanguageDB["$input/increment"] + "\n",
        LanguageDB["$endif"] + "\n",
        __adorn_action_code(DefaultAction, SMD, SupportBeginOfLineF,
                            IndentationOffset=16),
    ])

    # -- routing to states via switch statement
    #    (note, the gcc computed goto is implemented, too)
    case_list = router_cases(sm.states)
    pre_context_sm = sm.core().pre_context_sm()
    if pre_context_sm is not None:
        case_list.extend(router_cases(pre_context_sm.states))
    # NOTE(review): this text is not passed to any of the 'blue_print()' calls
    #               below. It is still computed, in case the '$goto' calls are
    #               needed for something beyond their return value -- confirm.
    switch_cases_drop_out_back_router_str = "".join(case_list)

    txt = blue_print(__terminal_state_str, 
                     [["$$JUMPS_TO_ACCEPTANCE_STATE$$",    jumps_to_acceptance_states_str],   
                      ["$$SPECIFIC_TERMINAL_STATES$$",     specific_terminal_states_str],
                      ["$$DEFAULT_ACTION$$",               default_action_str],
                      ["$$END_OF_STREAM_ACTION$$",         end_of_stream_code_action_str],
                      ["$$TERMINAL_END_OF_STREAM-DEF$$",   LanguageDB["$label-def"]("$terminal-EOF")],
                      ["$$TERMINAL_DEFAULT-DEF$$",         LanguageDB["$label-def"]("$terminal-DEFAULT")],
                      ["$$TERMINAL_GENERAL-DEF$$",         LanguageDB["$label-def"]("$terminal-general", False)],
                      ["$$TERMINAL_DEFAULT-GOTO$$",        LanguageDB["$goto"]("$terminal-DEFAULT")],
                      ["$$STATE_MACHINE_NAME$$",           SMD.name()],
                      ["$$GOTO_START_PREPARATION$$",       LanguageDB["$goto"]("$re-start")],
                      ])

    txt += blue_print(__on_continue_reentry_preparation_str,
                      [["$$REENTRY_PREPARATION$$",                    LanguageDB["$label-def"]("$re-start")],
                       ["$$DELETE_PRE_CONDITION_FULLFILLED_FLAGS$$",  delete_pre_context_flags_str],
                       ["$$GOTO_START$$",                             LanguageDB["$goto"]("$start")],
                       ["$$COMMENT_ON_POST_CONTEXT_INITIALIZATION$$", comment_on_post_context_position_init_str],
                       ])

    return txt
Ejemplo n.º 46
0
def get_implementation_of_mode_functions(mode, Modes):
    """Writes constructors and mode transition functions.

                  void quex::lexer::enter_EXAMPLE_MODE() { ... }

       where EXAMPLE_MODE is a lexer mode from the given lexer-modes, and
       'quex::lexer' is the lexical analysis class.

       mode  -- the mode for which the functions are generated.
       Modes -- all modes; searched by name in order to drop those mode names
                whose mode has the option 'inheritable' set to 'only'.

       RETURNS: Result of 'blue_print()' applied to
                'mode_function_implementation_str'.
    """
    def __filter_out_inheritable_only(ModeNameList):
        """Names of 'ModeNameList' that belong to a mode in 'Modes' whose
           option 'inheritable' is not 'only'. Names without a matching mode
           are dropped.
        """
        result = []
        for name in ModeNameList:
            for candidate in Modes:
                if candidate.name == name:
                    if candidate.options["inheritable"] != "only": result.append(name)
                    break
        return result

    # (*) on enter 
    on_entry_str  = "#   ifdef QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_entry_str += "    QUEX_NAME(%s).has_entry_from(FromMode);\n" % mode.name
    on_entry_str += "#   endif\n"
    for code_info in mode.get_code_fragment_list("on_entry"):
        on_entry_str += code_info.get_code()
        # A trailing newline is cut after each appended fragment.
        if on_entry_str[-1] == "\n": on_entry_str = on_entry_str[:-1]

    # (*) on exit
    on_exit_str  = "#   ifdef QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    on_exit_str += "    QUEX_NAME(%s).has_exit_to(ToMode);\n" % mode.name
    on_exit_str += "#   endif\n"
    for code_info in mode.get_code_fragment_list("on_exit"):
        on_exit_str += code_info.get_code()

    # (*) on indentation
    on_indentation_str = get_on_indentation_handler(mode)

    # (*) has base mode
    if mode.has_base_mode():
        base_mode_list    = __filter_out_inheritable_only(mode.get_base_mode_name_list())
        has_base_mode_str = get_IsOneOfThoseCode(base_mode_list)
    else:
        has_base_mode_str = "    return false;"
        
    # (*) has entry from
    #     Check whether the mode we come from is an allowed mode. Without an
    #     'entry' option every mode is allowed.
    # NOTE(review): the former call wrapped the keyword argument into a call
    #     to '__filter_out_inheritable_only()' (which takes no such keyword)
    #     and used the lowercase name 'true'. The resulting exception was
    #     swallowed by a bare 'except:', so the default was generated in every
    #     case. Confirm that 'get_IsOneOfThoseCode()' yields what is wanted
    #     for an empty list.
    try:
        entry_option = mode.options["entry"]
    except KeyError:
        has_entry_from_str = "    return true; /* default */"
    else:
        entry_list         = __filter_out_inheritable_only(entry_option)
        has_entry_from_str = get_IsOneOfThoseCode(entry_list,
                                                  ConsiderDerivedClassesF=True)

    # (*) has exit to
    try:
        exit_option = mode.options["exit"]
    except KeyError:
        has_exit_to_str = "    return true; /* default */"
    else:
        exit_list       = __filter_out_inheritable_only(exit_option)
        has_exit_to_str = get_IsOneOfThoseCode(exit_list,
                                               ConsiderDerivedClassesF=True)

    txt = blue_print(mode_function_implementation_str,
                     [
                      ["$$ENTER-PROCEDURE$$",      on_entry_str],
                      ["$$EXIT-PROCEDURE$$",       on_exit_str],
                      #
                      ["$$ON_INDENTATION-PROCEDURE$$", on_indentation_str],
                      #
                      ["$$HAS_BASE_MODE$$",        has_base_mode_str],
                      ["$$HAS_ENTRANCE_FROM$$",    has_entry_from_str],
                      ["$$HAS_EXIT_TO$$",          has_exit_to_str],
                      #
                      ["$$MODE_NAME$$",            mode.name],
                      ])
    return txt
Ejemplo n.º 47
0
def get_range_skipper(EndSequence,
                      LanguageDB,
                      MissingClosingDelimiterAction=""):
    """Generate the code of a range skipper from 'range_skipper_template'.

       EndSequence -- non-empty list of integers: the character codes of the
                      closing delimiter.
       LanguageDB  -- database of code snippets of the target language.
       MissingClosingDelimiterAction
                   -- text inserted for '$$MISSING_CLOSING_DELIMITER$$'.

       RETURNS: The template text with all placeholders handled here replaced.
    """
    assert EndSequence.__class__ == list
    assert len(EndSequence) >= 1
    # NOT: 'map(type, EndSequence) == [...]'. On Python 3 'map()' returns an
    #      iterator that never compares equal to a list.
    assert all(isinstance(letter, int) for letter in EndSequence)

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$
    delimiter_str = "".join(["0x%X, " % letter for letter in EndSequence])
    delimiter_length_str = "%i" % len(EndSequence)
    delimiter_comment_str = LanguageDB["$comment"](
        "                         Delimiter: "
        + "".join(["'%s', " % utf8.map_unicode_to_utf8(letter)
                   for letter in EndSequence]))

    # Determine the check for the tail of the delimiter:
    # Delimiter element 'i' is compared with the input at offset 'i - 1'. On
    # mismatch the skipper is entered again. A delimiter of length one has no
    # tail, thus the text remains empty.
    remainder_test = []
    for i in range(1, len(EndSequence)):
        remainder_test.append("    " + LanguageDB["$input/get-offset"](i - 1) + "\n")
        remainder_test.append("    " + LanguageDB["$if !="](
            "Skipper$$SKIPPER_INDEX$$[%i]" % i))
        remainder_test.append("         " + LanguageDB["$goto"]("$entry",
                                                                skipper_index) + "\n")
        remainder_test.append("    " + LanguageDB["$endif"])
    delimiter_remainder_test_str = "".join(remainder_test)

    # The main part
    code_str = blue_print(range_skipper_template, [
        ["$$DELIMITER$$", delimiter_str],
        ["$$DELIMITER_LENGTH$$", delimiter_length_str],
        ["$$DELIMITER_COMMENT$$", delimiter_comment_str],
        ["$$WHILE_1_PLUS_1_EQUAL_2$$", LanguageDB["$loop-start-endless"]],
        ["$$END_WHILE$$", LanguageDB["$loop-end"]],
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
        [
            "$$IF_INPUT_EQUAL_DELIMITER_0$$",
            LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")
        ],
        ["$$BREAK$$", LanguageDB["$break"]],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$ENTRY$$", LanguageDB["$label-def"]("$entry", skipper_index)],
        ["$$DROP_OUT$$", LanguageDB["$label-def"]("$drop-out", skipper_index)],
        ["$$GOTO_ENTRY$$", LanguageDB["$goto"]("$entry", skipper_index)],
        ["$$GOTO_REENTRY_PREPARATION$$", LanguageDB["$goto"]("$re-start")],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
        ["$$DELIMITER_REMAINDER_TEST$$", delimiter_remainder_test_str],
        [
            "$$SET_INPUT_P_BEHIND_DELIMITER$$",
            LanguageDB["$input/add"](len(EndSequence) - 1)
        ],
        ["$$MISSING_CLOSING_DELIMITER$$", MissingClosingDelimiterAction],
    ])

    # Line and column number counting
    code_str = __range_skipper_lc_counting_replacements(code_str, EndSequence)

    # The finishing touch: '$$SKIPPER_INDEX$$' is replaced last, because the
    # fragments inserted above contain this placeholder themselves.
    code_str = blue_print(code_str, [[
        "$$SKIPPER_INDEX$$", __nice(skipper_index)
    ], ["$$GOTO_DROP_OUT$$", LanguageDB["$goto"]("$drop-out", skipper_index)]])

    return code_str
Ejemplo n.º 48
0
def __terminal_states(SMD, action_db, OnFailureAction, EndOfStreamAction,
                      SupportBeginOfLineF, PreConditionIDList, LanguageDB):
    """Assemble the code fragments for the terminal states of an analyzer.

       SMD                 -- object whose class name must be "StateMachineDecorator".
       action_db           -- maps state machine id --> pattern action info; one
                              group of terminal code is generated per entry.
       OnFailureAction     -- action that is adorned and put into the FAILURE terminal.
       EndOfStreamAction   -- action that is adorned and put into the END_OF_STREAM
                              terminal.
       SupportBeginOfLineF -- passed through to 'get_terminal_code()' and
                              '__adorn_action_code()'.
       PreConditionIDList  -- ids of the pre-contexts whose 'fulfilled' flags are
                              reset to 0 in the re-entry preparation.
       LanguageDB          -- database of code snippets for the target language.

       RETURNS: A list consisting of (in this order) the terminal router (an
                'Address' object), the terminal state prolog, the pattern specific
                terminal code, the epilog and the re-entry preparation.

       NOTE: During backward-lexing, for a pre-condition, there is no need for terminal
             states, since only the flag 'pre-condition fulfilled' is raised.
    """
    assert SMD.__class__.__name__ == "StateMachineDecorator"
    # NOTE(review): the following three values are not read anywhere below in
    #               this function.
    sm = SMD.sm()
    PostConditionedStateMachineID_List = SMD.post_contexted_sm_id_list()
    DirectlyReachedTerminalID_List = SMD.directly_reached_terminal_id_list()

    # (*) specific terminal states of patterns (entered from acceptance states)
    specific_terminal_states = []
    for state_machine_id, pattern_action_info in action_db.items():
        code = get_terminal_code(state_machine_id, SMD, pattern_action_info,
                                 SupportBeginOfLineF, LanguageDB)

        specific_terminal_states.extend(code)

    # If there is at least a single terminal, then the 're-entry' preparation must be accomplished.
    # The return value is dropped; the call is made only for its effect on the label
    # bookkeeping (presumably 'U=True' marks the label as used -- confirm in 'get_label()').
    if len(action_db) != 0: get_label("$re-start", U=True)

    # (*) preparation of the reentry without return:
    #     delete all pre-condition fulfilled flags
    #     (each flag contributes an indentation string and an assignment of 0)
    delete_pre_context_flags = []
    for pre_context_sm_id in PreConditionIDList:
        delete_pre_context_flags.append("    ")
        delete_pre_context_flags.append(LanguageDB["$assignment"](
            "pre_context_%s_fulfilled_f" % __nice(pre_context_sm_id), 0))

    # (*) END OF STREAM ACTION: the adorned action code that is inserted into the
    #     end-of-stream terminal of the epilog below.
    end_of_stream_code_action_str = __adorn_action_code(
        EndOfStreamAction, SMD, SupportBeginOfLineF)

    # -- FAILURE ACTION: Under 'normal' circumstances the on_failure action is simply to be executed
    #                    since the 'get_forward()' incremented the 'current' pointer.
    #                    HOWEVER, when end of file has been reached the 'current' pointer has to
    #                    be reset so that the initial state can drop out on the buffer limit code
    #                    and then transit to the end of file action.
    # NOTE: It is possible that 'miss' happens after a chain of characters appeared. In any case the input
    #       pointer must be set up right after the lexeme start. This way, the lexer gets a new chance as
    #       soon as possible.
    on_failure = __terminal_on_failure_prolog(LanguageDB)
    msg = __adorn_action_code(OnFailureAction, SMD, SupportBeginOfLineF)

    on_failure.append(msg)

    # NOTE(review): 'precondition_involved_f' is not read anywhere below in this function.
    if PreConditionIDList == []: precondition_involved_f = "0"
    else: precondition_involved_f = "1"

    prolog = __terminal_state_prolog

    # (*) The terminal router: prolog template with the placeholders filled in,
    #     followed by the state router label, a ';' and the router epilog.
    router = Address(
        "$terminal-router",
        None,
        [
            blue_print(__terminal_router_prolog_str, [
                [
                    "$$RESTORE_LAST_ACCEPTANCE_POS$$",
                    LanguageDB["$input/seek_position"]
                    ("last_acceptance_input_position")
                ],
                [
                    "$$TERMINAL_FAILURE-REF$$",
                    "QUEX_LABEL(%i)" % get_address("$terminal-FAILURE")
                ],
                ["$$TERMINAL_FAILURE$$",
                 get_label("$terminal-FAILURE")],
            ]),
            # DO NOT 'U=True' for the state router. This is done automatically if
            # 'goto reload' is used.
            get_label("$state-router"),
            ";",
            __terminal_router_epilog_str,
        ])

    # (*) The epilog: FAILURE and END_OF_STREAM terminals plus the jump to the
    #     re-entry preparation.
    epilog = blue_print(__terminal_state_epilog, [
        ["$$FAILURE_ACTION$$", "".join(on_failure)],
        ["$$END_OF_STREAM_ACTION$$", end_of_stream_code_action_str],
        ["$$TERMINAL_END_OF_STREAM-DEF$$",
         get_label("$terminal-EOF")],
        ["$$TERMINAL_FAILURE-DEF$$",
         get_label("$terminal-FAILURE")],
        ["$$STATE_MACHINE_NAME$$", SMD.name()],
        ["$$GOTO_START_PREPARATION$$",
         get_label("$re-start", U=True)],
    ])

    # (*) The re-entry preparation: reset of the pre-context flags and the jump
    #     back to the start.
    reentry_preparation = blue_print(__on_continue_reentry_preparation_str, [
        ["$$REENTRY_PREPARATION$$",
         get_label("$re-start")],
        [
            "$$DELETE_PRE_CONDITION_FULLFILLED_FLAGS$$",
            "".join(delete_pre_context_flags)
        ],
        ["$$GOTO_START$$", get_label("$start", U=True)],
        [
            "$$COMMENT_ON_POST_CONTEXT_INITIALIZATION$$",
            comment_on_post_context_position_init_str
        ],
        [
            "$$TERMINAL_FAILURE-REF$$",
            "QUEX_LABEL(%i)" % get_address("$terminal-FAILURE")
        ],
    ])

    # (*) Put the pieces together in the order in which they are to appear.
    txt = []
    txt.append(router)
    txt.append(prolog)
    txt.extend(specific_terminal_states)
    txt.append(epilog)
    txt.append(reentry_preparation)

    return txt
Ejemplo n.º 49
0
def get_skipper(EndSequence, Mode=None, IndentationCounterTerminalID=None, OnSkipRangeOpenStr=""):
    """Generate the code of a 'range skipper', i.e. of a code fragment that
       passes by all input until the closing delimiter 'EndSequence' appeared.

       EndSequence        -- non-empty list of integers; the character codes of
                             the closing delimiter.
       Mode               -- passed to the check whether the delimiter is a subset
                             of the indentation counter's newline, and to
                             'get_on_skip_range_open()'.
       IndentationCounterTerminalID
                          -- terminal to which the skipper jumps at its end, if the
                             delimiter is a subset of the indentation counter's
                             newline. In that case it must not be 'None'.
       OnSkipRangeOpenStr -- if not empty, this text is used for the 'skip range
                             open' handler instead of what 'get_on_skip_range_open()'
                             delivers.

       RETURNS: (code_str, local_variable_db)

                code_str          -- the generated code as a string.
                local_variable_db -- variables that the generated code requires;
                                     it contains 'reference_p' only if the column
                                     counting needs a reference pointer.
    """
    assert type(EndSequence) == list
    assert len(EndSequence) >= 1
    # Compare against a true list. 'map(type, ...) == [...]' holds only under
    # Python 2; under Python 3 'map' delivers an iterator which never equals a list.
    assert [type(x) for x in EndSequence] == [int] * len(EndSequence)

    local_variable_db = {}

    LanguageDB = Setup.language_db

    # Name the $$SKIPPER$$
    skipper_index = sm_index.get()

    # Determine the $$DELIMITER$$
    delimiter_str, delimiter_length_str, delimiter_comment_str = get_character_sequence(EndSequence)

    delimiter_comment_str = LanguageDB["$comment"]("                         Delimiter: " + delimiter_comment_str)

    # Determine the check for the tail of the delimiter: For each character
    # behind the first one, get the input at the according offset and return to
    # the skipper's entry if it does not match. A delimiter of length one results
    # in an empty test.
    remainder_test = []
    for i in range(1, len(EndSequence)):
        remainder_test.append("    " + LanguageDB["$input/get-offset"](i - 1) + "\n")
        remainder_test.append("    " + LanguageDB["$if !="]("Skipper$$SKIPPER_INDEX$$[%i]" % i))
        remainder_test.append("         goto %s;" % get_label("$entry", skipper_index, U=True))
        remainder_test.append("    " + LanguageDB["$endif"])
    delimiter_remainder_test_str = "".join(remainder_test)

    if not end_delimiter_is_subset_of_indentation_counter_newline(Mode, EndSequence):
        goto_after_end_of_skipping_str = "goto %s;" % get_label("$start", U=True)
    else:
        # If there is indentation counting involved, then the counter's terminal id must
        # be determined at this place.
        assert IndentationCounterTerminalID is not None
        # If the ending delimiter is a subset of what the 'newline' pattern triggers
        # in indentation counting => move on to the indentation counter.
        goto_after_end_of_skipping_str = "goto %s;" % get_label(
            "$terminal-direct", IndentationCounterTerminalID, U=True
        )

    if OnSkipRangeOpenStr != "":
        on_skip_range_open_str = OnSkipRangeOpenStr
    else:
        on_skip_range_open_str = get_on_skip_range_open(Mode, EndSequence)

    # The main part
    # ('template_str' is a module level template which is only read here.)
    code_str = blue_print(
        template_str,
        [
            ["$$DELIMITER$$", delimiter_str],
            ["$$DELIMITER_LENGTH$$", delimiter_length_str],
            ["$$DELIMITER_COMMENT$$", delimiter_comment_str],
            ["$$WHILE_1_PLUS_1_EQUAL_2$$", LanguageDB["$loop-start-endless"]],
            ["$$END_WHILE$$", LanguageDB["$loop-end"]],
            ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
            ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
            ["$$INPUT_GET$$", LanguageDB["$input/get"]],
            ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("Skipper$$SKIPPER_INDEX$$[0]")],
            ["$$ENDIF$$", LanguageDB["$endif"]],
            ["$$ENTRY$$", get_label("$entry", skipper_index)],
            ["$$RELOAD$$", get_label("$reload", skipper_index)],
            ["$$GOTO_ENTRY$$", get_label("$entry", skipper_index, U=True)],
            # When things were skipped, no change to acceptance flags or modes has
            # happened. One can jump immediately to the start without re-entry preparation.
            ["$$GOTO_AFTER_END_OF_SKIPPING$$", goto_after_end_of_skipping_str],
            ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
            ["$$DELIMITER_REMAINDER_TEST$$", delimiter_remainder_test_str],
            ["$$ON_SKIP_RANGE_OPEN$$", on_skip_range_open_str],
        ],
    )

    # Line and column number counting
    code_str, reference_p_f = __lc_counting_replacements(code_str, EndSequence)

    # The finishing touch: '$$SKIPPER_INDEX$$' is replaced last, because the
    # replacements above insert text that contains this placeholder.
    code_str = blue_print(
        code_str,
        [["$$SKIPPER_INDEX$$", __nice(skipper_index)], ["$$GOTO_RELOAD$$", get_label("$reload", skipper_index)]],
    )

    if reference_p_f:
        local_variable_db["QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p"] = Variable(
            "reference_p",
            "QUEX_TYPE_CHARACTER_POSITION",
            None,
            "(QUEX_TYPE_CHARACTER_POSITION)0x0",
            "QUEX_OPTION_COLUMN_NUMBER_COUNTING",
        )

    return code_str, local_variable_db
Ejemplo n.º 50
0
def get_skipper(TriggerSet):
    """Generate the code of a 'character set skipper', i.e. of a code fragment
       that passes by all characters which belong to the given 'TriggerSet'.

       TriggerSet -- non-empty 'NumberSet' of the characters to be skipped.

       RETURNS: (code, local_variable_db)

                code              -- list: the prolog, the transition code of
                                     the skipping loop and the epilog.
                local_variable_db -- the 'reference_p' variable that is required
                                     for column number counting.
    """
    assert TriggerSet.__class__.__name__ == "NumberSet"
    assert not TriggerSet.is_empty()

    LanguageDB = Setup.language_db

    skipper_index = sm_index.get()

    # The loop consists of a single transition: [ trigger set ] --> loop start.
    # As long as a character of the trigger set appears, the loop is re-entered
    # (no need to care about 'drop-out-ranges' etc.).
    loop_map = TransitionMap()
    loop_map.add_transition(TriggerSet, skipper_index)

    # On buffer limit code, the skipper must transit to a dedicated reloader
    trigger_map = loop_map.get_trigger_map()
    goto_reload_str = "goto %s;" % get_label("$reload", skipper_index)
    iteration_code = transition_block.do(trigger_map,
                                         skipper_index,
                                         DSM=None,
                                         GotoReload_Str=goto_reload_str)

    comment_str = LanguageDB["$comment"]("Skip any character in " + TriggerSet.get_utf8_string())

    # Line and column number counting
    prolog = __lc_counting_replacements(prolog_txt, TriggerSet)
    epilog = __lc_counting_replacements(epilog_txt, TriggerSet)

    # Fill the placeholders of the prolog
    prolog_replacements = [
        ["$$DELIMITER_COMMENT$$", comment_str],
        ["$$SKIPPER_INDEX$$", "%i" % skipper_index],
        ["$$INPUT_GET$$", LanguageDB["$input/get"]],
    ]
    prolog = blue_print(prolog, prolog_replacements)

    # Fill the placeholders of the epilog
    epilog_replacements = [
        ["$$INPUT_P_INCREMENT$$", LanguageDB["$input/increment"]],
        ["$$INPUT_P_DECREMENT$$", LanguageDB["$input/decrement"]],
        ["$$IF_INPUT_EQUAL_DELIMITER_0$$", LanguageDB["$if =="]("SkipDelimiter$$SKIPPER_INDEX$$[0]")],
        ["$$ENDIF$$", LanguageDB["$endif"]],
        ["$$LOOP_REENTRANCE$$", get_label("$entry", skipper_index)],
        ["$$INPUT_EQUAL_BUFFER_LIMIT_CODE$$", LanguageDB["$BLC"]],
        ["$$RELOAD$$", get_label("$reload", skipper_index)],
        ["$$DROP_OUT_DIRECT$$", get_label("$drop-out", skipper_index, U=True)],
        ["$$SKIPPER_INDEX$$", "%i" % skipper_index],
        ["$$GOTO_TERMINAL_EOF$$", get_label("$terminal-EOF", U=True)],
        # Skipping changes neither acceptance flags nor modes. Thus, the jump
        # goes directly to the start; no re-entry preparation is required.
        ["$$GOTO_START$$", get_label("$start", U=True)],
        ["$$MARK_LEXEME_START$$", LanguageDB["$mark-lexeme-start"]],
    ]
    epilog = blue_print(epilog, epilog_replacements)

    code = [prolog] + list(iteration_code) + [epilog]

    reference_p = Variable("reference_p",
                           "QUEX_TYPE_CHARACTER_POSITION",
                           None,
                           "(QUEX_TYPE_CHARACTER_POSITION)0x0",
                           "QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    local_variable_db = {
        "QUEX_OPTION_COLUMN_NUMBER_COUNTING/reference_p": reference_p,
    }

    return code, local_variable_db
Ejemplo n.º 51
0
def __lc_counting_replacements(code_str, EndSequence):
    """Line and Column Number Counting(Range Skipper):

         -- in loop if there appears a newline, then do:
            increment line_n
            set position from where to count column_n
         -- at end of skipping do one of the following:
            if end delimiter contains newline:
               column_n = number of letters since last new line in end delimiter
               increment line_n by number of newlines in end delimiter.
               (NOTE: in this case the setting of the position from where to count
                      the column_n can be omitted.)
            else:
               column_n = current_position - position from where to count column number.

       NOTE: On reload we do count the column numbers and reset the column_p.

       code_str    -- code template that contains the '$$LC_...$$' placeholders.
       EndSequence -- list of integers; the character codes of the closing
                      delimiter of the skipped range.

       RETURNS: (code, reference_p_required_f)

                code                   -- 'code_str' with the '$$LC_...$$'
                                          placeholders replaced.
                reference_p_required_f -- True, if the inserted code refers to the
                                          variable 'reference_p'.
    """
    LanguageDB = Setup.language_db

    NEWLINE = ord('\n')
    RETOUR  = ord('\r')

    def get_character_n_after_last_newline(Sequence):
        """RETURNS: Number of characters behind the last newline in 'Sequence';
                    -1, if 'Sequence' does not contain a newline.
        """
        # Position of the first newline in the reversed sequence
        # = number of characters behind the last newline.
        try:               return list(reversed(Sequence)).index(NEWLINE)
        except ValueError: return -1

    char_n_after_last_newline = get_character_n_after_last_newline(EndSequence)

    reference_p_def    = ""
    in_loop            = ""
    end_procedure      = ""
    before_reload      = ""
    after_reload       = ""
    on_first_delimiter = ""
    exit_loop          = "            " + LanguageDB["$break"]

    reference_p_required_f = False

    # Line/Column Counting:
    newline_number_in_delimiter = EndSequence.count(NEWLINE)

    # Compare against list literals. A comparison with 'map(ord, ...)' holds only
    # under Python 2; under Python 3 'map' delivers an iterator which never
    # equals a list, so that case (1) would never be entered.
    if EndSequence == [NEWLINE] or EndSequence == [RETOUR, NEWLINE]:
        #  (1) If the end-delimiter is a newline
        #      => there cannot appear a newline inside the comment
        #      => IN LOOP: no line number increment
        #                  no reference pointer required for column counting
        end_procedure += "        __QUEX_IF_COUNT_COLUMNS_SET((size_t)1);\n"
        end_procedure += "        __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"

    else:
        #  (2) If end-delimiter is NOT newline
        #      => there can appear a newline inside the comment
        if newline_number_in_delimiter == 0:
            # -- no newlines in delimiter => line and column number
            #                                must be counted.
            in_loop       = line_column_counter_in_loop
            end_procedure = "        __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                            "                                    - reference_p));\n"
            reference_p_required_f = True
        else:
            # -- newline inside delimiter => line number must be counted
            #                                column number is fixed.
            end_procedure = "        __QUEX_IF_COUNT_COLUMNS_SET((size_t)%i);\n" \
                            % (char_n_after_last_newline + 1)

            if    EndSequence[0] == NEWLINE \
               or len(EndSequence) > 1 and EndSequence[0:2] == [RETOUR, NEWLINE]:
                # If the first character in the sequence is a newline, then the
                # loop exit may prevent the line counting. Thus, the first newline
                # is counted separately and only the remaining ones at the end.
                on_first_delimiter = "/* First delimiter char was a newline */\n" + \
                                     "    __QUEX_IF_COUNT_LINES_ADD((size_t)1);\n"
                end_procedure += "        __QUEX_IF_COUNT_LINES_ADD((size_t)%i);\n" % (newline_number_in_delimiter - 1)
            else:
                in_loop        = line_counter_in_loop
                end_procedure += "        __QUEX_IF_COUNT_LINES_ADD((size_t)%i);\n" % newline_number_in_delimiter

    if reference_p_required_f:
        # The columns are counted by the distance to a reference pointer. Before
        # a reload the distance is added; after it the pointer is set anew.
        reference_p_def = "    __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"
        before_reload   = "    __QUEX_IF_COUNT_COLUMNS_ADD((size_t)(QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer)\n" + \
                          "                                - reference_p));\n"
        after_reload    = "        __QUEX_IF_COUNT_COLUMNS(reference_p = QUEX_NAME(Buffer_tell_memory_adr)(&me->buffer));\n"

    if len(EndSequence) > 1:
        # Advance the input position by the remaining length of the delimiter
        # before the counting code of the end procedure is executed.
        end_procedure = LanguageDB["$input/add"](len(EndSequence)-1) + \
                        "\n" + end_procedure

    return blue_print(code_str,
                     [["$$LC_COUNT_COLUMN_N_POINTER_DEFINITION$$", reference_p_def],
                      ["$$LC_COUNT_IN_LOOP$$",                     in_loop],
                      ["$$LC_COUNT_END_PROCEDURE$$",               end_procedure],
                      ["$$LC_COUNT_BEFORE_RELOAD$$",               before_reload],
                      ["$$LC_COUNT_AFTER_RELOAD$$",                after_reload],
                      ["$$LC_ON_FIRST_DELIMITER$$",                on_first_delimiter],
                      ["$$LC_COUNT_LOOP_EXIT$$",                   exit_loop],
                      ]), \
           reference_p_required_f
Ejemplo n.º 52
0
def output(global_setup):
    """Write the token-id file, i.e. the header that defines the token ids and
       the mapping from token ids to their names.

       global_setup -- the setup; it is also used to construct the local 'Setup'
                       object from which prefix, file names etc. are taken.

       The token ids 'TERMINATION' and 'UNINITIALIZED' must be present in
       'lexer_mode.token_id_db'. Token ids without a number get consecutive
       numbers assigned (in alphabetical order of their names) starting from
       'setup.id_count_offset'. The numbers are stored in the database entries.

       If the user provides his own token-id file, then no file is written;
       'global_setup.output_token_id_file' is set to the user's file and the
       function returns.

       EXITS: with -1, if no token id other than the two mandatory ones is
              defined, or if no token-id database is specified.

       SIDE EFFECT: The '$$CONTENT$$' placeholder of the global 'file_str' is
                    replaced by 'func_str'.
    """
    global file_str
    token_id_db = lexer_mode.token_id_db

    assert "TERMINATION" in token_id_db, \
           "TERMINATION token id must be defined by setup or user."
    assert "UNINITIALIZED" in token_id_db, \
           "UNINITIALIZED token id must be defined by setup or user."
    # (*) Token ID File ________________________________________________________________
    #
    #     The token id file can either be specified as database of
    #     token-id names, or as a file that directly assigns the token-ids
    #     to variables. If the flag '--user-token-id-file' is defined, then
    #     the token-id file is provided by the user. Otherwise, the
    #     token id file is created by the token-id maker.
    #
    #     The token id maker considers the file passed by the option '-t'
    #     as the database file and creates a C++ file with the output filestem
    #     plus the suffix "--token-ids". Note, that the token id file is a
    #     header file.
    #
    setup = Setup(global_setup)
    if len(token_id_db.keys()) == 2:
        # TERMINATION + UNINITIALIZED = 2 token ids. If they are the only ones nothing can be done.
        # (A 'print' with one parenthesized argument behaves the same as the print statement.)
        print("error: No token id other than %sTERMINATION and %sUNINITIALIZED are defined. " % \
              (setup.token_prefix, setup.token_prefix))
        print("error: Quex refuses to proceed. Please, use the 'token { ... }' section to ")
        print("error: specify at least one other token id.")
        sys.exit(-1)

    if global_setup.input_user_token_id_file != "":
        # Token ids are provided by the user => nothing to be generated.
        global_setup.output_token_id_file = global_setup.input_user_token_id_file
        return

    if global_setup.input_token_id_db == "":
        print("error: token-id database not specified")
        sys.exit(-1)

    #______________________________________________________________________________________
    # Length of the longest token name; used to align the generated columns.
    L = max(map(len, token_id_db.keys()))
    def space(Name):
        return " " * (L - len(Name))

    # -- define values for the token ids
    if setup.input_foreign_token_id_file != "":
        token_id_txt = "#include\"%s\"\n" % setup.input_foreign_token_id_file

    else:
        definition_list = []
        next_id = setup.id_count_offset
        for token_name in sorted(token_id_db.keys()):
            token_info = token_id_db[token_name]
            if token_info.number is None:
                # Token ids without a number get the next free number.
                token_info.number = next_id
                next_id += 1
            definition_list.append("#define %s%s %s((QUEX_TOKEN_ID_TYPE)%i)\n"
                                   % (setup.token_prefix, token_name,
                                      space(token_name), token_info.number))
        token_id_txt = "".join(definition_list)

    # -- define the function for token names
    #    (iterates in the order delivered by the database, not in sorted order)
    db_build_txt = "".join(['\n           db[%s%s] %s= std::string("%s");'
                            % (setup.token_prefix, token_name,
                               space(token_name), token_name)
                            for token_name in token_id_db.keys()])

    t = time.localtime()
    date_str = "%iy%im%id_%ih%02im%02is" % (t[0], t[1], t[2], t[3], t[4], t[5])

    file_str = file_str.replace("$$CONTENT$$", func_str)
    content = blue_print(file_str,
                         [["$$TOKEN_ID_DEFINITIONS$$",        token_id_txt],
                          ["$$DATE$$",                        time.asctime()],
                          ["$$TOKEN_CLASS_DEFINITION_FILE$$", setup.token_class_file],
                          ["$$DATE_IG$$",                     date_str],
                          ["$$TOKEN_ID_CASES$$",              db_build_txt],
                          ["$$TOKEN_PREFIX$$",                setup.token_prefix],
                          ["$$TOKEN_CLASS$$",                 setup.token_class]])

    # The file is opened in binary mode; thus the line separators of the
    # platform have to be set explicitly.
    if os.linesep != "\n": content = content.replace("\n", os.linesep)

    fh = open(setup.output_file, "wb")
    try:
        fh.write(content)
    finally:
        # Close the file handle even if writing fails.
        fh.close()
Ejemplo n.º 53
0
def output(global_setup):
    """Write the token-id file, i.e. the header that defines the token ids and
       the mapping from token ids to their names.

       global_setup -- the setup; it is also used to construct the local 'Setup'
                       object from which prefix, file names etc. are taken.

       The token ids 'TERMINATION' and 'UNINITIALIZED' must be present in
       'lexer_mode.token_id_db'. Token ids without a number get consecutive
       numbers assigned (in alphabetical order of their names) starting from
       'setup.id_count_offset'. The numbers are stored in the database entries.

       If the user provides his own token-id file, then no file is written;
       'global_setup.output_token_id_file' is set to the user's file and the
       function returns.

       EXITS: with -1, if no token id other than the two mandatory ones is
              defined, or if no token-id database is specified.

       SIDE EFFECT: The '$$CONTENT$$' placeholder of the global 'file_str' is
                    replaced by 'func_str'.
    """
    global file_str
    token_id_db = lexer_mode.token_id_db

    assert "TERMINATION" in token_id_db, \
           "TERMINATION token id must be defined by setup or user."
    assert "UNINITIALIZED" in token_id_db, \
           "UNINITIALIZED token id must be defined by setup or user."
    # (*) Token ID File ________________________________________________________________
    #
    #     The token id file can either be specified as database of
    #     token-id names, or as a file that directly assigns the token-ids
    #     to variables. If the flag '--user-token-id-file' is defined, then
    #     the token-id file is provided by the user. Otherwise, the
    #     token id file is created by the token-id maker.
    #
    #     The token id maker considers the file passed by the option '-t'
    #     as the database file and creates a C++ file with the output filestem
    #     plus the suffix "--token-ids". Note, that the token id file is a
    #     header file.
    #
    setup = Setup(global_setup)
    if len(token_id_db.keys()) == 2:
        # TERMINATION + UNINITIALIZED = 2 token ids. If they are the only ones nothing can be done.
        # (A 'print' with one parenthesized argument behaves the same as the print statement.)
        print("error: No token id other than %sTERMINATION and %sUNINITIALIZED are defined. " % \
              (setup.token_prefix, setup.token_prefix))
        print("error: Quex refuses to proceed. Please, use the 'token { ... }' section to ")
        print("error: specify at least one other token id.")
        sys.exit(-1)

    if global_setup.input_user_token_id_file != "":
        # Token ids are provided by the user => nothing to be generated.
        global_setup.output_token_id_file = global_setup.input_user_token_id_file
        return

    if global_setup.input_token_id_db == "":
        print("error: token-id database not specified")
        sys.exit(-1)

    #______________________________________________________________________________________
    # Length of the longest token name; used to align the generated columns.
    L = max(map(len, token_id_db.keys()))

    def space(Name):
        return " " * (L - len(Name))

    # -- define values for the token ids
    if setup.input_foreign_token_id_file != "":
        token_id_txt = "#include\"%s\"\n" % setup.input_foreign_token_id_file

    else:
        definition_list = []
        next_id = setup.id_count_offset
        for token_name in sorted(token_id_db.keys()):
            token_info = token_id_db[token_name]
            if token_info.number is None:
                # Token ids without a number get the next free number.
                token_info.number = next_id
                next_id += 1
            definition_list.append(
                "#define %s%s %s((QUEX_TOKEN_ID_TYPE)%i)\n" % (
                    setup.token_prefix, token_name, space(token_name),
                    token_info.number))
        token_id_txt = "".join(definition_list)

    # -- define the function for token names
    #    (iterates in the order delivered by the database, not in sorted order)
    db_build_txt = "".join([
        '\n           db[%s%s] %s= std::string("%s");' % (
            setup.token_prefix, token_name, space(token_name), token_name)
        for token_name in token_id_db.keys()
    ])

    t = time.localtime()
    date_str = "%iy%im%id_%ih%02im%02is" % (t[0], t[1], t[2], t[3], t[4], t[5])

    file_str = file_str.replace("$$CONTENT$$", func_str)
    content = blue_print(
        file_str,
        [["$$TOKEN_ID_DEFINITIONS$$", token_id_txt],
         ["$$DATE$$", time.asctime()],
         ["$$TOKEN_CLASS_DEFINITION_FILE$$", setup.token_class_file],
         ["$$DATE_IG$$", date_str], ["$$TOKEN_ID_CASES$$", db_build_txt],
         ["$$TOKEN_PREFIX$$", setup.token_prefix],
         ["$$TOKEN_CLASS$$", setup.token_class]])

    # The file is opened in binary mode; thus the line separators of the
    # platform have to be set explicitly.
    if os.linesep != "\n": content = content.replace("\n", os.linesep)

    fh = open(setup.output_file, "wb")
    try:
        fh.write(content)
    finally:
        # Close the file handle even if writing fails.
        fh.close()
Ejemplo n.º 54
0
def write_engine_header(Modes, Setup):
    """Write the header file of the lexical analyzer class.

       The class template is read from the template directory, its switches
       ('$$SWITCH$$ <name>') are turned into active or commented-out '#define'
       lines, its '$$...$$' placeholders are replaced and the result is written
       to the file 'Setup.output_file_stem'.

       Modes -- dictionary: mode name --> mode object.
       Setup -- the setup of the generator run.

       SIDE EFFECT: If 'Setup.input_derived_class_name' is empty, then it is set
                    to the name of the lexer class.

       RAISES: KeyError, if 'Setup.bytes_per_ucs_code_point' is not one of
               1, 2, 4, or "wchar_t".
    """
    QuexClassHeaderFileTemplate = (Setup.QUEX_TEMPLATE_DB_DIR +
                                   "/template/lexical_analyzer_class").replace(
                                       "//", "/")
    CoreEngineDefinitionsHeader = (Setup.QUEX_TEMPLATE_DB_DIR +
                                   "/core_engine/").replace("//", "/")
    QuexClassHeaderFileOutput = Setup.output_file_stem
    LexerClassName = Setup.output_engine_name
    VersionID = Setup.input_application_version_id
    QuexVersionID = Setup.QUEX_VERSION

    # -- determine character type according to number of bytes per ucs character code point
    #    for the internal engine.
    quex_character_type_str = {
        1: "uint8_t ",
        2: "uint16_t",
        4: "uint32_t",
        "wchar_t": "wchar_t"
    }[Setup.bytes_per_ucs_code_point]
    quex_lexeme_type_str = {
        1: "char    ",
        2: "int16_t",
        4: "int32_t",
        "wchar_t": "wchar_t"
    }[Setup.bytes_per_ucs_code_point]

    #    are bytes of integers Setup 'little endian' or 'big endian' ?
    #    (only the names of the multi-byte codings depend on the byte order)
    if Setup.byte_order == "little": byte_order_suffix = "LE"
    else:                            byte_order_suffix = "BE"
    quex_coding_name_str = {
        1: "ASCII",
        2: "UCS-2" + byte_order_suffix,
        4: "UCS-4" + byte_order_suffix,
        "wchar_t": "WCHAR_T"
    }[Setup.bytes_per_ucs_code_point]

    # -- determine whether the lexical analyser needs indentation counting
    #    support. if one mode has an indentation handler, than indentation
    #    support must be provided.
    indentation_support_f = False
    for mode in Modes.values():
        if mode.on_indentation.get_code() != "":
            indentation_support_f = True
            break

    # NOTE: First mode-id needs to be '1' for compatibility with flex generated engines
    # NOTE(review): The ids follow the order in which 'Modes.keys()' delivers
    #               the mode names.
    lex_id_definition_list = []
    for i, name in enumerate(Modes.keys()):
        lex_id_definition_list.append("const int LEX_ID_%s = %i;\n" % (name, i + 1))
    lex_id_definitions_str = "".join(lex_id_definition_list)

    include_guard_extension = get_include_guard_extension(
        Setup.output_file_stem)

    # -- instances of mode classes as members of the lexer
    mode_object_members_txt,     \
    constructor_txt,             \
    mode_specific_functions_txt, \
    friend_txt =                 \
         get_mode_class_related_code_fragments(Modes.values(), LexerClassName)

    # -- define a pointer that directly has the type of the derived class
    if Setup.input_derived_class_name == "":
        Setup.input_derived_class_name = LexerClassName
        derived_class_type_declaration = ""
    else:
        derived_class_type_declaration = "class %s;" % Setup.input_derived_class_name

    # -- the friends of the class
    friends_str = "".join(["    friend class %s;\n" % friend
                           for friend in Setup.input_lexer_class_friends])

    # -- the class body extension
    class_body_extension_str = lexer_mode.class_body.get_code()

    # -- the class constructor extension
    class_constructor_extension_str = lexer_mode.class_init.get_code()

    fh = open_file_or_die(QuexClassHeaderFileTemplate)
    try:
        template_code_txt = fh.read()
    finally:
        # Close the file handle even if reading fails.
        fh.close()

    # -- check if exit/entry handlers have to be active
    entry_handler_active_f = False
    exit_handler_active_f = False
    for mode in Modes.values():
        if mode.on_entry_code_fragments() != []: entry_handler_active_f = True
        if mode.on_exit_code_fragments() != []: exit_handler_active_f = True

    def set_switch(txt, SwitchF, Name):
        """RETURNS: 'txt' where the switch 'Name' is replaced by an active
                    '#define', if 'SwitchF' is set, and by a commented-out
                    '#define' otherwise.
        """
        if SwitchF: definition = "#define    %s" % Name
        else:       definition = "// #define %s" % Name
        return txt.replace("$$SWITCH$$ %s" % Name, definition)

    # -- the switches, applied in the given order: (state, name)
    switch_list = [
        (entry_handler_active_f,                 "__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT"),
        (exit_handler_active_f,                  "__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT"),
        (indentation_support_f,                  "__QUEX_OPTION_INDENTATION_TRIGGER_SUPPORT"),
        (True,                                   "__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION"),
        (Setup.enable_iconv_f,                   "QUEX_OPTION_ENABLE_ICONV"),
        (not Setup.disable_token_queue_f,        "QUEX_OPTION_TOKEN_SENDING_VIA_QUEUE"),
        (not Setup.disable_string_accumulator_f, "QUEX_OPTION_STRING_ACCUMULATOR"),
        (Setup.post_categorizer_f,               "QUEX_OPTION_POST_CATEGORIZER"),
        (True,                                   "QUEX_OPTION_VIRTUAL_FUNCTION_ON_ACTION_ENTRY"),
        (True,                                   "QUEX_OPTION_LINE_NUMBER_COUNTING"),
        (True,                                   "QUEX_OPTION_COLUMN_NUMBER_COUNTING"),
        (Setup.output_debug_f,                   "QUEX_OPTION_DEBUG_TOKEN_SENDING"),
        (Setup.output_debug_f,                   "QUEX_OPTION_DEBUG_MODE_TRANSITIONS"),
        (Setup.output_debug_f,                   "QUEX_OPTION_DEBUG_QUEX_PATTERN_MATCHES"),
        (True,                                   "QUEX_OPTION_INCLUDE_STACK_SUPPORT"),
        (not Setup.no_mode_transition_check_f,   "QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK"),
    ]

    txt = template_code_txt
    for switch_f, switch_name in switch_list:
        txt = set_switch(txt, switch_f, switch_name)

    txt = blue_print(txt, [
        ["$$BUFFER_LIMIT_CODE$$",
         "0x%X" % Setup.buffer_limit_code],
        ["$$CONSTRUCTOR_EXTENSTION$$", class_constructor_extension_str],
        ["$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$", constructor_txt],
        ["$$CORE_ENGINE_DEFINITIONS_HEADER$$", CoreEngineDefinitionsHeader],
        ["$$CLASS_BODY_EXTENSION$$", class_body_extension_str],
        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension],
        [
            "$$INITIAL_LEXER_MODE_ID$$",
            "LEX_ID_" + lexer_mode.initial_mode.get_code()
        ],
        ["$$LEXER_BUILD_DATE$$", time.asctime()],
        ["$$LEXER_BUILD_VERSION$$", VersionID],
        ["$$LEXER_CLASS_FRIENDS$$", friends_str],
        ["$$LEXER_CLASS_NAME$$", LexerClassName],
        ["$$LEXER_DERIVED_CLASS_DECL$$", derived_class_type_declaration],
        ["$$LEXER_DERIVED_CLASS_NAME$$", Setup.input_derived_class_name],
        ["$$LEX_ID_DEFINITIONS$$", lex_id_definitions_str],
        ["$$MAX_MODE_CLASS_N$$", repr(len(Modes))],
        ["$$MODE_CLASS_FRIENDS$$", friend_txt],
        ["$$MODE_OBJECT_MEMBERS$$", mode_object_members_txt],
        ["$$MODE_SPECIFIC_ANALYSER_FUNCTIONS$$", mode_specific_functions_txt],
        [
            "$$PRETTY_INDENTATION$$", "     " + " " *
            (len(LexerClassName) * 2 + 2)
        ],
        ["$$QUEX_TEMPLATE_DIR$$", Setup.QUEX_TEMPLATE_DB_DIR],
        ["$$QUEX_VERSION$$", QuexVersionID],
        ["$$TOKEN_CLASS$$", Setup.input_token_class_name],
        [
            "$$TOKEN_CLASS_DEFINITION_FILE$$",
            Setup.input_token_class_file.replace("//", "/")
        ],
        [
            "$$TOKEN_ID_DEFINITION_FILE$$",
            Setup.output_token_id_file.replace("//", "/")
        ],
        ["$$QUEX_CHARACTER_TYPE$$", quex_character_type_str],
        ["$$QUEX_LEXEME_TYPE$$", quex_lexeme_type_str],
        ["$$CORE_ENGINE_CHARACTER_CODING$$", quex_coding_name_str],
        ["$$USER_DEFINED_HEADER$$",
         lexer_mode.header.get_code() + "\n"],
    ])

    # The file is opened in binary mode; thus the line separators of the
    # platform have to be set explicitly.
    if os.linesep != "\n": txt = txt.replace("\n", os.linesep)

    fh_out = open(QuexClassHeaderFileOutput, "wb")
    try:
        fh_out.write(txt)
    finally:
        # Close the file handle even if writing fails.
        fh_out.close()
Ejemplo n.º 55
0
def _do(Descr):
    """Produce the token class source text from a token type descriptor.

    Two template files are read: the token class template and the token
    class implementation template. Their locations are composed of
    QUEX_PATH, Setup.language_db["$code_base"], and the entries
    "$token_template_file" and "$token_template_i_file" of Setup.language_db.
    Both templates are handed to blue_print() together with a list of
    ["$$PLACE_HOLDER$$", replacement] pairs built from 'Descr'.

    Descr -- object of class 'TokenTypeDescriptor'; must not be None.
             An empty member database is allowed.

    RETURNS: (txt, txt_i), i.e. declaration and implementation as two
             strings.
    """
    # The following things must be ensured before the function is called
    assert Descr is not None
    assert Descr.__class__.__name__ == "TokenTypeDescriptor"
    ## ALLOW: Descr.get_member_db().keys() == []

    def read_template(FileName):
        # Close the handle explicitly instead of leaving it to the
        # garbage collector.
        fh = open_file_or_die(FileName, Mode="rb")
        try:
            return fh.read()
        finally:
            fh.close()

    code_base_dir = QUEX_PATH + Setup.language_db["$code_base"]
    TemplateFile  = code_base_dir + Setup.language_db["$token_template_file"]
    TemplateIFile = code_base_dir + Setup.language_db["$token_template_i_file"]

    template_str   = read_template(TemplateFile)
    template_i_str = read_template(TemplateIFile)

    # The destructor is only declared 'virtual' if the token class is open
    # for derivation.
    virtual_destructor_str = ""
    if Descr.open_for_derivation_f: virtual_destructor_str = "virtual "

    if Descr.copy.get_pure_code() == "":
        # Default copy operation: Plain Copy of token memory
        copy_str = "__QUEX_STD_memcpy((void*)__this, (void*)__That, sizeof(QUEX_TYPE_TOKEN));\n"
    else:
        copy_str = Descr.copy.get_code()

    # Without a user defined 'take_text' section the function body simply
    # returns 'true'.
    take_text_str = Descr.take_text.get_code()
    if take_text_str == "": take_text_str = "return true;\n"

    include_guard_extension_str = get_include_guard_extension(
                                        Setup.language_db["$namespace-ref"](Descr.name_space)
                                        + "__" + Descr.class_name)

    # In case of plain 'C' the class name must incorporate the namespace (list)
    token_class_name = Descr.class_name
    if Setup.language == "C":
        token_class_name = Setup.token_class_name_safe

    # Declaration: fill the place holders of the token class template.
    txt = blue_print(template_str,
             [
              ["$$BODY$$",                    Descr.body.get_code()],
              ["$$CONSTRUCTOR$$",             Descr.constructor.get_code()],
              ["$$COPY$$",                    copy_str],
              ["$$DESTRUCTOR$$",              Descr.destructor.get_code()],
              ["$$DISTINCT_MEMBERS$$",        get_distinct_members(Descr)],
              ["$$FOOTER$$",                  Descr.footer.get_code()],
              ["$$FUNC_TAKE_TEXT$$",          take_text_str],
              ["$$HEADER$$",                  Descr.header.get_code()],
              ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension_str],
              ["$$NAMESPACE_CLOSE$$",         Setup.language_db["$namespace-close"](Descr.name_space)],
              ["$$NAMESPACE_OPEN$$",          Setup.language_db["$namespace-open"](Descr.name_space)],
              ["$$QUICK_SETTERS$$",           get_quick_setters(Descr)],
              ["$$SETTERS_GETTERS$$",         get_setter_getter(Descr)],
              ["$$TOKEN_CLASS$$",             token_class_name],
              ["$$TOKEN_REPETITION_N_GET$$",  Descr.repetition_get.get_code()],
              ["$$TOKEN_REPETITION_N_SET$$",  Descr.repetition_set.get_code()],
              ["$$UNION_MEMBERS$$",           get_union_members(Descr)],
              ["$$VIRTUAL_DESTRUCTOR$$",      virtual_destructor_str],
             ])

    # Implementation: fill the place holders of the implementation template.
    txt_i = blue_print(template_i_str,
                       [
                        ["$$CONSTRUCTOR$$",             Descr.constructor.get_code()],
                        ["$$COPY$$",                    copy_str],
                        ["$$DESTRUCTOR$$",              Descr.destructor.get_code()],
                        ["$$FOOTER$$",                  Descr.footer.get_code()],
                        ["$$FUNC_TAKE_TEXT$$",          take_text_str],
                        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension_str],
                        ["$$NAMESPACE_CLOSE$$",         Setup.language_db["$namespace-close"](Descr.name_space)],
                        ["$$NAMESPACE_OPEN$$",          Setup.language_db["$namespace-open"](Descr.name_space)],
                        ["$$TOKEN_CLASS$$",             token_class_name],
                        ["$$TOKEN_REPETITION_N_GET$$",  Descr.repetition_get.get_code()],
                        ["$$TOKEN_REPETITION_N_SET$$",  Descr.repetition_set.get_code()],
                       ])

    # Return declaration and implementation as two strings
    return txt, txt_i