def parse_section(fh, Setup):

    # NOTE: Reaching end of file is legitimate while trying to read a new
    #       section title; therefore, the end-of-file catcher below does not
    #       cover this initial read.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_next_word(fh)

    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'   => define a mode
        #     -- 'start = ...;'   => define the name of the initial mode
        #     -- 'header { ... }' => define code that is to be pasted on top
        #                            of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'   => define code that is to be pasted in the class' body
        #                            of the engine (e.g. "public: int  my_member;")
        #     -- 'init { ... }'   => define code that is to be pasted in the class' constructors
        #                            of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }' => define pattern shorthands such as IDENTIFIER for [a-z]+
        #     -- 'token { ... }'  => define token ids
        #
        if word == "start":
            parse_initial_mode_definition(fh)
            return
        
        elif word == "header":
            fragment = code_fragment.parse(fh, "header", AllowBriefTokenSenderF=False)        
            lexer_mode.header = fragment
            return

        elif word == "body":
            fragment = code_fragment.parse(fh, "body", AllowBriefTokenSenderF=False)        
            lexer_mode.class_body = fragment
            return

        elif word == "init":
            fragment = code_fragment.parse(fh, "init", AllowBriefTokenSenderF=False)
            lexer_mode.class_init = fragment
            return
            
        elif word == "define":
            parse_pattern_name_definitions(fh, Setup)
            return

        elif word == "token":       
            parse_token_id_definitions(fh, Setup)
            return

        elif word == "mode":
            mode_definition.parse(fh, Setup)
            return
        else:
            error_msg("sequence '%s' not recognized as valid keyword in this context\n" % word + \
                      "use: 'mode', 'header', 'body', 'init', 'define', 'token' or 'start'", fh)
    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing '%s' section" % word, fh)
Example #2
def check_for_event_specification(word, fh, new_mode):
    pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if word == "<<EOF>>": word = "on_end_of_stream"
    elif word == "<<FAIL>>": word = "on_failure"
    elif len(word) < 3 or word[:3] != "on_": return False

    comment = "Unknown event handler '%s'. \n" % word + \
              "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
              "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    verify_word_in_list(word, lexer_mode.event_handler_db.keys(), comment, fh)
    __validate_required_token_policy_queue(word, fh, pos)

    continue_f = True
    if word == "on_end_of_stream":
        # When a termination token is sent, no other token shall follow.
        # => Enforce return from the analyzer! Do not allow CONTINUE!
        continue_f = False

    new_mode.events[word] = code_fragment.parse(fh,
                                                "%s::%s event handler" %
                                                (new_mode.name, word),
                                                ContinueF=continue_f)

    return True
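A hedged sketch of the caller protocol: the mode-body parser reads a word and offers it to this check first; a False return means the word starts an ordinary pattern. The fallback name below is hypothetical.

word = read_next_word(fh)
if not check_for_event_specification(word, fh, new_mode):
    # Hypothetical fallback -- the real caller hands the word over to the
    # pattern/action parser instead.
    parse_pattern_and_action(word, fh, new_mode)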
Example #3
def check_for_event_specification(word, fh, new_mode):
    pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if   word == "<<EOF>>":                  word = "on_end_of_stream"
    elif word == "<<FAIL>>":                 word = "on_failure"
    elif len(word) < 3 or word[:3] != "on_": return False

    comment = "Unknown event handler '%s'. \n" % word + \
              "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
              "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    verify_word_in_list(word, lexer_mode.event_handler_db.keys(), comment, fh)
    __validate_required_token_policy_queue(word, fh, pos)

    continue_f = True
    if word == "on_end_of_stream":
        # When a termination token is sent, no other token shall follow. 
        # => Enforce return from the analyzer! Do not allow CONTINUE!
        continue_f = False

    new_mode.events[word] = code_fragment.parse(fh, "%s::%s event handler" % (new_mode.name, word),
                                                ContinueF=continue_f)

    return True
Example #4
def __parse_section(fh, descriptor, already_defined_list):
    global token_type_code_fragment_db
    assert type(already_defined_list) == list

    SubsectionList = ["name", "file_name", "standard", "distinct", "union", "inheritable", "noid"] \
                      + token_type_code_fragment_db.keys()

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        fh.seek(position)
        if check(fh, "}"): 
            fh.seek(position) 
            return False
        error_msg("Missing token_type section ('standard', 'distinct', or 'union').", fh)

    verify_word_in_list(word, SubsectionList, 
                        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        descriptor.class_name, descriptor.name_space, descriptor.class_name_safe = read_namespaced_name(fh, "token_type")
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'name' specification.", fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_quit(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False
        check_or_quit(fh, ";")

    elif word == "file_name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.", fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'file_name' specification.", fh)

    elif word in ["standard", "distinct", "union"]:
        if   word == "standard": parse_standard_members(fh, descriptor, already_defined_list)
        elif word == "distinct": parse_distinct_members(fh, descriptor, already_defined_list)
        elif word == "union":    parse_union_members(fh, descriptor, already_defined_list)

        if not check(fh, "}"):
            fh.seek(position)
            error_msg("Missing closing '}' at end of token_type section '%s'." % word, fh);

    elif word in token_type_code_fragment_db.keys():
        fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)        
        descriptor.__dict__[word] = fragment

    else: 
        assert False, "This code section should not be reachable because 'word'\n" + \
                      "was checked to fit in one of the 'elif' cases."

    return True
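The return value implements a simple protocol: True after one subsection was consumed, False once the closing '}' is seen (with the stream rewound in front of it). A minimal caller sketch, assuming a descriptor object from the surrounding module:

# Sketch only: drain a token_type body one subsection at a time.
already_defined_list = []
while __parse_section(fh, descriptor, already_defined_list):
    pass
# fh is now positioned just before the '}' that closes the token_type block.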
Example #5
def parse_action_code(new_mode, fh, pattern, pattern_state_machine):

    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()

        code_obj = code_fragment.parse(fh,
                                       "regular expression",
                                       ErrorOnFailureF=False)
        if code_obj is not None:
            new_mode.add_match(pattern, code_obj, pattern_state_machine)
            return

        fh.seek(position)
        word = read_until_letter(fh, [";"])
        if word == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the
            # current pattern index (important for inherited patterns, which have
            # higher precedence). The parser already constructed a state machine
            # for the pattern that is to be assigned a new priority; since that
            # machine is not used, we simply reuse its id.
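            # read_until_letter() consumed the terminating ';' -- step back one
            # byte so that check_or_quit() can verify it explicitly.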
            fh.seek(-1, 1)
            check_or_quit(fh, ";",
                          ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_priority(pattern, pattern_state_machine,
                                        pattern_state_machine.get_id(),
                                        fh.name,
                                        get_current_line_info_number(fh))

        elif word == "DELETION":
            # This mark deletes any pattern that was inherited with the same 'name'
            fh.seek(-1, 1)
            check_or_quit(fh, ";",
                          ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_deletion(pattern, pattern_state_machine,
                                        fh.name,
                                        get_current_line_info_number(fh))

        else:
            error_msg("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern + \
                      "found: '%s'. Note, that since quex version 0.33.5 it is required to add a ';'\n" % word + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing action code for pattern.",
                  fh)
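# Both marks above act on patterns inherited from a base mode. A hypothetical
# mode body the two branches would accept (illustration only, not quex
# distribution code); since quex 0.33.5 the terminating ';' is mandatory:
#
#   mode DERIVED : BASE {
#       {IDENTIFIER}  PRIORITY-MARK;   # re-rank the inherited pattern here
#       {WHITESPACE}  DELETION;        # drop the inherited pattern entirely
#   }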
def check_for_event_specification(word, fh, new_mode):

    if word == "on_entry":
        # Event: enter into mode
        new_mode.on_entry = code_fragment.parse(
            fh, "%s::on_entry event handler" % new_mode.name)
        return True

    elif word == "on_exit":
        # Event: exit from mode
        new_mode.on_exit = code_fragment.parse(
            fh, "%s::on_exit event handler" % new_mode.name)
        return True

    elif word == "on_match":
        # Event: a pattern matched
        new_mode.on_match = code_fragment.parse(
            fh, "%s::on_match event handler" % new_mode.name)
        return True

    elif word == "on_indentation":
        # Event: start of indentation,
        #        first non-whitespace after whitespace
        new_mode.on_indentation = code_fragment.parse(
            fh, "%s::on_indentation event handler" % new_mode.name)
        return True

    elif word == "on_failure" or word == "<<FAIL>>":
        # Event: No pattern matched for current position.
        # NOTE: See 'on_end_of_stream' comments.
        new_mode.on_failure = code_fragment.parse(
            fh, "%s::on_failure event handler" % new_mode.name)
        return True

    elif word == "on_end_of_stream" or word == "<<EOF>>":
        # Event: End of data stream / end of file
        # NOTE: The regular expression parser relies on <<EOF>> and <<FAIL>>. So those
        #       patterns are entered here, even if later versions of quex might dismiss
        #       those rule definitions in favor of consistent event handlers.
        new_mode.on_end_of_stream = code_fragment.parse(
            fh, "%s::on_end_of_stream event handler" % new_mode.name)
        return True

    elif len(word) >= 3 and word[:3] == "on_":
        error_msg("Unknown event handler '%s'. Known event handlers are:\n\n" % word + \
                  "on_entry, on_exit, on_indentation, on_end_of_stream, on_failure. on_match\n\n" + \
                  "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
                  "use double quotes to bracket patterns that start with 'on_'.", fh)

    # word was not an event specification
    return False
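An illustrative mode body exercising every handler recognized above (hypothetical input; the bracketed fragments are placeholders):

#   mode PROGRAM {
#       on_entry         { /* entering PROGRAM */ }
#       on_exit          { /* leaving PROGRAM */ }
#       on_match         { /* runs for every matched pattern */ }
#       on_indentation   { /* first non-whitespace after whitespace */ }
#       on_failure       { /* no pattern matched; '<<FAIL>>' is an alias */ }
#       on_end_of_stream { /* end of input; '<<EOF>>' is an alias */ }
#   }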
Example #7
def parse_action_code(new_mode, fh, pattern, pattern_state_machine):

    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()
            
        code_obj = code_fragment.parse(fh, "regular expression", ErrorOnFailureF=False) 
        if code_obj is not None:
            new_mode.add_match(pattern, code_obj, pattern_state_machine)
            return

        fh.seek(position)
        word = read_until_letter(fh, [";"])
        if word == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the
            # current pattern index (important for inherited patterns, which have
            # higher precedence). The parser already constructed a state machine
            # for the pattern that is to be assigned a new priority; since that
            # machine is not used, we simply reuse its id.
            fh.seek(-1, 1)
            check_or_quit(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_priority(pattern, pattern_state_machine, pattern_state_machine.get_id(), 
                                        fh.name, get_current_line_info_number(fh))

        elif word == "DELETION":
            # This mark deletes any pattern that was inherited with the same 'name'
            fh.seek(-1, 1)
            check_or_quit(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_deletion(pattern, pattern_state_machine, fh.name, get_current_line_info_number(fh))
            
        else:
            error_msg("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern + \
                      "found: '%s'. Note, that since quex version 0.33.5 it is required to add a ';'\n" % word + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)


    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing action code for pattern.", fh)
def check_for_event_specification(word, fh, new_mode):

    if word == "on_entry":
        # Event: enter into mode
        new_mode.on_entry = code_fragment.parse(fh, "%s::on_entry event handler" % new_mode.name)
        return True
    
    elif word == "on_exit":
        # Event: exit from mode
        new_mode.on_exit = code_fragment.parse(fh, "%s::on_exit event handler" % new_mode.name)
        return True

    elif word == "on_match":
        # Event: a pattern matched
        new_mode.on_match = code_fragment.parse(fh, "%s::on_match event handler" % new_mode.name)
        return True

    elif word == "on_indentation":
        # Event: start of indentation, 
        #        first non-whitespace after whitespace
        new_mode.on_indentation = code_fragment.parse(fh, "%s::on_indentation event handler" % new_mode.name)
        return True

    elif word == "on_failure" or word == "<<FAIL>>":
        # Event: No pattern matched for current position.
        # NOTE: See 'on_end_of_stream' comments.
        new_mode.on_failure = code_fragment.parse(fh, "%s::on_failure event handler" % new_mode.name)
        return True

    elif word == "on_end_of_stream" or word == "<<EOF>>": 
        # Event: End of data stream / end of file
        # NOTE: The regular expression parser relies on <<EOF>> and <<FAIL>>. So those
        #       patterns are entered here, even if later versions of quex might dismiss
        #       those rule definitions in favor of consistent event handlers.
        new_mode.on_end_of_stream = code_fragment.parse(fh, "%s::on_end_of_stream event handler" % new_mode.name)
        return True

    elif len(word) >= 3 and word[:3] == "on_":    
        error_msg("Unknown event handler '%s'. Known event handlers are:\n\n" % word + \
                  "on_entry, on_exit, on_indentation, on_end_of_stream, on_failure. on_match\n\n" + \
                  "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
                  "use double quotes to bracket patterns that start with 'on_'.", fh)

    # word was not an event specification 
    return False
Example #9
def __parse_section(fh, descriptor, already_defined_list):
    global token_type_code_fragment_db
    assert type(already_defined_list) == list

    SubsectionList = ["name", "file_name", "standard", "distinct", "union", "inheritable", "noid"] \
                      + token_type_code_fragment_db.keys()

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        fh.seek(position)
        if check(fh, "}"):
            fh.seek(position)
            return False
        error_msg(
            "Missing token_type section ('standard', 'distinct', or 'union').",
            fh)

    verify_word_in_list(
        word, SubsectionList,
        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        descriptor.class_name, descriptor.name_space, descriptor.class_name_safe = read_namespaced_name(
            fh, "token_type")
        if not check(fh, ";"):
            error_msg(
                "Missing terminating ';' in token_type 'name' specification.",
                fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_quit(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False
        check_or_quit(fh, ";")

    elif word == "file_name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.",
                      fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg(
                "Missing terminating ';' in token_type 'file_name' specification.",
                fh)

    elif word in ["standard", "distinct", "union"]:
        if word == "standard":
            parse_standard_members(fh, descriptor, already_defined_list)
        elif word == "distinct":
            parse_distinct_members(fh, descriptor, already_defined_list)
        elif word == "union":
            parse_union_members(fh, descriptor, already_defined_list)

        if not check(fh, "}"):
            fh.seek(position)
            error_msg(
                "Missing closing '}' at end of token_type section '%s'." %
                word, fh)

    elif word in token_type_code_fragment_db.keys():
        fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
        descriptor.__dict__[word] = fragment

    else:
        assert False, "This code section should not be reachable because 'word'\n" + \
                      "was checked to fit in one of the 'elif' cases."

    return True
def parse_section(fh, Setup):

    # NOTE: Reaching end of file is legitimate while trying to read a new
    #       section title; therefore, the end-of-file catcher below does not
    #       cover this initial read.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_next_word(fh)

    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'   => define a mode
        #     -- 'start = ...;'   => define the name of the initial mode
        #     -- 'header { ... }' => define code that is to be pasted on top
        #                            of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'   => define code that is to be pasted in the class' body
        #                            of the engine (e.g. "public: int  my_member;")
        #     -- 'init { ... }'   => define code that is to be pasted in the class' constructors
        #                            of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }' => define pattern shorthands such as IDENTIFIER for [a-z]+
        #     -- 'token { ... }'  => define token ids
        #
        if word == "start":
            parse_initial_mode_definition(fh)
            return

        elif word == "header":
            fragment = code_fragment.parse(fh,
                                           "header",
                                           AllowBriefTokenSenderF=False)
            lexer_mode.header = fragment
            return

        elif word == "body":
            fragment = code_fragment.parse(fh,
                                           "body",
                                           AllowBriefTokenSenderF=False)
            lexer_mode.class_body = fragment
            return

        elif word == "init":
            fragment = code_fragment.parse(fh,
                                           "init",
                                           AllowBriefTokenSenderF=False)
            lexer_mode.class_init = fragment
            return

        elif word == "define":
            parse_pattern_name_definitions(fh, Setup)
            return

        elif word == "token":
            parse_token_id_definitions(fh, Setup)
            return

        elif word == "mode":
            mode_definition.parse(fh, Setup)
            return
        else:
            error_msg("sequence '%s' not recognized as valid keyword in this context\n" % word + \
                      "use: 'mode', 'header', 'body', 'init', 'define', 'token' or 'start'", fh)
    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing '%s' section" % word, fh)
Example #11
def parse_section(fh):
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: Reaching end of file is legitimate while trying to read a new
    #       section title; therefore, the end-of-file catcher below does not
    #       cover this initial read.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        error_msg("Missing section title.", fh)

    SectionTitleList = ["start", "define", "token", "mode", "repeated_token", "token_type" ] + lexer_mode.fragment_db.keys()

    verify_word_in_list(word, SectionTitleList, "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'     => define a mode
        #     -- 'start = ...;'     => define the name of the initial mode
        #     -- 'header { ... }'   => define code that is to be pasted on top
        #                              of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'     => define code that is to be pasted in the class' body
        #                              of the engine (e.g. "public: int  my_member;")
        #     -- 'init { ... }'     => define code that is to be pasted in the class' constructors
        #                              of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }'   => define pattern shorthands such as IDENTIFIER for [a-z]+
        #     -- 'repeated_token { ... }' => enables token repetition; defines
        #                                    the token ids that may be repeated.
        #     -- 'token { ... }'    => define token ids
        #     -- 'token_type { ... }'  => define a customized token type
        #
        if word in lexer_mode.fragment_db.keys():
            element_name = lexer_mode.fragment_db[word]
            fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)        
            lexer_mode.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error_msg("Missing mode_name after 'start ='", fh)
            elif lexer_mode.initial_mode.get_pure_code() != "":
                error_msg("start mode defined more than once!", fh, DontExitF=True)
                error_msg("previously defined here",
                          lexer_mode.initial_mode.filename,
                          lexer_mode.initial_mode.line_n)
        
            lexer_mode.initial_mode = UserCodeFragment(mode_name, fh.name, 
                                                       get_current_line_info_number(fh))
            return

        elif word == "repeated_token":
            lexer_mode.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in lexer_mode.token_repetition_token_id_list:
                verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                    lexer_mode.token_id_db.keys(),
                                    "Token ID '%s' not yet defined." % token_name,
                                    fh, ExitF=False)
            return
            
        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":       
            parse_token_id_definitions(fh)
            return

        elif word == "token_type":       

            if Setup.token_class_file != "":
                error_msg("Token type definition inadmissible while specifying on the command line\n" + \
                          "the file %s to contain a manually written token class." % repr(Setup.token_class_file),
                          fh)
       
            if lexer_mode.token_type_definition is None:
                lexer_mode.token_type_definition = token_type_definition.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error_msg("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error_msg("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error_msg("Previously defined here.",
                          lexer_mode.token_type_definition.file_name_of_token_type_definition,
                          lexer_mode.token_type_definition.line_n_of_token_type_definition)
            return

        elif word == "mode":
            # When the first mode is parsed, a token_type definition must be
            # present; if not, the default token type definition is used.
            if lexer_mode.token_type_definition is None:
                sub_fh = open_file_or_die(os.environ["QUEX_PATH"] 
                                          + Setup.language_db["$code_base"] 
                                          + Setup.language_db["$token-default-file"])
                parse_section(sub_fh)
                sub_fh.close()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode_definition.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing '%s' section" % word, fh)