def parse_section(fh, Setup):
    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_next_word(fh)

    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'   => define a mode
        #     -- 'start = ...;'   => define the name of the initial mode
        #     -- 'header { ... }' => define code that is to be pasted on top
        #                            of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'   => define code that is to be pasted in the class' body
        #                            of the engine (e.g. "public: int my_member;")
        #     -- 'init { ... }'   => define code that is to be pasted in the class' constructors
        #                            of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }' => define pattern shorthands such as IDENTIFIER for [a-z]+
        #     -- 'token { ... }'  => define token ids
        #
        if word == "start":
            parse_initial_mode_definition(fh)
            return

        elif word == "header":
            fragment = code_fragment.parse(fh, "header", AllowBriefTokenSenderF=False)
            lexer_mode.header = fragment
            return

        elif word == "body":
            fragment = code_fragment.parse(fh, "body", AllowBriefTokenSenderF=False)
            lexer_mode.class_body = fragment
            return

        elif word == "init":
            fragment = code_fragment.parse(fh, "init", AllowBriefTokenSenderF=False)
            lexer_mode.class_init = fragment
            return

        elif word == "define":
            parse_pattern_name_definitions(fh, Setup)
            return

        elif word == "token":
            parse_token_id_definitions(fh, Setup)
            return

        elif word == "mode":
            mode_definition.parse(fh, Setup)
            return

        else:
            error_msg("Sequence '%s' not recognized as a valid keyword in this context.\n" % word + \
                      "Use: 'mode', 'header', 'body', 'init', 'define', 'token' or 'start'.", fh)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing '%s' section." % word, fh)
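
# Illustration (hedged sketch, not part of the original source): the keywords
# dispatched above correspond to the top-level sections of a quex input file,
# roughly of this shape. The concrete identifiers are taken from the examples
# in the comment block above; 'PROGRAM' is an invented mode name.
#
#     header { #include<...> }
#     body   { public: int my_member; }
#     init   { my_member = -1; }
#     define { IDENTIFIER [a-z]+ }
#     token  { IDENTIFIER; }
#     start  = PROGRAM;
#     mode PROGRAM { ... }
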
def check_for_event_specification(word, fh, new_mode):
    pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if word == "<<EOF>>":
        word = "on_end_of_stream"
    elif word == "<<FAIL>>":
        word = "on_failure"
    elif len(word) < 3 or word[:3] != "on_":
        return False

    comment = "Unknown event handler '%s'.\n" % word + \
              "Note that any pattern starting with 'on_' is considered an event handler.\n" + \
              "Use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    verify_word_in_list(word, lexer_mode.event_handler_db.keys(), comment, fh)
    __validate_required_token_policy_queue(word, fh, pos)

    continue_f = True
    if word == "on_end_of_stream":
        # When a termination token is sent, no other token shall follow.
        # => Enforce return from the analyzer! Do not allow CONTINUE!
        continue_f = False

    new_mode.events[word] = code_fragment.parse(fh,
                                                "%s::%s event handler" % (new_mode.name, word),
                                                ContinueF=continue_f)
    return True
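
# Illustration (hedged sketch): inside a mode body, an event handler is written
# like a pattern whose name starts with 'on_'; '<<EOF>>' and '<<FAIL>>' are
# accepted as aliases, as handled above. The mode name and handler bodies are
# invented for illustration.
#
#     mode PROGRAM {
#         on_end_of_stream { /* termination token; CONTINUE not allowed here */ }
#         <<FAIL>>         { /* alias for 'on_failure' */ }
#     }
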
def __parse_section(fh, descriptor, already_defined_list):
    global token_type_code_fragment_db
    assert type(already_defined_list) == list

    SubsectionList = ["name", "file_name", "standard", "distinct", "union", "inheritable", "noid"] \
                     + token_type_code_fragment_db.keys()

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        fh.seek(position)
        if check(fh, "}"):
            fh.seek(position)
            return False
        error_msg("Missing token_type section ('standard', 'distinct', or 'union').", fh)

    verify_word_in_list(word, SubsectionList,
                        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        descriptor.class_name, descriptor.name_space, descriptor.class_name_safe = read_namespaced_name(fh, "token_type")
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'name' specification.", fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_quit(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False
        check_or_quit(fh, ";")

    elif word == "file_name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.", fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'file_name' specification.", fh)

    elif word in ["standard", "distinct", "union"]:
        if   word == "standard": parse_standard_members(fh, descriptor, already_defined_list)
        elif word == "distinct": parse_distinct_members(fh, descriptor, already_defined_list)
        elif word == "union":    parse_union_members(fh, descriptor, already_defined_list)

        if not check(fh, "}"):
            fh.seek(position)
            error_msg("Missing closing '}' at end of token_type section '%s'." % word, fh)

    elif word in token_type_code_fragment_db.keys():
        fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
        descriptor.__dict__[word] = fragment

    else:
        assert False, "This code section should not be reachable because 'word'\n" + \
                      "was checked to fit in one of the 'elif' cases."

    return True
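
# Illustration (hedged sketch): a token_type section of the shape this function
# parses, one subsection per call. The member syntax inside 'distinct' is an
# assumption for illustration only; member names and types are invented.
#
#     token_type {
#         name = MyLexer::Token;
#         inheritable;
#         distinct {
#             number : unsigned;
#         }
#     }
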
def parse_action_code(new_mode, fh, pattern, pattern_state_machine):
    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()

        code_obj = code_fragment.parse(fh, "regular expression", ErrorOnFailureF=False)
        if code_obj != None:
            new_mode.add_match(pattern, code_obj, pattern_state_machine)
            return

        fh.seek(position)
        word = read_until_letter(fh, [";"])
        if word == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the current
            # pattern index (important for inherited patterns, which have higher precedence).
            # The parser already constructed a state machine for the pattern that is to
            # be assigned a new priority. Since this machine is not used, let us just
            # use its id.
            fh.seek(-1, 1)
            check_or_quit(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_priority(pattern, pattern_state_machine, pattern_state_machine.get_id(),
                                        fh.name, get_current_line_info_number(fh))

        elif word == "DELETION":
            # This mark deletes any pattern that was inherited with the same 'name'
            fh.seek(-1, 1)
            check_or_quit(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_deletion(pattern, pattern_state_machine,
                                        fh.name, get_current_line_info_number(fh))

        else:
            error_msg("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern + \
                      "Found: '%s'. Note that since quex version 0.33.5 it is required to add a ';'\n" % word + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing action code for pattern.", fh)
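
# Illustration (hedged sketch): 'PRIORITY-MARK' and 'DELETION' act on patterns
# inherited from a base mode; the trailing ';' is mandatory since quex version
# 0.33.5, as enforced above. The mode and pattern names, and the ': BASE'
# inheritance notation, are invented for illustration.
#
#     mode DERIVED : BASE {
#         {IDENTIFIER}  PRIORITY-MARK;
#         {WHITESPACE}  DELETION;
#     }
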
def check_for_event_specification(word, fh, new_mode):
    if word == "on_entry":
        # Event: enter into mode
        new_mode.on_entry = code_fragment.parse(fh, "%s::on_entry event handler" % new_mode.name)
        return True

    elif word == "on_exit":
        # Event: exit from mode
        new_mode.on_exit = code_fragment.parse(fh, "%s::on_exit event handler" % new_mode.name)
        return True

    elif word == "on_match":
        # Event: a pattern matched
        new_mode.on_match = code_fragment.parse(fh, "%s::on_match event handler" % new_mode.name)
        return True

    elif word == "on_indentation":
        # Event: start of indentation,
        #        first non-whitespace after whitespace
        new_mode.on_indentation = code_fragment.parse(fh, "%s::on_indentation event handler" % new_mode.name)
        return True

    elif word == "on_failure" or word == "<<FAIL>>":
        # Event: No pattern matched for current position.
        # NOTE: See 'on_end_of_stream' comments.
        new_mode.on_failure = code_fragment.parse(fh, "%s::on_failure event handler" % new_mode.name)
        return True

    elif word == "on_end_of_stream" or word == "<<EOF>>":
        # Event: End of data stream / end of file
        # NOTE: The regular expression parser relies on <<EOF>> and <<FAIL>>. So those
        #       patterns are entered here, even if later versions of quex might dismiss
        #       those rule definitions in favor of consistent event handlers.
        new_mode.on_end_of_stream = code_fragment.parse(fh, "%s::on_end_of_stream event handler" % new_mode.name)
        return True

    elif len(word) >= 3 and word[:3] == "on_":
        error_msg("Unknown event handler '%s'. Known event handlers are:\n\n" % word + \
                  "on_entry, on_exit, on_indentation, on_end_of_stream, on_failure, on_match\n\n" + \
                  "Note that any pattern starting with 'on_' is considered an event handler.\n" + \
                  "Use double quotes to bracket patterns that start with 'on_'.", fh)

    # word was not an event specification
    return False
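
# Illustration (hedged sketch): the full set of handlers recognized above,
# written inside a mode body; the mode name and handler bodies are invented.
#
#     mode PROGRAM {
#         on_entry       { /* entered PROGRAM */ }
#         on_exit        { /* left PROGRAM */ }
#         on_match       { /* after every pattern match */ }
#         on_indentation { /* first non-whitespace after whitespace */ }
#         <<FAIL>>       { /* alias for on_failure */ }
#         <<EOF>>        { /* alias for on_end_of_stream */ }
#     }
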
def parse_section(fh):
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        error_msg("Missing section title.", fh)

    SectionTitleList = ["start", "define", "token", "mode", "repeated_token", "token_type"] \
                       + lexer_mode.fragment_db.keys()

    verify_word_in_list(word, SectionTitleList, "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'       => define a mode
        #     -- 'start = ...;'       => define the name of the initial mode
        #     -- 'header { ... }'     => define code that is to be pasted on top
        #                                of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'       => define code that is to be pasted in the class' body
        #                                of the engine (e.g. "public: int my_member;")
        #     -- 'init { ... }'       => define code that is to be pasted in the class' constructors
        #                                of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }'     => define pattern shorthands such as IDENTIFIER for [a-z]+
        #     -- 'repeated_token { ... }' => enable token repetition; lists the
        #                                token ids that may be repeated.
        #     -- 'token { ... }'      => define token ids
        #     -- 'token_type { ... }' => define a customized token type
        #
        if word in lexer_mode.fragment_db.keys():
            element_name = lexer_mode.fragment_db[word]
            fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
            lexer_mode.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error_msg("Missing mode_name after 'start ='", fh)
            elif lexer_mode.initial_mode.get_pure_code() != "":
                error_msg("start mode defined more than once!", fh, DontExitF=True)
                error_msg("previously defined here",
                          lexer_mode.initial_mode.filename,
                          lexer_mode.initial_mode.line_n)
            lexer_mode.initial_mode = UserCodeFragment(mode_name, fh.name,
                                                       get_current_line_info_number(fh))
            return

        elif word == "repeated_token":
            lexer_mode.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in lexer_mode.token_repetition_token_id_list:
                verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                    lexer_mode.token_id_db.keys(),
                                    "Token ID '%s' not yet defined." % token_name,
                                    fh, ExitF=False)
            return

        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":
            parse_token_id_definitions(fh)
            return

        elif word == "token_type":
            if Setup.token_class_file != "":
                error_msg("Token type definition inadmissible while specifying on the command line\n" + \
                          "the file %s to contain a manually written token class." % repr(Setup.token_class_file),
                          fh)

            if lexer_mode.token_type_definition == None:
                lexer_mode.token_type_definition = token_type_definition.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error_msg("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error_msg("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error_msg("Previously defined here.",
                          lexer_mode.token_type_definition.file_name_of_token_type_definition,
                          lexer_mode.token_type_definition.line_n_of_token_type_definition)
            return

        elif word == "mode":
            # When the first mode is parsed, a token_type definition must be
            # present. If not, the default token type definition is considered.
            if lexer_mode.token_type_definition == None:
                sub_fh = open_file_or_die(os.environ["QUEX_PATH"]
                                          + Setup.language_db["$code_base"]
                                          + Setup.language_db["$token-default-file"])
                parse_section(sub_fh)
                sub_fh.close()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode_definition.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing '%s' section" % word, fh)
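
# Illustration (hedged sketch): the ordering constraints enforced above, shown
# on an invented quex input skeleton. A 'token_type' section must precede the
# first 'mode' section (otherwise the default token type has already been
# loaded), and 'repeated_token' may only name token ids already defined in a
# preceding 'token' section.
#
#     token_type     { ... }
#     token          { DEDENT; IDENTIFIER; }
#     repeated_token { DEDENT; }
#     mode PROGRAM   { ... }
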