def __parse_event(new_mode, fh, word):
    """Try to parse 'word' as the name of an event handler of 'new_mode'.

    The lex-style aliases '<<EOF>>' and '<<FAIL>>' are translated into
    'on_end_of_stream' and 'on_failure'. A word that is listed in
    'blackboard.all_section_title_list' is reported through 'error.log'.
    Any other word that does not begin with 'on_' is not an event handler.

    RETURNS: False, if 'word' is not an event handler name; nothing is
                    stored in 'new_mode' in that case.
             True,  if the handler's code fragment has been parsed from 'fh'
                    and stored in 'new_mode.incidence_db[word]'.
    """
    start_pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    lex_alias_db = {"<<EOF>>": "on_end_of_stream", "<<FAIL>>": "on_failure"}
    if word in lex_alias_db:
        word = lex_alias_db[word]
    elif word in blackboard.all_section_title_list:
        section_title_msg = \
            "Pattern '%s' is a quex section title. Has the closing '}' of mode %s \n" % (word, new_mode.name) \
            + "been forgotten? Else use quotes, i.e. \"%s\"." % word
        error.log(section_title_msg, fh)
    elif not word.startswith("on_"):
        return False

    unknown_handler_msg = "Unknown event handler '%s'. \n" % word + \
                          "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
                          "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, start_pos)
    error.verify_word_in_list(word, standard_incidence_db.keys(), unknown_handler_msg, fh)
    __validate_required_token_policy_queue(word, fh, start_pos)

    # -- When a termination token is sent, no other token shall follow.
    #    => Enforce return from the analyzer! Do not allow CONTINUE!
    # -- When an 'on_failure' is received allow immediate action of the
    #    receiver => Do not allow CONTINUE!
    continue_f = word not in ("on_end_of_stream", "on_failure")

    handler_title = "%s::%s event handler" % (new_mode.name, word)
    new_mode.incidence_db[word] = code_fragment.parse(fh, handler_title,
                                                      ContinueF=continue_f)
    return True
def __parse_section(fh, descriptor, already_defined_list):
    """Parse one subsection of a 'token_type' section and store it in 'descriptor'.

    The subsection title is read from 'fh' and must be one of 'name',
    'file_name', 'standard', 'distinct', 'union', 'inheritable', 'noid', or
    a key of 'token_type_code_fragment_db'. Depending on the title:

      name        -- '= <namespaced name>;' sets 'class_name', 'name_space'
                     and 'class_name_safe' of the descriptor.
      inheritable -- sets 'descriptor.open_for_derivation_f = True'.
      noid        -- sets 'descriptor.token_contains_token_id_f = False'.
      file_name   -- '= <text>;' is passed to 'descriptor.set_file_name()'.
      standard, distinct, union
                  -- delegated to the respective 'parse_*_members()'
                     function; a closing '}' is required afterwards.
      code fragment titles
                  -- the parsed fragment is stored in
                     'descriptor.__dict__[title]'.

    'already_defined_list' must be a list; it is handed to the member
    parsing functions.

    RETURNS: False, if no identifier could be read and a closing '}' was
                    found instead. The file position is then reset to where
                    it was when the function was entered.
             True,  if a subsection has been parsed.
    """
    assert isinstance(already_defined_list, list)

    # 'list(...)' keeps the concatenation valid where 'keys()' is a view
    # rather than a list.
    SubsectionList = ["name", "file_name", "standard", "distinct", "union", "inheritable", "noid"] \
                     + list(token_type_code_fragment_db.keys())

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        fh.seek(position)
        if check(fh, "}"):
            # End of the 'token_type' section: undo the read so that the
            # caller finds the '}' itself.
            fh.seek(position)
            return False
        error_msg("Missing token_type section ('standard', 'distinct', or 'union').", fh)

    verify_word_in_list(word, SubsectionList,
                        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        descriptor.class_name, descriptor.name_space, descriptor.class_name_safe = \
            read_namespaced_name(fh, "token_type")
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'name' specification.", fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_die(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False
        check_or_die(fh, ";")

    elif word == "file_name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.", fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'file_name' specification.", fh)

    elif word in ["standard", "distinct", "union"]:
        if   word == "standard": parse_standard_members(fh, word, descriptor, already_defined_list)
        elif word == "distinct": parse_distinct_members(fh, word, descriptor, already_defined_list)
        elif word == "union":    parse_union_members(fh, word, descriptor, already_defined_list)

        if not check(fh, "}"):
            # Report the error at the position where the subsection began.
            fh.seek(position)
            error_msg("Missing closing '}' at end of token_type section '%s'." % word, fh)

    elif word in token_type_code_fragment_db:
        fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
        descriptor.__dict__[word] = fragment

    else:
        assert False, "This code section section should not be reachable because 'word'\n" + \
                      "was checked to fit in one of the 'elif' cases."

    return True
def __parse_event(new_mode, fh, word):
    """Try to parse 'word' as the name of an event handler of 'new_mode'.

    '<<EOF>>' and '<<FAIL>>' are accepted as aliases for 'on_end_of_stream'
    and 'on_failure'. A word from 'blackboard.all_section_title_list' is
    reported through 'error_msg'. Any other word that does not begin with
    'on_' is rejected as "not an event handler".

    RETURNS: False, if 'word' is not an event handler name.
             True,  if the handler's code fragment has been parsed from 'fh'
                    and stored in 'new_mode.events[word]'.
    """
    entry_pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if word == "<<EOF>>":
        word = "on_end_of_stream"
    elif word == "<<FAIL>>":
        word = "on_failure"
    elif word in blackboard.all_section_title_list:
        title_clash_msg = \
            "Pattern '%s' is a quex section title. Has the closing '}' of mode %s \n" % (word, new_mode.name) \
            + "been forgotten? Else use quotes, i.e. \"%s\"." % word
        error_msg(title_clash_msg, fh)
    elif not word.startswith("on_"):
        return False

    unknown_msg = "Unknown event handler '%s'. \n" % word + \
                  "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
                  "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, entry_pos)
    verify_word_in_list(word, event_handler_db.keys(), unknown_msg, fh)
    __validate_required_token_policy_queue(word, fh, entry_pos)

    # When a termination token is sent, no other token shall follow.
    # => Enforce return from the analyzer! Do not allow CONTINUE!
    continue_f = (word != "on_end_of_stream")

    fragment_title = "%s::%s event handler" % (new_mode.name, word)
    new_mode.events[word] = code_fragment.parse(fh, fragment_title, ContinueF=continue_f)
    return True
def __parse_event(new_mode, fh, word):
    """Try to parse 'word' as the name of an event handler of 'new_mode'.

    '<<EOF>>' and '<<FAIL>>' are accepted as aliases for 'on_end_of_stream'
    and 'on_failure'. A word from 'blackboard.all_section_title_list' is
    reported through 'error.log'. Any other word that does not begin with
    'on_' is not an event handler.

    After the handler's code has been parsed, two warnings may be issued:
      -- 'Lng.suspicious_RETURN_in_event_handler()' flags the code text.
      -- the handler is 'on_n_dedent' while 'token_db.support_repetition()'
         is false.

    RETURNS: False, if 'word' is not an event handler name.
             True,  if the handler's code fragment has been parsed from 'fh'
                    and stored in 'new_mode.incidence_db[word]'.
    """
    pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if   word == "<<EOF>>":                  word = "on_end_of_stream"
    elif word == "<<FAIL>>":                 word = "on_failure"
    elif word in blackboard.all_section_title_list:
        error.log("Pattern '%s' is a quex section title. Has the closing '}' of mode %s \n" % (word, new_mode.name) \
                  + "been forgotten? Else use quotes, i.e. \"%s\"." % word, fh)
    elif not word.startswith("on_"):         return False

    comment = "Unknown event handler '%s'. \n" % word + \
              "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
              "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    # 'list(...)' keeps the concatenation valid where 'keys()' is a view
    # rather than a list.
    admissible_list = list(standard_incidence_db.keys()) + ["keyword_list"]
    error.verify_word_in_list(word, admissible_list, comment, fh)

    code = code_fragment.parse(fh, "%s::%s event handler" % (new_mode.name, word))
    # NOTE(review): 'keyword_list' passes the verification above, but is used
    # as a key into 'standard_incidence_db' here -- confirm that it is either
    # present in the database or filtered out by the caller.
    incidence_id = standard_incidence_db[word][0]
    if Lng.suspicious_RETURN_in_event_handler(incidence_id, code.get_text()):
        error.warning("Suspicious 'RETURN' in event handler '%s'.\n" % incidence_id \
                      + "This statement will trigger 'on_after_match' handler.\n" \
                      + "May be, use plain return instead.", code.sr)

    if word == "on_n_dedent" and not token_db.support_repetition():
        error.warning("Found 'on_n_dedent', but no single token has been specified\n" \
                      "in a 'repeated_token' section.", code.sr)

    new_mode.incidence_db[word] = code
    return True
def __parse_action(new_mode, fh, pattern_str, pattern):
    """Parse what follows a pattern inside a mode and register it in 'new_mode'.

    Either a code fragment follows, which is registered via
    'new_mode.add_match()', or one of the commands 'PRIORITY-MARK;' and
    'DELETION;', which are registered via 'new_mode.add_match_priority()'
    and 'new_mode.add_match_deletion()'. Anything else is reported through
    'error_msg'.

    An 'EndOfStreamException' raised on the way is caught: the file position
    is reset to the last remembered position and an error is reported.
    """
    semicolon_hint = ". Since quex version 0.33.5 this is required."

    rewind_pos = fh.tell()
    try:
        skip_whitespace(fh)
        rewind_pos = fh.tell()

        action = code_fragment.parse(fh, "regular expression", ErrorOnFailureF=False)
        if action is not None:
            new_mode.add_match(pattern_str, action, pattern)
            return

        # No code fragment => a command terminated by ';' is expected.
        fh.seek(rewind_pos)
        command = read_until_letter(fh, [";"])
        if command not in ("PRIORITY-MARK", "DELETION"):
            error_msg("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern_str + \
                      "found: '%s'. Note, that since quex version 0.33.5 it is required to add a ';'\n" % command + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)
            return

        # Step one character back, so that the ';' can be checked explicitly.
        fh.seek(-1, 1)
        check_or_die(fh, ";", semicolon_hint)

        if command == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the current
            # pattern index (important for inherited patterns, that have higher precedence).
            # The parser already constructed a state machine for the pattern that is to
            # be assigned a new priority. Since, this machine is not used, let us just
            # use its id.
            new_mode.add_match_priority(pattern_str, pattern, pattern.sm.get_id(),
                                        fh.name, get_current_line_info_number(fh))
        else:
            # This mark deletes any pattern that was inherited with the same 'name'
            new_mode.add_match_deletion(pattern_str, pattern,
                                        fh.name, get_current_line_info_number(fh))

    except EndOfStreamException:
        fh.seek(rewind_pos)
        error_msg("End of file reached while parsing action code for pattern.", fh)
def parse_section(fh):
    """Parse one top-level section from 'fh' and store the result in 'blackboard'.

    The section title is read first and must be an element of
    'blackboard.all_section_title_list'. Handled titles:

      -- keys of 'blackboard.fragment_db' (code fragment sections): the
                              parsed fragment is stored as the attribute of
                              'blackboard' that the database names.
      -- 'start = ...;'    => define the name of the initial mode.
      -- 'repeated_token'  => define the token ids that may be repeated.
      -- 'define { ... }'  => define pattern shorthands.
      -- 'token { ... }'   => define token ids.
      -- 'token_type { ... }' => define a customized token type.
      -- 'mode { ... }'    => define a mode.

    An 'EndOfStreamException' raised while the section is parsed is caught:
    the file position is reset to the beginning of the section and
    'error.error_eof()' is called.
    """
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh, OnMissingStr="Missing section title")

    error.verify_word_in_list(word, blackboard.all_section_title_list,
                              "Unknown quex section '%s'" % word, fh)
    try:
        if word in blackboard.fragment_db:
            element_name = blackboard.fragment_db[word]
            fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error.log("Missing mode_name after 'start ='", fh)
            elif not blackboard.initial_mode.sr.is_void():
                error.log("start mode defined more than once!", fh, DontExitF=True)
                error.log("previously defined here", blackboard.initial_mode.sr)
            blackboard.initial_mode = CodeUser(mode_name, SourceRef.from_FileHandle(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                # The token id database is looked up without the token id prefix.
                error.verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                          blackboard.token_id_db.keys(),
                                          "Token ID '%s' not yet defined." % token_name, fh,
                                          ExitF=False,
                                          SuppressCode=NotificationDB.warning_repeated_token_not_yet_defined)
            return

        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":
            if Setup.token_id_foreign_definition:
                error.log("Token id file '%s' has been specified.\n" \
                          % Setup.token_id_foreign_definition_file \
                          + "All token ids must be specified there. Section 'token'\n" \
                          + "is not allowed.", fh)

            parse_token_id_definitions(fh)
            return

        elif word == "token_type":
            if Setup.token_class_file != "":
                # The leading blank of the last fragment separates the
                # quoted file name from the rest of the sentence.
                error.log("Section 'token_type' is intended to generate a token class.\n" \
                          + "However, the manually written token class file '%s'" \
                          % repr(Setup.token_class_file) \
                          + " has been specified on the command line.", fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error.log("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error.log("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error.log("Previously defined here.", blackboard.token_type_definition.sr)
            return

        elif word == "mode":
            # When the first mode is parsed then a token_type definition must be
            # present. If not, the default token type definition is considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(word, fh)
def parse_section(fh):
    """Parse one top-level section from 'fh' and store the result in 'blackboard'.

    The section title is read first and must be an element of
    'blackboard.all_section_title_list'. Handled titles:

      -- keys of 'blackboard.fragment_db' (code fragment sections): the
                              parsed fragment is stored as the attribute of
                              'blackboard' that the database names.
      -- 'start = ...;'    => define the name of the initial mode.
      -- 'repeated_token'  => define the token ids that may be repeated.
      -- 'define { ... }'  => define pattern shorthands.
      -- 'token { ... }'   => define token ids.
      -- 'token_type { ... }' => define a customized token type.
      -- 'mode { ... }'    => define a mode.

    An 'EndOfStreamException' raised while the section is parsed is caught:
    the file position is reset to the beginning of the section and
    'error.error_eof()' is called.
    """
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh, OnMissingStr="Missing section title")

    error.verify_word_in_list(word, blackboard.all_section_title_list,
                              "Unknown quex section '%s'" % word, fh)
    try:
        if word in blackboard.fragment_db:
            element_name = blackboard.fragment_db[word]
            fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error.log("Missing mode_name after 'start ='", fh)
            elif not blackboard.initial_mode.sr.is_void():
                error.log("start mode defined more than once!", fh, DontExitF=True)
                error.log("previously defined here", blackboard.initial_mode.sr)
            blackboard.initial_mode = CodeUser(mode_name, SourceRef.from_FileHandle(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                # The token id database is looked up without the token id prefix.
                error.verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                          blackboard.token_id_db.keys(),
                                          "Token ID '%s' not yet defined." % token_name, fh,
                                          ExitF=False,
                                          SuppressCode=NotificationDB.warning_repeated_token_not_yet_defined)
            return

        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":
            if Setup.token_id_foreign_definition:
                error.log("Token id file '%s' has been specified.\n" \
                          % Setup.token_id_foreign_definition_file \
                          + "All token ids must be specified there. Section 'token'\n" \
                          + "is not allowed.", fh)

            parse_token_id_definitions(fh)
            return

        elif word == "token_type":
            if Setup.token_class_file != "":
                # The leading blank of the last fragment separates the
                # quoted file name from the rest of the sentence.
                error.log("Section 'token_type' is intended to generate a token class.\n" \
                          + "However, the manually written token class file '%s'" \
                          % repr(Setup.token_class_file) \
                          + " has been specified on the command line.", fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error.log("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error.log("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error.log("Previously defined here.", blackboard.token_type_definition.sr)
            return

        elif word == "mode":
            # When the first mode is parsed then a token_type definition must be
            # present. If not, the default token type definition is considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(word, fh)
def __parse_section(fh, descriptor, already_defined_list):
    """Parse one subsection of a 'token_type' section and store it in 'descriptor'.

    The subsection title is read from 'fh' and must be one of 'name',
    'file_name', 'standard', 'distinct', 'union', 'inheritable', 'noid', or
    a key of 'token_type_code_fragment_db'. Depending on the title:

      name        -- '= <namespaced name>;' sets 'class_name', 'name_space'
                     and 'class_name_safe' of the descriptor.
      inheritable -- sets 'descriptor.open_for_derivation_f = True'.
      noid        -- sets 'descriptor.token_contains_token_id_f = False'.
      file_name   -- '= <text>;' is passed to 'descriptor.set_file_name()'.
      standard, distinct, union
                  -- delegated to the respective 'parse_*_members()'
                     function; a closing '}' is required afterwards.
      code fragment titles
                  -- the parsed fragment is stored in
                     'descriptor.__dict__[title]'.

    'already_defined_list' must be a list; it is handed to the member
    parsing functions.

    RETURNS: False, if no identifier could be read and a closing '}' was
                    found instead. The file position is then reset to where
                    it was when the function was entered.
             True,  if a subsection has been parsed.
    """
    assert isinstance(already_defined_list, list)

    # 'list(...)' keeps the concatenation valid where 'keys()' is a view
    # rather than a list.
    SubsectionList = ["name", "file_name", "standard", "distinct", "union", "inheritable", "noid"] \
                     + list(token_type_code_fragment_db.keys())

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        fh.seek(position)
        if check(fh, "}"):
            # End of the 'token_type' section: undo the read so that the
            # caller finds the '}' itself.
            fh.seek(position)
            return False
        error_msg(
            "Missing token_type section ('standard', 'distinct', or 'union').",
            fh)

    verify_word_in_list(
        word, SubsectionList,
        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        descriptor.class_name, descriptor.name_space, descriptor.class_name_safe = read_namespaced_name(
            fh, "token_type")
        if not check(fh, ";"):
            error_msg(
                "Missing terminating ';' in token_type 'name' specification.",
                fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_die(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False
        check_or_die(fh, ";")

    elif word == "file_name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.",
                      fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg(
                "Missing terminating ';' in token_type 'file_name' specification.",
                fh)

    elif word in ["standard", "distinct", "union"]:
        if word == "standard":
            parse_standard_members(fh, word, descriptor, already_defined_list)
        elif word == "distinct":
            parse_distinct_members(fh, word, descriptor, already_defined_list)
        elif word == "union":
            parse_union_members(fh, word, descriptor, already_defined_list)

        if not check(fh, "}"):
            # Report the error at the position where the subsection began.
            fh.seek(position)
            error_msg(
                "Missing closing '}' at end of token_type section '%s'." % word,
                fh)

    elif word in token_type_code_fragment_db:
        fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
        descriptor.__dict__[word] = fragment

    else:
        assert False, "This code section section should not be reachable because 'word'\n" + \
                      "was checked to fit in one of the 'elif' cases."

    return True
def parse_section(fh):
    """Parse one top-level section from 'fh' and store the result in 'blackboard'.

    The section title is read first; it must not be empty and must be an
    element of 'blackboard.all_section_title_list'. Handled titles:

      -- keys of 'blackboard.fragment_db' (code fragment sections): the
                              parsed fragment is stored as the attribute of
                              'blackboard' that the database names.
      -- 'start = ...;'    => define the name of the initial mode.
      -- 'repeated_token'  => define the token ids that may be repeated.
      -- 'define { ... }'  => define pattern shorthands.
      -- 'token { ... }'   => define token ids.
      -- 'token_type { ... }' => define a customized token type.
      -- 'mode { ... }'    => define a mode.

    An 'EndOfStreamException' raised while the section is parsed is caught:
    the file position is reset to the beginning of the section and an error
    is reported.
    """
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    section_begin = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        error_msg("Missing section title.", fh)

    verify_word_in_list(word, blackboard.all_section_title_list,
                        "Unknown quex section '%s'" % word, fh)
    try:
        # Every branch below ends with 'return'; the branches are tried in
        # the order given.
        if word in blackboard.fragment_db.keys():
            target_name = blackboard.fragment_db[word]
            blackboard.__dict__[target_name] = \
                code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
            return

        if word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error_msg("Missing mode_name after 'start ='", fh)
            elif blackboard.initial_mode.get_pure_code() != "":
                error_msg("start mode defined more than once!", fh, DontExitF=True)
                error_msg("previously defined here",
                          blackboard.initial_mode.filename,
                          blackboard.initial_mode.line_n)
            blackboard.initial_mode = UserCodeFragment(mode_name, fh.name,
                                                       get_current_line_info_number(fh))
            return

        if word == "repeated_token":
            blackboard.token_repetition_token_id_list = \
                parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                # The token id database is looked up without the token id prefix.
                name_without_prefix = token_name[len(Setup.token_id_prefix):]
                verify_word_in_list(name_without_prefix,
                                    blackboard.token_id_db.keys(),
                                    "Token ID '%s' not yet defined." % token_name,
                                    fh, ExitF=False)
            return

        if word == "define":
            parse_pattern_name_definitions(fh)
            return

        if word == "token":
            parse_token_id_definitions(fh)
            return

        if word == "token_type":
            if Setup.token_class_file != "":
                error_msg("Token type definition inadmissible while specifying on the command line\n" + \
                          "the file %s to contain a manually written token class." % repr(Setup.token_class_file),
                          fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error_msg("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error_msg("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                previous = blackboard.token_type_definition
                error_msg("Previously defined here.",
                          previous.file_name_of_token_type_definition,
                          previous.line_n_of_token_type_definition)
            return

        if word == "mode":
            # When the first mode is parsed then a token_type definition must be
            # present. If not, the default token type definition is considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        # This case should have been caught by the 'verify_word_in_list' function
        assert False

    except EndOfStreamException:
        fh.seek(section_begin)
        error_msg("End of file reached while parsing '%s' section" % word, fh)
def parse_section(fh):
    """Parse one top-level section from 'fh' and store the result in 'blackboard'.

    The section title is read first; it must not be empty and must be an
    element of 'blackboard.all_section_title_list'. Handled titles:

      -- keys of 'blackboard.fragment_db' (code fragment sections): the
                              parsed fragment is stored as the attribute of
                              'blackboard' that the database names.
      -- 'start = ...;'    => define the name of the initial mode.
      -- 'repeated_token'  => define the token ids that may be repeated.
      -- 'define { ... }'  => define pattern shorthands.
      -- 'token { ... }'   => define token ids.
      -- 'token_type { ... }' => define a customized token type.
      -- 'mode { ... }'    => define a mode.

    An 'EndOfStreamException' raised while the section is parsed is caught:
    the file position is reset to the beginning of the section and an error
    is reported.
    """
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    section_begin = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        error_msg("Missing section title.", fh)

    verify_word_in_list(word, blackboard.all_section_title_list,
                        "Unknown quex section '%s'" % word, fh)
    try:
        # Every branch below ends with 'return'; the branches are tried in
        # the order given.
        if word in blackboard.fragment_db.keys():
            target_name = blackboard.fragment_db[word]
            blackboard.__dict__[target_name] = \
                code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
            return

        if word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error_msg("Missing mode_name after 'start ='", fh)
            elif blackboard.initial_mode.get_pure_code() != "":
                error_msg("start mode defined more than once!", fh, DontExitF=True)
                error_msg("previously defined here",
                          blackboard.initial_mode.filename,
                          blackboard.initial_mode.line_n)
            blackboard.initial_mode = UserCodeFragment(mode_name, fh.name,
                                                       get_current_line_info_number(fh))
            return

        if word == "repeated_token":
            blackboard.token_repetition_token_id_list = \
                parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                # The token id database is looked up without the token id prefix.
                name_without_prefix = token_name[len(Setup.token_id_prefix):]
                verify_word_in_list(name_without_prefix,
                                    blackboard.token_id_db.keys(),
                                    "Token ID '%s' not yet defined." % token_name,
                                    fh, ExitF=False)
            return

        if word == "define":
            parse_pattern_name_definitions(fh)
            return

        if word == "token":
            parse_token_id_definitions(fh)
            return

        if word == "token_type":
            if Setup.token_class_file != "":
                error_msg("Token type definition inadmissible while specifying on the command line\n" + \
                          "the file %s to contain a manually written token class." % repr(Setup.token_class_file),
                          fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error_msg("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error_msg("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                previous = blackboard.token_type_definition
                error_msg("Previously defined here.",
                          previous.file_name_of_token_type_definition,
                          previous.line_n_of_token_type_definition)
            return

        if word == "mode":
            # When the first mode is parsed then a token_type definition must be
            # present. If not, the default token type definition is considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        # This case should have been caught by the 'verify_word_in_list' function
        assert False

    except EndOfStreamException:
        fh.seek(section_begin)
        error_msg("End of file reached while parsing '%s' section" % word, fh)