def do_implementation(ModeDB):

    FileTemplate = os.path.normpath(QUEX_PATH
                                    + Setup.language_db["$code_base"]
                                    + "/analyzer/TXT-Cpp.i")
    func_txt = get_file_content_or_die(FileTemplate)

    func_txt = blue_print(func_txt,
            [
                ["$$CONSTRUCTOR_EXTENSTION$$",                  blackboard.class_constructor_extension.get_code()],
                ["$$CONVERTER_HELPER_I$$",                      Setup.get_file_reference(Setup.output_buffer_codec_header_i)],
                ["$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$", get_constructor_code(ModeDB.values())],
                ["$$MEMENTO_EXTENSIONS_PACK$$",                 blackboard.memento_pack_extension.get_code()],
                ["$$MEMENTO_EXTENSIONS_UNPACK$$",               blackboard.memento_unpack_extension.get_code()],
            ])

    return func_txt

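# NOTE: 'blue_print(...)' is used throughout this file but is not defined in
#       this section. Judging from its call sites, it substitutes each
#       [placeholder, value] pair into a template string. Below is a minimal
#       sketch under that assumption; the name '_blue_print_sketch' is
#       hypothetical, chosen so it does not clash with the real helper, whose
#       implementation may differ (e.g. a single-pass substitution).
def _blue_print_sketch(template_txt, Replacements):
    # Apply each [placeholder, value] pair in order of appearance.
    for placeholder, value in Replacements:
        template_txt = template_txt.replace(placeholder, value)
    return template_txt

# Example: _blue_print_sketch("class $$NAME$$;", [["$$NAME$$", "Lexer"]])
#          --> "class Lexer;"
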
def _do(UnicodeTrafoInfo):
    """
    PURPOSE: Writes converters for conversion towards UTF8/UTF16/UCS2/UCS4.

    UnicodeTrafoInfo:

       Provides the information about the relation of character codes in a
       particular coding to unicode character codes. It is provided in the
       following form:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin),
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin),
         ...
       ]
    """
    codec_name = make_safe_identifier(UnicodeTrafoInfo.name).lower()

    utf8_epilog,  utf8_function_body  = ConverterWriterUTF8().do(UnicodeTrafoInfo)
    utf16_prolog, utf16_function_body = ConverterWriterUTF16().do(UnicodeTrafoInfo)
    dummy,        utf32_function_body = ConverterWriterUTF32().do(UnicodeTrafoInfo)

    # Provide only the constants which are necessary
    FileName = os.path.normpath(QUEX_PATH
                                + Lng["$code_base"]
                                + "/converter_helper/TXT-from-codec-buffer.i")
    codec_header = Setup.get_file_reference(Setup.output_buffer_codec_header)

    txt_i = blue_print(get_file_content_or_die(FileName),
                       [["$$CODEC$$",        codec_name],
                        ["$$EPILOG$$",       utf8_epilog],
                        ["$$CODEC_HEADER$$", codec_header],
                        ["$$BODY_UTF8$$",    utf8_function_body],
                        ["$$BODY_UTF16$$",   utf16_function_body],
                        ["$$BODY_UTF32$$",   utf32_function_body]])

    # A separate declaration header is required
    FileName = os.path.normpath(QUEX_PATH
                                + Lng["$code_base"]
                                + "/converter_helper/TXT-from-codec-buffer")
    template_h_txt = get_file_content_or_die(FileName)
    txt_h = template_h_txt.replace("$$CODEC$$", codec_name)

    return txt_h, txt_i

def _do(UnicodeTrafoInfo, CodecName):
    """
    PURPOSE: Writes converters for conversion towards UTF8/UTF16/UCS2/UCS4.

    UnicodeTrafoInfo:

       Provides the information about the relation of character codes in a
       particular coding to unicode character codes. It is provided in the
       following form:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin),
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin),
         ...
       ]
    """
    codec_name = make_safe_identifier(CodecName).lower()

    utf8_epilog,  utf8_function_body  = ConverterWriterUTF8().do(UnicodeTrafoInfo)
    utf16_prolog, utf16_function_body = ConverterWriterUTF16().do(UnicodeTrafoInfo)
    dummy,        utf32_function_body = ConverterWriterUTF32().do(UnicodeTrafoInfo)

    # Provide only the constants which are necessary
    FileName = os.path.normpath(QUEX_PATH
                                + Setup.language_db["$code_base"]
                                + "/converter_helper/TXT-from-codec-buffer.i")
    codec_header = Setup.get_file_reference(Setup.output_buffer_codec_header)

    txt_i = blue_print(get_file_content_or_die(FileName),
                       [["$$CODEC$$",        codec_name],
                        ["$$EPILOG$$",       utf8_epilog],
                        ["$$CODEC_HEADER$$", codec_header],
                        ["$$BODY_UTF8$$",    utf8_function_body],
                        ["$$BODY_UTF16$$",   utf16_function_body],
                        ["$$BODY_UTF32$$",   utf32_function_body]])

    # A separate declaration header is required
    FileName = os.path.normpath(QUEX_PATH
                                + Setup.language_db["$code_base"]
                                + "/converter_helper/TXT-from-codec-buffer")
    template_h_txt = get_file_content_or_die(FileName)
    txt_h = template_h_txt.replace("$$CODEC$$", codec_name)

    return txt_h, txt_i

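# NOTE: The docstring above describes UnicodeTrafoInfo as a list of triples
#       (Source_Begin, Source_End, TargetInterval_Begin). The sketch below
#       shows how such a table can be read; it assumes half-open source
#       intervals and uses invented values, for illustration only.
def _codec_to_unicode_sketch(Code, TrafoInfo):
    # Translate a single codec value through the interval table.
    for source_begin, source_end, target_begin in TrafoInfo:
        if source_begin <= Code < source_end:
            return target_begin + (Code - source_begin)
    return None  # codec value not covered by the mapping

# Example: with [(0x00, 0x80, 0x0000)] (ASCII maps onto itself),
#          _codec_to_unicode_sketch(0x41, [(0x00, 0x80, 0x0000)]) --> 0x41
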
def do_implementation(ModeDB):

    FileTemplate = os.path.normpath(QUEX_PATH
                                    + Lng["$code_base"]
                                    + "/analyzer/TXT-Cpp.i")
    func_txt = get_file_content_or_die(FileTemplate)

    func_txt = blue_print(func_txt,
            [
                ["$$CONSTRUCTOR_EXTENSTION$$",                  Lng.SOURCE_REFERENCED(blackboard.class_constructor_extension)],
                ["$$CONVERTER_HELPER_I$$",                      Setup.get_file_reference(Setup.output_buffer_codec_header_i)],
                ["$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$", get_constructor_code(ModeDB.values())],
                ["$$MEMENTO_EXTENSIONS_PACK$$",                 Lng.SOURCE_REFERENCED(blackboard.memento_pack_extension)],
                ["$$MEMENTO_EXTENSIONS_UNPACK$$",               Lng.SOURCE_REFERENCED(blackboard.memento_unpack_extension)],
            ])

    return func_txt

def get_supported_codec_list(IncludeAliasesF=False):
    assert type(IncludeAliasesF) == bool

    global _supported_codec_list
    if len(_supported_codec_list) != 0:
        if IncludeAliasesF: return _supported_codec_list_plus_aliases
        else:               return _supported_codec_list

    file_name = QUEX_PATH + "/quex/engine/codec_db/database/00-SUPPORTED.txt"
    content   = get_file_content_or_die(file_name)

    _supported_codec_list = content.split()
    _supported_codec_list.sort()

    codec_db_list = parser.get_codec_list_db()
    for codec_name, aliases_list, dummy in codec_db_list:
        if codec_name in _supported_codec_list:
            _supported_codec_list_plus_aliases.extend(filter(lambda x: x != "", aliases_list))

    _supported_codec_list_plus_aliases.sort()
    if IncludeAliasesF: return _supported_codec_list_plus_aliases
    else:               return _supported_codec_list

def get_supported_codec_list(IncludeAliasesF=False):
    assert type(IncludeAliasesF) == bool

    global __supported_codec_list
    if len(__supported_codec_list) != 0:
        if IncludeAliasesF: return __supported_codec_list_plus_aliases
        else:               return __supported_codec_list

    file_name = QUEX_PATH + "/quex/engine/codec_db/database/00-SUPPORTED.txt"
    content   = get_file_content_or_die(file_name)

    __supported_codec_list = content.split()
    __supported_codec_list.sort()

    codec_db_list = get_codec_list_db()
    for codec_name, aliases_list, dummy in codec_db_list:
        if codec_name in __supported_codec_list:
            __supported_codec_list_plus_aliases.extend(filter(lambda x: x != "", aliases_list))

    __supported_codec_list_plus_aliases.sort()
    if IncludeAliasesF: return __supported_codec_list_plus_aliases
    else:               return __supported_codec_list

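# NOTE: Both variants above cache their result in module-level globals, so the
#       database file '00-SUPPORTED.txt' is read at most once per process.
#       A hypothetical call site:
#
#          codec_list = get_supported_codec_list()                     # canonical names
#          all_names  = get_supported_codec_list(IncludeAliasesF=True) # names + aliases
#
#       The second call is answered from the cache without re-reading the file.
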
def do(ModeDB):
    assert blackboard.token_type_definition is not None

    QuexClassHeaderFileTemplate = os.path.normpath(QUEX_PATH
                                                   + Lng["$code_base"]
                                                   + Lng["$analyzer_template_file"]).replace("//", "/")
    LexerClassName = Setup.analyzer_class_name

    quex_converter_coding_name_str = Setup.converter_ucs_coding_name

    mode_id_definition_str = ""
    # NOTE: First mode-id needs to be '1' for compatibility with flex generated engines
    for i, info in enumerate(ModeDB.items()):
        name = info[0]
        mode = info[1]
        if mode.abstract_f(): continue
        mode_id_definition_str += " QUEX_NAME(ModeID_%s) = %i,\n" % (name, i)

    if mode_id_definition_str != "":
        mode_id_definition_str = mode_id_definition_str[:-2]

    # -- instances of mode classes as members of the lexer
    mode_object_members_txt,     \
    mode_specific_functions_txt, \
    friend_txt                   = get_mode_class_related_code_fragments(ModeDB.values())

    # -- define a pointer that directly has the type of the derived class
    if Setup.analyzer_derived_class_name != "":
        analyzer_derived_class_name    = Setup.analyzer_derived_class_name
        derived_class_type_declaration = "class %s;" % Setup.analyzer_derived_class_name
    else:
        analyzer_derived_class_name    = Setup.analyzer_class_name
        derived_class_type_declaration = ""

    token_class_file_name = blackboard.token_type_definition.get_file_name()
    token_class_name      = blackboard.token_type_definition.class_name
    token_class_name_safe = blackboard.token_type_definition.class_name_safe

    template_code_txt = get_file_content_or_die(QuexClassHeaderFileTemplate)

    include_guard_ext = get_include_guard_extension(
            Lng.NAMESPACE_REFERENCE(Setup.analyzer_name_space)
            + "__" + Setup.analyzer_class_name)

    if len(Setup.token_id_foreign_definition_file) != 0:
        token_id_definition_file = Setup.token_id_foreign_definition_file
    else:
        token_id_definition_file = Setup.output_token_id_file

    lexer_name_space_safe = get_include_guard_extension(
            Lng.NAMESPACE_REFERENCE(Setup.analyzer_name_space))

    txt = blue_print(template_code_txt, [
        ["$$___SPACE___$$",                      " " * (len(LexerClassName) + 1)],
        ["$$CLASS_BODY_EXTENSION$$",             Lng.SOURCE_REFERENCED(blackboard.class_body_extension)],
        ["$$CONVERTER_HELPER$$",                 Setup.get_file_reference(Setup.output_buffer_codec_header)],
        ["$$INCLUDE_GUARD_EXTENSION$$",          include_guard_ext],
        ["$$LEXER_CLASS_NAME$$",                 LexerClassName],
        ["$$LEXER_NAME_SPACE$$",                 lexer_name_space_safe],
        ["$$LEXER_CLASS_NAME_SAFE$$",            Setup.analyzer_name_safe],
        ["$$LEXER_CONFIG_FILE$$",                Setup.get_file_reference(Setup.output_configuration_file)],
        ["$$LEXER_DERIVED_CLASS_DECL$$",         derived_class_type_declaration],
        ["$$LEXER_DERIVED_CLASS_NAME$$",         analyzer_derived_class_name],
        ["$$QUEX_MODE_ID_DEFINITIONS$$",         mode_id_definition_str],
        ["$$MEMENTO_EXTENSIONS$$",               Lng.SOURCE_REFERENCED(blackboard.memento_class_extension)],
        ["$$MODE_CLASS_FRIENDS$$",               friend_txt],
        ["$$MODE_OBJECTS$$",                     mode_object_members_txt],
        ["$$MODE_SPECIFIC_ANALYSER_FUNCTIONS$$", mode_specific_functions_txt],
        ["$$PRETTY_INDENTATION$$",               " " + " " * (len(LexerClassName)*2 + 2)],
        ["$$QUEX_TEMPLATE_DIR$$",                QUEX_PATH + Lng["$code_base"]],
        ["$$QUEX_VERSION$$",                     QUEX_VERSION],
        ["$$TOKEN_CLASS_DEFINITION_FILE$$",      Setup.get_file_reference(token_class_file_name)],
        ["$$TOKEN_CLASS$$",                      token_class_name],
        ["$$TOKEN_CLASS_NAME_SAFE$$",            token_class_name_safe],
        ["$$TOKEN_ID_DEFINITION_FILE$$",         Setup.get_file_reference(token_id_definition_file)],
        ["$$CORE_ENGINE_CHARACTER_CODING$$",     quex_converter_coding_name_str],
        ["$$USER_DEFINED_HEADER$$",              Lng.SOURCE_REFERENCED(blackboard.header) + "\n"],
    ])
    return txt

def do(ModeDescriptionDB):
    IndentationSupportF = blackboard.required_support_indentation_count()
    BeginOfLineSupportF = blackboard.required_support_begin_of_line()
    LexerClassName      = Setup.analyzer_class_name

    ConfigurationTemplateFile = (QUEX_PATH
                                 + Lng["$code_base"]
                                 + "/analyzer/configuration/TXT").replace("//", "/")

    txt = get_file_content_or_die(ConfigurationTemplateFile)

    # -- check if exit/entry handlers have to be active
    entry_handler_active_f = False
    exit_handler_active_f  = False
    for mode in ModeDescriptionDB.values():
        entry_handler_active_f |= mode.incidence_db.has_key(E_IncidenceIDs.MODE_ENTRY)
        exit_handler_active_f  |= mode.incidence_db.has_key(E_IncidenceIDs.MODE_EXIT)

    # Buffer filler converter (0x0 means: no buffer filler converter)
    converter_new_str = "# define QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW "
    if Setup.converter_user_new_func != "":
        converter_new_str += Setup.converter_user_new_func + "()"
    else:
        converter_new_str = "/* " + converter_new_str + " */"

    # Token repetition support
    token_repeat_test_txt = ""
    for token_id_str in blackboard.token_repetition_token_id_list:
        token_repeat_test_txt += "TokenID == %s || " % token_id_str
    if token_repeat_test_txt != "":
        token_repeat_test_txt = token_repeat_test_txt[:-3]
    else:
        token_repeat_test_txt = "false"

    if Setup.analyzer_derived_class_name != "":
        analyzer_derived_class_name = Setup.analyzer_derived_class_name
    else:
        analyzer_derived_class_name = Setup.analyzer_class_name

    txt = __switch(txt, "QUEX_OPTION_COLUMN_NUMBER_COUNTING",                Setup.count_column_number_f)
    txt = __switch(txt, "QUEX_OPTION_COMPUTED_GOTOS",                        False)
    txt = __switch(txt, "QUEX_OPTION_CONVERTER_ICONV",                       Setup.converter_iconv_f)
    txt = __switch(txt, "QUEX_OPTION_CONVERTER_ICU",                         Setup.converter_icu_f)
    txt = __switch(txt, "QUEX_OPTION_INCLUDE_STACK",                         Setup.include_stack_support_f)
    txt = __switch(txt, "QUEX_OPTION_LINE_NUMBER_COUNTING",                  Setup.count_line_number_f)
    txt = __switch(txt, "QUEX_OPTION_POST_CATEGORIZER",                      Setup.post_categorizer_f)
    txt = __switch(txt, "QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK",         Setup.mode_transition_check_f)
    txt = __switch(txt, "QUEX_OPTION_STRING_ACCUMULATOR",                    Setup.string_accumulator_f)
    txt = __switch(txt, "QUEX_OPTION_TOKEN_POLICY_QUEUE",                    Setup.token_policy == "queue")
    txt = __switch(txt, "QUEX_OPTION_TOKEN_POLICY_SINGLE",                   Setup.token_policy == "single")
    txt = __switch(txt, "QUEX_OPTION_TOKEN_REPETITION_SUPPORT",              token_repeat_test_txt != "false")
    txt = __switch(txt, "QUEX_OPTION_USER_MANAGED_TOKEN_MEMORY",             Setup.token_memory_management_by_user_f)
    txt = __switch(txt, "__QUEX_OPTION_BIG_ENDIAN",                          Setup.buffer_byte_order == "big")
    txt = __switch(txt, "__QUEX_OPTION_CONVERTER_HELPER",                    Setup.converter_helper_required_f)
    txt = __switch(txt, "__QUEX_OPTION_CONVERTER",                           Setup.converter_f)
    txt = __switch(txt, "QUEX_OPTION_INDENTATION_TRIGGER",                   IndentationSupportF)
    txt = __switch(txt, "__QUEX_OPTION_LITTLE_ENDIAN",                       Setup.buffer_byte_order == "little")
    txt = __switch(txt, "__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT",            entry_handler_active_f)
    txt = __switch(txt, "__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT",             exit_handler_active_f)
    txt = __switch(txt, "__QUEX_OPTION_PLAIN_C",                             Setup.language.upper() == "C")
    txt = __switch(txt, "__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION", BeginOfLineSupportF)
    txt = __switch(txt, "__QUEX_OPTION_SYSTEM_ENDIAN",                       Setup.byte_order_is_that_of_current_system_f)
    txt = __switch(txt, "QUEX_OPTION_BUFFER_BASED_ANALYZIS",                 Setup.buffer_based_analyzis_f)
    txt = __switch(txt, "__QUEX_OPTION_ENGINE_RUNNING_ON_CODEC",             Setup.buffer_codec.name != "unicode")

    # -- token class related definitions
    token_descr = blackboard.token_type_definition

    # -- name of the character codec
    codec_name = make_safe_identifier(Setup.buffer_codec.name).lower()

    # Setup.buffer_element_size can be '-1'. This signals that
    # sizeof(QUEX_TYPE_CHARACTER) needs to be used. A numeric value
    # is required here.
    character_size_str = "%i" % Setup.buffer_element_size

    def namespace(NameSpaceList):
        result = Lng.NAMESPACE_REFERENCE(NameSpaceList, TrailingDelimiterF=False)
        if len(result) == 0: return ""
        assert Setup.language.upper() != "C++" or len(result) > 2, \
               "Error while generating namespace reference '%s'" % result
        return result

    txt = blue_print(txt, [
        ["$$BUFFER_LIMIT_CODE$$",                         "0x%X" % Setup.buffer_limit_code],
        ["$$QUEX_SETTING_CHARACTER_CODEC$$",              codec_name],
        ["$$INCLUDE_GUARD_EXTENSION$$",                   get_include_guard_extension(
                                                              Lng.NAMESPACE_REFERENCE(Setup.analyzer_name_space)
                                                              + "__" + Setup.analyzer_class_name)],
        ["$$INITIAL_LEXER_MODE_ID$$",                     "QUEX_NAME(ModeID_%s)" % blackboard.initial_mode.get_pure_text()],
        ["$$LEXER_BUILD_DATE$$",                          time.asctime()],
        ["$$LEXER_CLASS_NAME$$",                          LexerClassName],
        ["$$LEXER_CLASS_NAME_SAFE$$",                     Setup.analyzer_name_safe],
        ["$$LEXER_DERIVED_CLASS_NAME$$",                  analyzer_derived_class_name],
        ["$$MAX_MODE_CLASS_N$$",                          repr(len(ModeDescriptionDB))],
        ["$$NAMESPACE_MAIN$$",                            namespace(Setup.analyzer_name_space)],
        ["$$NAMESPACE_MAIN_CLOSE$$",                      Lng.NAMESPACE_CLOSE(Setup.analyzer_name_space).replace("\n", "\\\n")],
        ["$$NAMESPACE_MAIN_OPEN$$",                       Lng.NAMESPACE_OPEN(Setup.analyzer_name_space).replace("\n", "\\\n")],
        ["$$NAMESPACE_TOKEN$$",                           namespace(token_descr.name_space)],
        ["$$NAMESPACE_TOKEN_CLOSE$$",                     Lng.NAMESPACE_CLOSE(token_descr.name_space).replace("\n", "\\\n")],
        ["$$NAMESPACE_TOKEN_OPEN$$",                      Lng.NAMESPACE_OPEN(token_descr.name_space).replace("\n", "\\\n")],
        ["$$PATH_TERMINATION_CODE$$",                     "0x%X" % Setup.path_limit_code],
        ["$$QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW$$", converter_new_str],
        ["$$QUEX_TYPE_CHARACTER$$",                       Setup.buffer_element_type],
        ["$$QUEX_SETTING_CHARACTER_SIZE$$",               character_size_str],
        ["$$QUEX_NAMESPACE_LEXEME_NULL_OPEN$$",           Lng.NAMESPACE_OPEN(Setup.lexeme_null_namespace).replace("\n", "\\\n")],
        ["$$QUEX_NAMESPACE_LEXEME_NULL_CLOSE$$",          Lng.NAMESPACE_CLOSE(Setup.lexeme_null_namespace).replace("\n", "\\\n")],
        ["$$QUEX_LEXEME_NULL$$",                          Setup.lexeme_null_full_name_cpp],
        ["$$QUEX_LEXEME_NULL_SAFE$$",                     Setup.lexeme_null_name_safe],
        ["$$QUEX_LEXEME_NULL_IN_ITS_NAMESPACE$$",         Setup.lexeme_null_name],
        ["$$QUEX_VERSION$$",                              QUEX_VERSION],
        ["$$TOKEN_CLASS$$",                               token_descr.class_name],
        ["$$TOKEN_CLASS_NAME_SAFE$$",                     token_descr.class_name_safe],
        ["$$TOKEN_COLUMN_N_TYPE$$",                       token_descr.column_number_type.get_pure_text()],
        ["$$TOKEN_ID_TYPE$$",                             token_descr.token_id_type.get_pure_text()],
        ["$$TOKEN_LINE_N_TYPE$$",                         token_descr.line_number_type.get_pure_text()],
        ["$$TOKEN_PREFIX$$",                              Setup.token_id_prefix],
        ["$$TOKEN_QUEUE_SAFETY_BORDER$$",                 repr(Setup.token_queue_safety_border)],
        ["$$TOKEN_QUEUE_SIZE$$",                          repr(Setup.token_queue_size)],
        ["$$TOKEN_REPEAT_TEST$$",                         token_repeat_test_txt],
        ["$$USER_LEXER_VERSION$$",                        Setup.user_application_version_id],
    ])
    return txt

def parse(ForeignTokenIdFile, CommentDelimiterList):
    """This function interprets the user defined token id file--if there is
       one--in order to find the names of defined token ids. It does some
       basic interpretation and include file following, but **it is in no way
       perfect**. Since its only purpose is to avoid warnings about token ids
       that are not defined, it is acceptable that it fails sometimes. It is
       more a convenience feature: quex tries to find definitions on its own.

       Nevertheless, it should work in the large majority of cases.
    """
    # Regular expression to find '#include <something>' and extract the 'something'
    # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
    IncludeRE      = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
    include_re_obj = re.compile(IncludeRE)

    def get_line_n_of_include(FileName, IncludedFileName):
        fh     = open_file_or_die(FileName, Mode="rb")
        line_n = 0
        for line in fh.readlines():
            line_n += 1
            if include_re_obj.search(line) is not None and line.find(IncludedFileName) != -1:
                break
        else:
            # Included file must appear in including file, but tolerate for safety.
            pass
        fh.close()
        return line_n

    # validate(...) ensured that the file exists.
    work_list      = [ForeignTokenIdFile]
    done_list      = []
    not_found_list = []
    recursive_list = []
    found_db       = {}
    while len(work_list) != 0:
        file_name = work_list.pop()
        content   = __delete_comments(get_file_content_or_die(file_name, Mode="rb"),
                                      CommentDelimiterList)
        done_list.append(os.path.normpath(file_name))

        # (*) Search for TokenID definitions
        begin_i = 0
        end_i   = len(content)
        if Setup.token_id_foreign_definition_file_region_begin_re is not None:
            match = Setup.token_id_foreign_definition_file_region_begin_re.search(content)
            if match is not None:
                begin_i = match.end()

        if Setup.token_id_foreign_definition_file_region_end_re is not None:
            match = Setup.token_id_foreign_definition_file_region_end_re.search(content, pos=begin_i)
            if match is not None:
                end_i = match.start()

        content = content[begin_i:end_i]

        token_id_list = __extract_token_ids(content, file_name)
        if len(token_id_list) != 0:
            found_db[file_name] = copy(token_id_list)

        token_id_foreign_set.update(token_id_list)
        for token_name in token_id_list:
            # NOTE: The line number might be wrong, because of the comment deletion
            line_n = 0
            # NOTE: The actual token value is not important, since the token's numeric
            #       identifier is defined in the user's header. We do not care.
            prefix_less_token_name = cut_token_id_prefix(token_name)
            token_id_db[prefix_less_token_name] = \
                    TokenInfo(prefix_less_token_name, None, None, SourceRef(file_name, line_n))

        # (*) find "#include" statements
        #     'set' ensures that each entry is unique
        include_file_set = set(include_re_obj.findall(content))

        # -- ensure that included files exist and are not included twice
        for included_file in include_file_set:
            normed_included_file = os.path.normpath(included_file)
            if included_file in done_list:
                line_n = get_line_n_of_include(file_name, included_file)
                recursive_list.append((file_name, line_n, included_file))
            elif not os.access(normed_included_file, os.F_OK):
                line_n = get_line_n_of_include(file_name, included_file)
                not_found_list.append((file_name, line_n, included_file))
            elif normed_included_file not in done_list:
                work_list.append(included_file)

    if Setup.token_id_foreign_definition_file_show_f:
        if len(found_db) == 0:
            error_msg("No token ids with prefix '%s' found in " % Setup.token_id_prefix
                      + "'%s' or included files." % Setup.token_id_foreign_definition_file,
                      NoteF=True)
        else:
            txt = []
            for file_name, result in found_db.iteritems():
                result = set(result)
                L      = max(map(len, result))
                txt.append("Token ids found in file '%s' {\n" % file_name)
                for name in sorted(result):
                    shorty = cut_token_id_prefix(name)
                    fully  = Setup.token_id_prefix + shorty
                    txt.append(" %s %s=> '%s'\n" % (fully, space(L, name), shorty))
                txt.append("}")
                txt.append("\n")
            if txt: txt = txt[:-1]
            error_msg("".join(txt), NoteF=True)

    ErrorN = NotificationDB.token_id_ignored_files_report
    if ErrorN not in Setup.suppressed_notification_list:
        if len(not_found_list) != 0:
            not_found_list.sort()
            error_msg("Files not found:",
                      not_found_list[0][0], LineN=not_found_list[0][1],
                      DontExitF=True)
            for file_name, line_n, included_file in not_found_list:
                error_msg("%s" % included_file, file_name, LineN=line_n, DontExitF=True)

        if len(recursive_list) != 0:
            recursive_list.sort()
            error_msg("Files recursively included (ignored second inclusion):",
                      recursive_list[0][0], LineN=recursive_list[0][1],
                      DontExitF=True)
            for file_name, line_n, included_file in recursive_list:
                error_msg("%s" % included_file, file_name, LineN=line_n, DontExitF=True)

        if len(not_found_list) != 0 or len(recursive_list) != 0:
            # file_name and line_n will be taken from last iteration of last for loop.
            error_msg("\nNote, that quex does not handle C-Preprocessor instructions.",
                      file_name, LineN=line_n, DontExitF=True,
                      SuppressCode=ErrorN)

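# NOTE: A quick demonstration of the include-detection regular expression used
#       in 'parse' above. The capturing group yields the bare file name for
#       both the quoted and the angle-bracket form (file names invented;
#       're' is already imported by this module, repeated here so the demo
#       stands alone):
import re
_include_demo_re = re.compile("#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]")
assert _include_demo_re.search('#include "token_ids.h"').group(1)  == "token_ids.h"
assert _include_demo_re.search('#  include <my/ids.hpp>').group(1) == "my/ids.hpp"
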
def do(ModeDB):
    IndentationSupportF = blackboard.requires_indentation_count(ModeDB)
    BeginOfLineSupportF = blackboard.requires_begin_of_line_condition_support(ModeDB)
    LanguageDB          = Setup.language_db
    LexerClassName      = Setup.analyzer_class_name

    ConfigurationTemplateFile = (QUEX_PATH
                                 + Setup.language_db["$code_base"]
                                 + "/analyzer/configuration/TXT").replace("//", "/")

    txt = get_file_content_or_die(ConfigurationTemplateFile)

    # -- check if exit/entry handlers have to be active
    entry_handler_active_f = False
    exit_handler_active_f  = False
    for mode in ModeDB.values():
        if len(mode.get_code_fragment_list("on_entry")) != 0: entry_handler_active_f = True
        if len(mode.get_code_fragment_list("on_exit"))  != 0: exit_handler_active_f  = True

    # Buffer filler converter (0x0 means: no buffer filler converter)
    converter_new_str = "# define QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW "
    if Setup.converter_user_new_func != "":
        converter_new_str += Setup.converter_user_new_func + "()"
    else:
        converter_new_str = "/* " + converter_new_str + " */"

    # Token repetition support
    token_repeat_test_txt = ""
    for token_id_str in blackboard.token_repetition_token_id_list:
        token_repeat_test_txt += "TokenID == %s || " % token_id_str
    if token_repeat_test_txt != "":
        token_repeat_test_txt = token_repeat_test_txt[:-3]
    else:
        token_repeat_test_txt = "false"

    if Setup.analyzer_derived_class_name != "":
        analyzer_derived_class_name = Setup.analyzer_derived_class_name
    else:
        analyzer_derived_class_name = Setup.analyzer_class_name

    txt = __switch(txt, "QUEX_OPTION_COLUMN_NUMBER_COUNTING",                Setup.count_column_number_f)
    txt = __switch(txt, "QUEX_OPTION_COMPUTED_GOTOS",                        False)
    txt = __switch(txt, "QUEX_OPTION_CONVERTER_ICONV",                       Setup.converter_iconv_f)
    txt = __switch(txt, "QUEX_OPTION_CONVERTER_ICU",                         Setup.converter_icu_f)
    txt = __switch(txt, "QUEX_OPTION_INCLUDE_STACK",                         Setup.include_stack_support_f)
    txt = __switch(txt, "QUEX_OPTION_LINE_NUMBER_COUNTING",                  Setup.count_line_number_f)
    txt = __switch(txt, "QUEX_OPTION_POST_CATEGORIZER",                      Setup.post_categorizer_f)
    txt = __switch(txt, "QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK",         Setup.mode_transition_check_f)
    txt = __switch(txt, "QUEX_OPTION_STRING_ACCUMULATOR",                    Setup.string_accumulator_f)
    txt = __switch(txt, "QUEX_OPTION_TOKEN_POLICY_QUEUE",                    Setup.token_policy == "queue")
    txt = __switch(txt, "QUEX_OPTION_TOKEN_POLICY_SINGLE",                   Setup.token_policy == "single")
    txt = __switch(txt, "QUEX_OPTION_TOKEN_REPETITION_SUPPORT",              token_repeat_test_txt != "false")
    txt = __switch(txt, "QUEX_OPTION_USER_MANAGED_TOKEN_MEMORY",             Setup.token_memory_management_by_user_f)
    txt = __switch(txt, "__QUEX_OPTION_BIG_ENDIAN",                          Setup.buffer_byte_order == "big")
    txt = __switch(txt, "__QUEX_OPTION_CONVERTER_HELPER",                    Setup.converter_helper_required_f)
    txt = __switch(txt, "__QUEX_OPTION_CONVERTER",                           Setup.converter_f)
    txt = __switch(txt, "QUEX_OPTION_INDENTATION_TRIGGER",                   IndentationSupportF)
    txt = __switch(txt, "__QUEX_OPTION_LITTLE_ENDIAN",                       Setup.buffer_byte_order == "little")
    txt = __switch(txt, "__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT",            entry_handler_active_f)
    txt = __switch(txt, "__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT",             exit_handler_active_f)
    txt = __switch(txt, "__QUEX_OPTION_PLAIN_C",                             Setup.language.upper() == "C")
    txt = __switch(txt, "__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION", BeginOfLineSupportF)
    txt = __switch(txt, "__QUEX_OPTION_SYSTEM_ENDIAN",                       Setup.byte_order_is_that_of_current_system_f)
    txt = __switch(txt, "QUEX_OPTION_BUFFER_BASED_ANALYZIS",                 Setup.buffer_based_analyzis_f)
    txt = __switch(txt, "__QUEX_OPTION_ENGINE_RUNNING_ON_CODEC",             Setup.buffer_codec != "unicode")

    # -- token class related definitions
    token_descr = blackboard.token_type_definition

    # -- name of the character codec
    codec_name = "unicode"
    if Setup.buffer_codec != "unicode":
        codec_name = make_safe_identifier(Setup.buffer_codec).lower()

    # Setup.buffer_element_size can be '-1'. This signals that
    # sizeof(QUEX_TYPE_CHARACTER) needs to be used. A numeric value
    # is required here.
    character_size_str = "%i" % Setup.buffer_element_size

    def namespace(NameSpaceList):
        result = Setup.language_db.NAMESPACE_REFERENCE(NameSpaceList)
        if result == "::": return ""
        assert Setup.language.upper() != "C++" or len(result) > 2, \
               "Error while generating namespace reference '%s'" % result
        return result[:-2]

    txt = blue_print(txt, [
        ["$$BUFFER_LIMIT_CODE$$",                         "0x%X" % Setup.buffer_limit_code],
        ["$$QUEX_SETTING_CHARACTER_CODEC$$",              codec_name],
        ["$$INCLUDE_GUARD_EXTENSION$$",                   get_include_guard_extension(
                                                              LanguageDB.NAMESPACE_REFERENCE(Setup.analyzer_name_space)
                                                              + "__" + Setup.analyzer_class_name)],
        ["$$INITIAL_LEXER_MODE_ID$$",                     "QUEX_NAME(ModeID_%s)" % blackboard.initial_mode.get_pure_code()],
        ["$$LEXER_BUILD_DATE$$",                          time.asctime()],
        ["$$LEXER_CLASS_NAME$$",                          LexerClassName],
        ["$$LEXER_CLASS_NAME_SAFE$$",                     Setup.analyzer_name_safe],
        ["$$LEXER_DERIVED_CLASS_NAME$$",                  analyzer_derived_class_name],
        ["$$MAX_MODE_CLASS_N$$",                          repr(len(ModeDB))],
        ["$$NAMESPACE_MAIN$$",                            namespace(Setup.analyzer_name_space)],
        ["$$NAMESPACE_MAIN_CLOSE$$",                      LanguageDB.NAMESPACE_CLOSE(Setup.analyzer_name_space).replace("\n", "\\\n")],
        ["$$NAMESPACE_MAIN_OPEN$$",                       LanguageDB.NAMESPACE_OPEN(Setup.analyzer_name_space).replace("\n", "\\\n")],
        ["$$NAMESPACE_TOKEN$$",                           namespace(token_descr.name_space)],
        ["$$NAMESPACE_TOKEN_CLOSE$$",                     LanguageDB.NAMESPACE_CLOSE(token_descr.name_space).replace("\n", "\\\n")],
        ["$$NAMESPACE_TOKEN_OPEN$$",                      LanguageDB.NAMESPACE_OPEN(token_descr.name_space).replace("\n", "\\\n")],
        ["$$PATH_TERMINATION_CODE$$",                     "0x%X" % Setup.path_limit_code],
        ["$$QUEX_SETTING_BUFFER_FILLERS_CONVERTER_NEW$$", converter_new_str],
        ["$$QUEX_TYPE_CHARACTER$$",                       Setup.buffer_element_type],
        ["$$QUEX_SETTING_CHARACTER_SIZE$$",               character_size_str],
        ["$$QUEX_NAMESPACE_LEXEME_NULL_OPEN$$",           LanguageDB.NAMESPACE_OPEN(Setup.lexeme_null_namespace).replace("\n", "\\\n")],
        ["$$QUEX_NAMESPACE_LEXEME_NULL_CLOSE$$",          LanguageDB.NAMESPACE_CLOSE(Setup.lexeme_null_namespace).replace("\n", "\\\n")],
        ["$$QUEX_LEXEME_NULL$$",                          Setup.lexeme_null_full_name_cpp],
        ["$$QUEX_LEXEME_NULL_SAFE$$",                     Setup.lexeme_null_name_safe],
        ["$$QUEX_LEXEME_NULL_IN_ITS_NAMESPACE$$",         Setup.lexeme_null_name],
        ["$$QUEX_VERSION$$",                              QUEX_VERSION],
        ["$$TOKEN_CLASS$$",                               token_descr.class_name],
        ["$$TOKEN_CLASS_NAME_SAFE$$",                     token_descr.class_name_safe],
        ["$$TOKEN_COLUMN_N_TYPE$$",                       token_descr.column_number_type.get_pure_code()],
        ["$$TOKEN_ID_TYPE$$",                             token_descr.token_id_type.get_pure_code()],
        ["$$TOKEN_LINE_N_TYPE$$",                         token_descr.line_number_type.get_pure_code()],
        ["$$TOKEN_PREFIX$$",                              Setup.token_id_prefix],
        ["$$TOKEN_QUEUE_SAFETY_BORDER$$",                 repr(Setup.token_queue_safety_border)],
        ["$$TOKEN_QUEUE_SIZE$$",                          repr(Setup.token_queue_size)],
        ["$$TOKEN_REPEAT_TEST$$",                         token_repeat_test_txt],
        ["$$USER_LEXER_VERSION$$",                        Setup.user_application_version_id],
    ])
    return txt