def do(global_setup):
    """Creates a file of token-ids from a given set of names. Also creates
       a function: const string& $$token$$::map_id_to_name().
    """
    # The file contains simply a whitespace separated list of token-names
    output(global_setup)
    return

    # THIS IS A FIRST ATTEMPT TO PARSE FOREIGN TOKEN ID DEFINITIONS
    for input_file in global_setup.input_token_id_db:
        curr_tokens      = file_in.open_file_or_die(input_file).read().split(";")
        curr_token_infos = map(lambda x: TokenInfo(x.split(), input_file), curr_tokens)

        for token_info in curr_token_infos:
            if token_info.name == "": continue

            if token_info.name in lexer_mode.token_id_db.keys():
                print "%s:0:error: token name '%s' defined twice." % (input_file, token_info.name)
                print "%s:0:error: previously defined here." % \
                      lexer_mode.token_id_db[token_info.name].filename
                sys.exit(-1)

            lexer_mode.token_id_db[token_info.name] = token_info
def __copy_files(FileTxt):
    input_directory  = QUEX_PATH
    output_directory = Setup.output_directory

    file_list = map(lambda x: Setup.language_db["$code_base"] + x.strip(), FileTxt.split())

    # Ensure that all directories exist
    directory_list = []
    for file in file_list:
        directory = path.dirname(output_directory + file)
        if directory in directory_list: continue
        directory_list.append(directory)

    # Sort directories by length --> create parent directories before their children
    for directory in sorted(directory_list, key=len):
        if os.access(directory, os.F_OK): continue
        # 'makedirs' also creates required parent directories
        os.makedirs(directory)

    for file in file_list:
        input_file  = input_directory  + file
        output_file = output_directory + file
        # Copy
        content = open_file_or_die(input_file, "rb").read()
        write_safely_and_close(output_file, content)
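# Illustration (hypothetical values): __copy_files() composes every path from
# three parts, so an entry "token/CppDefault.qx" in FileTxt with
# $code_base == "/quex/code_base/" is copied
#
#     from  QUEX_PATH              + "/quex/code_base/token/CppDefault.qx"
#     to    Setup.output_directory + "/quex/code_base/token/CppDefault.qx"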
def get_codec_list_db():
    """Returns a list of the form

           ... [ CODEC_NAME  [CODEC_NAME_LIST]  [LANGUAGE_NAME_LIST] ] ...

       i.e. one entry per codec, with its aliases and related languages.
    """
    global __codec_list_db
    if __codec_list_db != []: return __codec_list_db

    file_name = QUEX_PATH + "/quex/data_base/codecs/00-ALL.txt"
    fh        = open_file_or_die(file_name, "rb")

    # FIELD SEPARATOR:  ';'
    # RECORD SEPARATOR: '\n'
    # FIELDS:           [Python Coding Name]  [Aliases]  [Languages]
    # Aliases and Languages are separated by ','
    __codec_list_db = []
    for line in fh.readlines():
        line = line.strip()
        if len(line) == 0 or line[0] == "#": continue

        fields = map(lambda x: x.strip(), line.split(";"))
        try:
            codec         = fields[0]
            aliases_list  = map(lambda x: x.strip(), fields[1].split(","))
            language_list = map(lambda x: x.strip(), fields[2].split(","))
        except:
            print "Error in line:\n%s\n" % line
            continue
        __codec_list_db.append([codec, aliases_list, language_list])

    fh.close()
    return __codec_list_db
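# Illustrative sketch of the record format parsed above. A hypothetical line
#
#     cp1252; windows-1252, cp819; Western Europe
#
# splits into fields at ';' and sub-fields at ',', yielding the entry
#
#     ["cp1252", ["windows-1252", "cp819"], ["Western Europe"]]
def _demo_parse_codec_record(line):
    fields = [x.strip() for x in line.split(";")]
    return [fields[0],
            [x.strip() for x in fields[1].split(",")],
            [x.strip() for x in fields[2].split(",")]]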
def UserCodeFragment_straighten_open_line_pragmas(filename, Language):
    if Language not in UserCodeFragment_OpenLinePragma.keys(): return

    fh            = open_file_or_die(filename)
    norm_filename = get_file_reference(filename)

    new_content = ""
    line_n      = 0
    LinePragmaInfoList = UserCodeFragment_OpenLinePragma[Language]
    for line in fh.readlines():
        line_n += 1
        if Language == "C":
            for info in LinePragmaInfoList:
                if line.find(info[0]) == -1: continue
                line = info[1]
                # Some standards require line numbers in '#line' pragmas to be
                # < 32768; clamp the value to avoid compiler warnings.
                line = line.replace("NUMBER", repr(int(min(line_n + 1, 32767))))
                # Even under Windows (tm), the '/' is accepted. Thus do not rely on 'normpath'.
                line = line.replace("FILENAME", norm_filename)
                line = line + "\n"
        new_content += line

    fh.close()
    write_safely_and_close(filename, new_content)
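# Illustration: each entry of UserCodeFragment_OpenLinePragma["C"] is a pair
# (marker, replacement template), where the template carries the NUMBER and
# FILENAME placeholders filled in above. With a hypothetical entry such as
#
#     ["<<LINE PRAGMA>>", "#line NUMBER \"FILENAME\""]
#
# a generated line containing the marker is rewritten to, e.g.:
#
#     #line 4711 "simple.qx"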
def __create_database_file(TargetEncoding, TargetEncodingName):
    """Writes a database file for a given TargetEncodingName. The
       TargetEncodingName is required to name the file where the data is to
       be stored.
    """
    encoder        = codecs.getencoder(TargetEncoding)
    prev_output    = -1
    db             = []
    bytes_per_char = -1
    for input in range(0x110000):
        output, n = __get_transformation(encoder, input)

        if bytes_per_char == -1:
            bytes_per_char = n
        elif n != -1 and bytes_per_char != n:
            print "# not a constant size byte format."
            return False

        # Detect discontinuity in the mapping
        if prev_output == -1:
            if output != -1:
                input_interval        = Interval(input)
                target_interval_begin = output
        elif output != prev_output + 1:
            # If the interval was valid, append it to the database
            input_interval.end = input
            db.append((input_interval, target_interval_begin))
            # If the interval ahead is valid, prepare an object for it
            if output != -1:
                input_interval        = Interval(input)
                target_interval_begin = output

        prev_output = output

    if prev_output != -1:
        input_interval.end = input + 1   # 'end' is exclusive
        db.append((input_interval, target_interval_begin))

    fh = open_file_or_die(QUEX_PATH + "/quex/data_base/codecs/%s.dat" % TargetEncoding, "wb")
    fh.write("# Describes mapping from Unicode code point to character code in %s (%s)\n" \
             % (TargetEncoding, TargetEncodingName))
    fh.write("# [SourceInterval.begin] [SourceInterval.Size] [TargetInterval.begin] (all in hexadecimal)\n")
    for i, t in db:
        fh.write("%X %X %X\n" % (i.begin, i.end - i.begin, t))
    fh.close()
    return True
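# Standalone sketch of the interval compression used above: runs of inputs
# whose outputs are also consecutive collapse into one (begin, end, target)
# record with an exclusive 'end'. Plain tuples stand in for the Interval
# class; the unmapped '-1' case of the real loop is omitted for brevity.
def _demo_compress(mapping):
    db     = []
    begin  = None
    target = None
    prev   = None
    for i, out in enumerate(mapping):
        if prev is not None and out == prev + 1:
            prev = out
            continue                      # run continues
        if begin is not None:
            db.append((begin, i, target)) # run broken: flush record
        begin, target = i, out
        prev = out
    if begin is not None:
        db.append((begin, len(mapping), target))
    return db

# _demo_compress([10, 11, 12, 40, 41]) --> [(0, 3, 10), (3, 5, 40)]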
def parse_token_id_file(ForeignTokenIdFile, TokenPrefix, CommentDelimiterList, IncludeRE):
    """Interprets the user defined token id file--if there is one--in order
       to find the names of defined token ids. It does some basic
       interpretation and include file following, but: **it is in no way
       perfect**. Since its only purpose is to avoid warnings about token
       ids that are not defined, occasional failure is acceptable. It is
       more like a nice feature: quex tries to find definitions on its own.
       Nevertheless, it should work in the large majority of cases.
    """
    include_re_obj = re.compile(IncludeRE)

    # validate(...) ensured that the file exists.
    work_list = [ ForeignTokenIdFile ]
    done_list = []
    while work_list != []:
        file_name = work_list.pop()
        done_list.append(file_name)

        fh      = open_file_or_die(file_name, Mode="rb")
        content = fh.read()

        # delete any comment inside the file
        for opener, closer in CommentDelimiterList:
            content = delete_comment(content, opener, closer, LeaveNewlineDelimiter=True)

        # add any found token id to the list
        token_id_finding_list = extract_identifiers_with_specific_prefix(content, TokenPrefix)
        for token_name, line_n in token_id_finding_list:
            prefix_less_token_name = token_name[len(TokenPrefix):]
            # NOTE: The line number might be wrong, because of the comment deletion.
            # NOTE: The actual token value is not important, since the token's numeric
            #       identifier is defined in the user's header. We do not care.
            token_id_db[prefix_less_token_name] = \
                    TokenInfo(prefix_less_token_name, None, None, fh.name, line_n)

        # find "#include" statements
        include_file_list = include_re_obj.findall(content)
        include_file_list = filter(lambda file: file not in done_list,    include_file_list)
        include_file_list = filter(lambda file: os.access(file, os.F_OK), include_file_list)
        work_list.extend(include_file_list)

        fh.close()
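# Illustration (hypothetical prefix and header): with TokenPrefix = "TKN_",
# a user header containing
#
#     #define TKN_IDENTIFIER  100
#     #define TKN_NUMBER      101
#
# makes the scan above register "IDENTIFIER" and "NUMBER" in token_id_db.
# As the NOTE in the loop says, the numeric values are irrelevant here; only
# the names matter for suppressing warnings about undefined token ids.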
def get_codec_transformation_info(Codec, FH=-1, LineN=None):
    """Provides the information about the relation of character codes in a
       particular coding to unicode character codes. It is provided in the
       following form:

          # Codec Values                 Unicode Values
          [ (Source0_Begin, Source0_End, TargetInterval0_Begin),
            (Source1_Begin, Source1_End, TargetInterval1_Begin),
            (Source2_Begin, Source2_End, TargetInterval2_Begin),
            ... ]

       Arguments FH and LineN correspond to the arguments of error_msg.
    """
    distinct_codec = __get_distinct_codec_name_for_alias(Codec)
    file_name      = QUEX_PATH + "/quex/data_base/codecs/%s.dat" % distinct_codec
    fh             = open_file_or_die(file_name, "rb")

    # Read the coding into a data structure
    transformation_list = []
    line_n = 0
    try:
        for line in fh.readlines():
            line_n += 1
            line = line.strip()
            if line == "" or line[0] == "#": continue

            fields       = line.split()
            source_begin = int("0x" + fields[0], 16)
            source_end   = source_begin + int("0x" + fields[1], 16)
            target_begin = int("0x" + fields[2], 16)
            transformation_list.append([source_begin, source_end, target_begin])
    except:
        error_msg("Syntax error in database file for codec '%s'." % Codec,
                  fh.name, line_n)

    return transformation_list
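# Minimal sketch of how a transformation list, as returned above, may be
# applied: records are [source_begin, source_end, target_begin] with an
# exclusive source_end, so a lookup is a linear scan plus an offset.
# (Illustration only; the record in the usage example is hypothetical.)
def _demo_apply_transformation(transformation_list, value):
    for source_begin, source_end, target_begin in transformation_list:
        if source_begin <= value < source_end:
            return target_begin + (value - source_begin)
    return -1  # value not covered by any interval

# _demo_apply_transformation([[0xA0, 0xFF, 0x2500]], 0xA1) --> 0x2501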
import os
import sys
sys.path.insert(0, os.environ["QUEX_PATH"])

from   quex.output.cpp.core        import write_configuration_header
from   quex.input.setup            import setup as Setup
import quex.input.setup_parser     as setup_parser
from   quex.frs_py.file_in         import open_file_or_die
import quex.input.quex_file_parser as quex_file_parser

setup_parser.do(["-i", "nothing", "-o", "TestAnalyzer",
                 "--token-policy", "single", "--no-include-stack"])

# Parse the default token file
fh = open_file_or_die(os.environ["QUEX_PATH"]
                      + Setup.language_db["$code_base"]
                      + Setup.language_db["$token-default-file"])
quex_file_parser.parse_section(fh)
fh.close()

BeginOfLineSupportF = True
IndentationSupportF = False

txt = write_configuration_header({}, IndentationSupportF, BeginOfLineSupportF)

open("TestAnalyzer-configuration", "w").write(txt)
def _do(Descr):
    # The following things must be ensured before the function is called
    assert Descr is not None
    assert Descr.__class__.__name__ == "TokenTypeDescriptor"
    ## ALLOW: Descr.get_member_db().keys() == []

    TemplateFile  = QUEX_PATH \
                    + Setup.language_db["$code_base"] \
                    + Setup.language_db["$token_template_file"]

    TemplateIFile = QUEX_PATH \
                    + Setup.language_db["$code_base"] \
                    + Setup.language_db["$token_template_i_file"]

    template_str   = open_file_or_die(TemplateFile,  Mode="rb").read()
    template_i_str = open_file_or_die(TemplateIFile, Mode="rb").read()

    virtual_destructor_str = ""
    if Descr.open_for_derivation_f: virtual_destructor_str = "virtual "

    if Descr.copy.get_pure_code() == "":
        # Default copy operation: Plain copy of the token memory
        copy_str = "__QUEX_STD_memcpy((void*)__this, (void*)__That, sizeof(QUEX_TYPE_TOKEN));\n"
    else:
        copy_str = Descr.copy.get_code()

    take_text_str = Descr.take_text.get_code()
    if take_text_str == "": take_text_str = "return true;\n"

    include_guard_extension_str = get_include_guard_extension(
            Setup.language_db["$namespace-ref"](Descr.name_space)
            + "__" + Descr.class_name)

    # In case of plain 'C' the class name must incorporate the namespace (list)
    token_class_name = Descr.class_name
    if Setup.language == "C":
        token_class_name = Setup.token_class_name_safe

    txt = blue_print(template_str, [
        ["$$BODY$$",                    Descr.body.get_code()],
        ["$$CONSTRUCTOR$$",             Descr.constructor.get_code()],
        ["$$COPY$$",                    copy_str],
        ["$$DESTRUCTOR$$",              Descr.destructor.get_code()],
        ["$$DISTINCT_MEMBERS$$",        get_distinct_members(Descr)],
        ["$$FOOTER$$",                  Descr.footer.get_code()],
        ["$$FUNC_TAKE_TEXT$$",          take_text_str],
        ["$$HEADER$$",                  Descr.header.get_code()],
        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension_str],
        ["$$NAMESPACE_CLOSE$$",         Setup.language_db["$namespace-close"](Descr.name_space)],
        ["$$NAMESPACE_OPEN$$",          Setup.language_db["$namespace-open"](Descr.name_space)],
        ["$$QUICK_SETTERS$$",           get_quick_setters(Descr)],
        ["$$SETTERS_GETTERS$$",         get_setter_getter(Descr)],
        ["$$TOKEN_CLASS$$",             token_class_name],
        ["$$TOKEN_REPETITION_N_GET$$",  Descr.repetition_get.get_code()],
        ["$$TOKEN_REPETITION_N_SET$$",  Descr.repetition_set.get_code()],
        ["$$UNION_MEMBERS$$",           get_union_members(Descr)],
        ["$$VIRTUAL_DESTRUCTOR$$",      virtual_destructor_str],
    ])

    txt_i = blue_print(template_i_str, [
        ["$$CONSTRUCTOR$$",             Descr.constructor.get_code()],
        ["$$COPY$$",                    copy_str],
        ["$$DESTRUCTOR$$",              Descr.destructor.get_code()],
        ["$$FOOTER$$",                  Descr.footer.get_code()],
        ["$$FUNC_TAKE_TEXT$$",          take_text_str],
        ["$$INCLUDE_GUARD_EXTENSION$$", include_guard_extension_str],
        ["$$NAMESPACE_CLOSE$$",         Setup.language_db["$namespace-close"](Descr.name_space)],
        ["$$NAMESPACE_OPEN$$",          Setup.language_db["$namespace-open"](Descr.name_space)],
        ["$$TOKEN_CLASS$$",             token_class_name],
        ["$$TOKEN_REPETITION_N_GET$$",  Descr.repetition_get.get_code()],
        ["$$TOKEN_REPETITION_N_SET$$",  Descr.repetition_set.get_code()],
    ])

    # Return declaration and implementation as two strings
    return txt, txt_i
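# _do() fills the templates via blue_print() (defined elsewhere in quex).
# A minimal stand-in with the same call shape, for illustration only:
def _demo_blue_print(template_str, replacements):
    # Replace each [placeholder, substitute] pair, one after the other.
    for placeholder, substitute in replacements:
        template_str = template_str.replace(placeholder, substitute)
    return template_str

# _demo_blue_print("class $$TOKEN_CLASS$$;", [["$$TOKEN_CLASS$$", "Token"]])
# --> "class Token;"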
def write_engine_header(Modes, Setup):
    QuexClassHeaderFileTemplate = (Setup.QUEX_TEMPLATE_DB_DIR
                                   + "/template/lexical_analyzer_class").replace("//", "/")
    CoreEngineDefinitionsHeader = (Setup.QUEX_TEMPLATE_DB_DIR + "/core_engine/").replace("//", "/")
    QuexClassHeaderFileOutput   = Setup.output_file_stem
    LexerClassName              = Setup.output_engine_name
    VersionID                   = Setup.input_application_version_id
    QuexVersionID               = Setup.QUEX_VERSION

    # -- determine the character type according to the number of bytes per ucs
    #    character code point for the internal engine.
    quex_character_type_str = { 1: "uint8_t ", 2: "uint16_t", 4: "uint32_t",
                                "wchar_t": "wchar_t" }[Setup.bytes_per_ucs_code_point]
    quex_lexeme_type_str    = { 1: "char    ", 2: "int16_t",  4: "int32_t",
                                "wchar_t": "wchar_t" }[Setup.bytes_per_ucs_code_point]

    # -- is the byte order of integers 'little endian' or 'big endian'?
    if Setup.byte_order == "little":
        quex_coding_name_str = { 1: "ASCII", 2: "UCS-2LE", 4: "UCS-4LE",
                                 "wchar_t": "WCHAR_T" }[Setup.bytes_per_ucs_code_point]
    else:
        quex_coding_name_str = { 1: "ASCII", 2: "UCS-2BE", 4: "UCS-4BE",
                                 "wchar_t": "WCHAR_T" }[Setup.bytes_per_ucs_code_point]

    # -- determine whether the lexical analyser needs indentation counting
    #    support. If one mode has an indentation handler, then indentation
    #    support must be provided.
    indentation_support_f = False
    for mode in Modes.values():
        if mode.on_indentation.get_code() != "":
            indentation_support_f = True
            break

    lex_id_definitions_str = ""
    # NOTE: First mode-id needs to be '1' for compatibility with flex generated engines
    i = 0
    for name in Modes.keys():
        i += 1
        lex_id_definitions_str += "const int LEX_ID_%s = %i;\n" % (name, i)

    include_guard_extension = get_include_guard_extension(Setup.output_file_stem)

    # -- instances of mode classes as members of the lexer
    mode_object_members_txt,     \
    constructor_txt,             \
    mode_specific_functions_txt, \
    friend_txt =                 \
        get_mode_class_related_code_fragments(Modes.values(), LexerClassName)

    # -- define a pointer that directly has the type of the derived class
    if Setup.input_derived_class_name == "":
        Setup.input_derived_class_name = LexerClassName
        derived_class_type_declaration = ""
    else:
        derived_class_type_declaration = "class %s;" % Setup.input_derived_class_name

    # -- the friends of the class
    friends_str = ""
    for friend in Setup.input_lexer_class_friends:
        friends_str += " friend class %s;\n" % friend

    # -- the class body extension
    class_body_extension_str = lexer_mode.class_body.get_code()

    # -- the class constructor extension
    class_constructor_extension_str = lexer_mode.class_init.get_code()

    fh = open_file_or_die(QuexClassHeaderFileTemplate)
    template_code_txt = fh.read()
    fh.close()

    # -- check if exit/entry handlers have to be active
    entry_handler_active_f = False
    exit_handler_active_f  = False
    for mode in Modes.values():
        if mode.on_entry_code_fragments() != []: entry_handler_active_f = True
        if mode.on_exit_code_fragments()  != []: exit_handler_active_f  = True

    txt = template_code_txt

    def set_switch(txt, SwitchF, Name):
        if SwitchF: txt = txt.replace("$$SWITCH$$ %s" % Name, "#define %s"    % Name)
        else:       txt = txt.replace("$$SWITCH$$ %s" % Name, "// #define %s" % Name)
        return txt

    txt = set_switch(txt, entry_handler_active_f, "__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT")
    txt = set_switch(txt, exit_handler_active_f,  "__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT")
    txt = set_switch(txt, indentation_support_f,  "__QUEX_OPTION_INDENTATION_TRIGGER_SUPPORT")
    txt = set_switch(txt, True,                   "__QUEX_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION")
    txt = set_switch(txt, Setup.enable_iconv_f,   "QUEX_OPTION_ENABLE_ICONV")
    txt = set_switch(txt, not Setup.disable_token_queue_f,        "QUEX_OPTION_TOKEN_SENDING_VIA_QUEUE")
    txt = set_switch(txt, not Setup.disable_string_accumulator_f, "QUEX_OPTION_STRING_ACCUMULATOR")
    txt = set_switch(txt, Setup.post_categorizer_f,               "QUEX_OPTION_POST_CATEGORIZER")
    txt = set_switch(txt, True,                   "QUEX_OPTION_VIRTUAL_FUNCTION_ON_ACTION_ENTRY")
    txt = set_switch(txt, True,                   "QUEX_OPTION_LINE_NUMBER_COUNTING")
    txt = set_switch(txt, True,                   "QUEX_OPTION_COLUMN_NUMBER_COUNTING")
    txt = set_switch(txt, Setup.output_debug_f,   "QUEX_OPTION_DEBUG_TOKEN_SENDING")
    txt = set_switch(txt, Setup.output_debug_f,   "QUEX_OPTION_DEBUG_MODE_TRANSITIONS")
    txt = set_switch(txt, Setup.output_debug_f,   "QUEX_OPTION_DEBUG_QUEX_PATTERN_MATCHES")
    txt = set_switch(txt, True,                   "QUEX_OPTION_INCLUDE_STACK_SUPPORT")
    txt = set_switch(txt, not Setup.no_mode_transition_check_f,   "QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK")

    txt = blue_print(txt, [
        ["$$BUFFER_LIMIT_CODE$$",                       "0x%X" % Setup.buffer_limit_code],
        ["$$CONSTRUCTOR_EXTENSTION$$",                  class_constructor_extension_str],
        ["$$CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE$$", constructor_txt],
        ["$$CORE_ENGINE_DEFINITIONS_HEADER$$",          CoreEngineDefinitionsHeader],
        ["$$CLASS_BODY_EXTENSION$$",                    class_body_extension_str],
        ["$$INCLUDE_GUARD_EXTENSION$$",                 include_guard_extension],
        ["$$INITIAL_LEXER_MODE_ID$$",                   "LEX_ID_" + lexer_mode.initial_mode.get_code()],
        ["$$LEXER_BUILD_DATE$$",                        time.asctime()],
        ["$$LEXER_BUILD_VERSION$$",                     VersionID],
        ["$$LEXER_CLASS_FRIENDS$$",                     friends_str],
        ["$$LEXER_CLASS_NAME$$",                        LexerClassName],
        ["$$LEXER_DERIVED_CLASS_DECL$$",                derived_class_type_declaration],
        ["$$LEXER_DERIVED_CLASS_NAME$$",                Setup.input_derived_class_name],
        ["$$LEX_ID_DEFINITIONS$$",                      lex_id_definitions_str],
        ["$$MAX_MODE_CLASS_N$$",                        repr(len(Modes))],
        ["$$MODE_CLASS_FRIENDS$$",                      friend_txt],
        ["$$MODE_OBJECT_MEMBERS$$",                     mode_object_members_txt],
        ["$$MODE_SPECIFIC_ANALYSER_FUNCTIONS$$",        mode_specific_functions_txt],
        ["$$PRETTY_INDENTATION$$",                      " " + " " * (len(LexerClassName)*2 + 2)],
        ["$$QUEX_TEMPLATE_DIR$$",                       Setup.QUEX_TEMPLATE_DB_DIR],
        ["$$QUEX_VERSION$$",                            QuexVersionID],
        ["$$TOKEN_CLASS$$",                             Setup.input_token_class_name],
        ["$$TOKEN_CLASS_DEFINITION_FILE$$",             Setup.input_token_class_file.replace("//", "/")],
        ["$$TOKEN_ID_DEFINITION_FILE$$",                Setup.output_token_id_file.replace("//", "/")],
        ["$$QUEX_CHARACTER_TYPE$$",                     quex_character_type_str],
        ["$$QUEX_LEXEME_TYPE$$",                        quex_lexeme_type_str],
        ["$$CORE_ENGINE_CHARACTER_CODING$$",            quex_coding_name_str],
        ["$$USER_DEFINED_HEADER$$",                     lexer_mode.header.get_code() + "\n"],
    ])

    fh_out = open(QuexClassHeaderFileOutput, "wb")
    if os.linesep != "\n": txt = txt.replace("\n", os.linesep)
    fh_out.write(txt)
    fh_out.close()
def do(Modes, setup):
    QuexClassHeaderFileTemplate = (setup.QUEX_TEMPLATE_DB_DIR
                                   + "/template/lexical_analyzer_class-C").replace("//", "/")
    CoreEngineDefinitionsHeader = (setup.QUEX_TEMPLATE_DB_DIR + "/core_engine/").replace("//", "/")
    if setup.plain_memory_f: CoreEngineDefinitionsHeader += "definitions-plain-memory.h"
    else:                    CoreEngineDefinitionsHeader += "definitions-quex-buffer.h"
    QuexClassHeaderFileOutput   = setup.output_file_stem
    LexerClassName              = setup.output_engine_name
    VersionID                   = setup.input_application_version_id
    QuexVersionID               = setup.QUEX_VERSION
    DerivedClassHeaderFileName  = setup.input_derived_class_file
    ModeClassImplementationFile = setup.output_code_file

    # -- determine whether the lexical analyser needs indentation counting
    #    support. If one mode has an indentation handler, then indentation
    #    support must be provided.
    indentation_support_f = False
    for mode in Modes.values():
        if mode.on_indentation.line_n != -1:
            indentation_support_f = True
            break

    lex_id_definitions_str = ""
    # NOTE: First mode-id needs to be '1' for compatibility with flex generated engines
    i = 0
    for name in Modes.keys():
        i += 1
        lex_id_definitions_str += "const int LEX_ID_%s = %i;\n" % (name, i)

    include_guard_extension = get_include_guard_extension(setup.output_file_stem)

    # -- mode class member function definitions (on_entry, on_exit, has_base, ...)
    mode_class_member_functions_txt = mode_classes.do(Modes.values())

    # -- instances of mode classes as members of the lexer
    mode_object_members_txt,     \
    constructor_txt,             \
    mode_specific_functions_txt, \
    friend_txt =                 \
        get_mode_class_related_code_fragments(Modes.values())

    # -- get the code for the user defined all-match actions
    try:
        fh_aux = open(setup.output.user_match_action)
        user_match_action_str = fh_aux.read()
        fh_aux.close()
    except:
        user_match_action_str = "/* no extra class content */"

    # -- define a pointer that directly has the type of the derived class
    if setup.input_derived_class_name == "":
        setup.input_derived_class_name = LexerClassName
        derived_class_type_declaration = ""
    else:
        derived_class_type_declaration = "class %s;" % setup.input_derived_class_name

    # -- the friends of the class
    friends_str = ""
    for friend in setup.input_lexer_class_friends:
        friends_str += " friend class %s;\n" % friend

    # -- the class body extension
    class_body_extension_str = lexer_mode.class_body.get_code()

    # -- the class constructor extension
    class_constructor_extension_str = lexer_mode.class_init.get_code()

    fh = open_file_or_die(QuexClassHeaderFileTemplate)
    template_code_txt = fh.read()
    fh.close()

    # -- check if exit/entry handlers have to be active
    entry_handler_active_f = False
    exit_handler_active_f  = False
    for mode in Modes.values():
        if mode.on_entry_code_fragments() != []: entry_handler_active_f = True
        if mode.on_exit_code_fragments()  != []: exit_handler_active_f  = True

    txt = template_code_txt

    def set_switch(txt, SwitchF, Name):
        if SwitchF: txt = txt.replace("%%%%SWITCH%%%% %s" % Name, "#define %s"    % Name)
        else:       txt = txt.replace("%%%%SWITCH%%%% %s" % Name, "// #define %s" % Name)
        return txt

    txt = set_switch(txt, entry_handler_active_f, "__QUEX_OPTION_ON_ENTRY_HANDLER_PRESENT")
    txt = set_switch(txt, exit_handler_active_f,  "__QUEX_OPTION_ON_EXIT_HANDLER_PRESENT")
    txt = set_switch(txt, indentation_support_f,  "__QUEX_OPTION_INDENTATION_TRIGGER_SUPPORT")
    txt = set_switch(txt, setup.plain_memory_f,   "__QUEX_CORE_OPTION_PLAIN_MEMORY_BASED")
    txt = set_switch(txt, True,                   "__QUEX_CORE_OPTION_SUPPORT_BEGIN_OF_LINE_PRE_CONDITION")
    txt = set_switch(txt, True,                   "QUEX_OPTION_VIRTUAL_FUNCTION_ON_ACTION_ENTRY")
    txt = set_switch(txt, False,                  "QUEX_OPTION_NO_LINE_NUMBER_COUNTING")
    txt = set_switch(txt, False,                  "QUEX_OPTION_NO_COLUMN_NUMBER_COUNTING")

    txt = blue_print(txt, [
        ["%%CONSTRUCTOR_EXTENSTION%%",                  class_constructor_extension_str],
        ["%%CONSTRUCTOR_MODE_DB_INITIALIZATION_CODE%%", constructor_txt],
        ["%%CORE_ENGINE_DEFINITIONS_HEADER%%",          CoreEngineDefinitionsHeader],
        ["%%CLASS_BODY_EXTENSION%%",                    class_body_extension_str],
        ["%%INCLUDE_GUARD_EXTENSION%%",                 include_guard_extension],
        ["%%INITIAL_LEXER_MODE_ID%%",                   "LEX_ID_" + lexer_mode.initial_mode.get_code()],
        ["%%LEXER_BUILD_DATE%%",                        time.asctime()],
        ["%%LEXER_BUILD_VERSION%%",                     VersionID],
        ["%%LEXER_CLASS_FRIENDS%%",                     friends_str],
        ["$$LEXER_CLASS_NAME$$",                        LexerClassName],
        ["%%LEXER_DERIVED_CLASS_DECL%%",                derived_class_type_declaration],
        ["%%LEXER_DERIVED_CLASS_NAME%%",                setup.input_derived_class_name],
        ["%%LEX_ID_DEFINITIONS%%",                      lex_id_definitions_str],
        ["%%MAX_MODE_CLASS_N%%",                        repr(len(Modes))],
        ["%%MODE_CLASS_FRIENDS%%",                      friend_txt],
        ["%%MODE_OBJECT_MEMBERS%%",                     mode_object_members_txt],
        ["%%MODE_SPECIFIC_ANALYSER_FUNCTIONS%%",        mode_specific_functions_txt],
        ["%%PRETTY_INDENTATION%%",                      " " + " " * (len(LexerClassName)*2 + 2)],
        ["%%QUEX_TEMPLATE_DIR%%",                       setup.QUEX_TEMPLATE_DB_DIR],
        ["%%QUEX_VERSION%%",                            QuexVersionID],
        ["%%TOKEN_CLASS%%",                             setup.input_token_class_name],
        ["%%TOKEN_CLASS_DEFINITION_FILE%%",             setup.input_token_class_file.replace("//", "/")],
        ["%%TOKEN_ID_DEFINITION_FILE%%",                setup.output_token_id_file.replace("//", "/")],
        ["%%QUEX_OUTPUT_FILESTEM%%",                    setup.output_file_stem],
    ])

    fh_out = open(QuexClassHeaderFileOutput, "w")
    fh_out.write(txt)
    fh_out.close()

    fh_out = open(ModeClassImplementationFile, "w")
    fh_out.write(lexer_mode.header.get() + "\n")

    if DerivedClassHeaderFileName != "":
        fh_out.write("#include<" + DerivedClassHeaderFileName + ">\n")
    else:
        fh_out.write("#include<" + setup.output_file_stem + ">\n")

    fh_out.write("namespace quex {\n")

    mode_class_member_functions_txt = \
        blue_print(mode_class_member_functions_txt,
                   [["$$LEXER_CLASS_NAME$$",         LexerClassName],
                    ["%%TOKEN_CLASS%%",              setup.input_token_class_name],
                    ["%%LEXER_DERIVED_CLASS_NAME%%", setup.input_derived_class_name]])

    fh_out.write(mode_class_member_functions_txt)
    fh_out.write("} // END: namespace quex\n")
    fh_out.close()


quex_mode_init_call_str = """
    quex_mode_init(&%%MN%%, this,
                   LEX_ID_%%MN%%, "%%MN%%",
                   $analyser_function,
#ifdef __QUEX_OPTION_INDENTATION_TRIGGER_SUPPORT
                   $on_indentation,
#endif
                   $on_entry,
                   $on_exit
#ifdef __QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK
                   ,
                   $has_base,
                   $has_entry_from,
                   $has_exit_to
#endif
                   );
"""

def __get_mode_init_call(mode):
    analyser_function = "$$LEXER_CLASS_NAME$$__%s_analyser_function" % mode.name
    on_indentation    = "$$LEXER_CLASS_NAME$$__%s_on_indentation"    % mode.name
    on_entry          = "$$LEXER_CLASS_NAME$$__%s_on_entry"          % mode.name
    on_exit           = "$$LEXER_CLASS_NAME$$__%s_on_exit"           % mode.name
    has_base          = "$$LEXER_CLASS_NAME$$__%s_has_base"          % mode.name
    has_entry_from    = "$$LEXER_CLASS_NAME$$__%s_has_entry_from"    % mode.name
    has_exit_to       = "$$LEXER_CLASS_NAME$$__%s_has_exit_to"       % mode.name

    if mode.options["inheritable"] == "only":
        analyser_function = "/* %s = */ 0x0" % analyser_function

    if mode.on_entry_code_fragments() == []:
        on_entry = "/* %s = */ $$LEXER_CLASS_NAME$$_on_entry_exit_null_function" % on_entry

    if mode.on_exit_code_fragments() == []:
        on_exit = "/* %s = */ $$LEXER_CLASS_NAME$$_on_entry_exit_null_function" % on_exit

    if mode.on_indentation_code_fragments() == []:
        on_indentation = "/* %s = */ 0x0" % on_indentation

    txt = blue_print(quex_mode_init_call_str,
                     [["%%MN%%",             mode.name],
                      ["$analyser_function", analyser_function],
                      ["$on_indentation",    on_indentation],
                      ["$on_entry",          on_entry],
                      ["$on_exit",           on_exit],
                      ["$has_base",          has_base],
                      ["$has_entry_from",    has_entry_from],
                      ["$has_exit_to",       has_exit_to]])
    return txt

def __get_mode_function_declaration(Modes, FriendF=False):
    if FriendF: prolog = " friend "
    else:       prolog = " extern "

    def __mode_functions(Prolog, ReturnType, NameList, ArgList):
        txt = ""
        for name in NameList:
            function_signature = "%s $$LEXER_CLASS_NAME$$__%s_%s(%s);" % \
                                 (ReturnType, mode.name, name, ArgList)
            txt += "%s" % Prolog + " " + function_signature + "\n"
        return txt

    txt = ""
    for mode in Modes:
        if mode.options["inheritable"] != "only":
            txt += __mode_functions(prolog, "__QUEX_SETTING_ANALYSER_FUNCTION_RETURN_TYPE",
                                    ["analyser_function"],
                                    "$$LEXER_CLASS_NAME$$*")
    for mode in Modes:
        if mode.on_indentation_code_fragments() != []:
            txt += __mode_functions(prolog, "void", ["on_indentation"],
                                    "$$LEXER_CLASS_NAME$$*, const quex_mode*")
    for mode in Modes:
        if mode.on_entry_code_fragments() != []:
            txt += __mode_functions(prolog, "void", ["on_entry"],
                                    "$$LEXER_CLASS_NAME$$*, const quex_mode*")
        if mode.on_exit_code_fragments() != []:
            txt += __mode_functions(prolog, "void", ["on_exit"],
                                    "$$LEXER_CLASS_NAME$$*, const quex_mode*")

    txt += "#ifdef __QUEX_OPTION_RUNTIME_MODE_TRANSITION_CHECK\n"
    for mode in Modes:
        txt += __mode_functions(prolog, "bool",
                                ["has_base", "has_entry_from", "has_exit_to"],
                                "const quex_mode*")
    txt += "#endif\n"
    txt += "\n"

    return txt

def get_mode_class_related_code_fragments(Modes):
    """
       RETURNS: -- members of the lexical analyzer class for the mode classes
                -- static member functions declaring the analyzer functions for
                   the mode classes
                -- constructor text to be executed at construction time
                -- friend declarations for the mode classes/functions
    """
    L = max(map(lambda m: len(m.name), Modes))

    members_txt = ""
    for mode in Modes:
        members_txt += " quex_mode %s;\n" % mode.name

    # constructor code
    txt = ""
    for mode in Modes:
        txt += " assert(LEX_ID_%s %s<= %i);\n" % (mode.name, " " * (L - len(mode.name)), len(Modes))
    for mode in Modes:
        txt += __get_mode_init_call(mode)
    for mode in Modes:
        txt += " mode_db[LEX_ID_%s]%s = &%s;\n" % (mode.name, " " * (L - len(mode.name)), mode.name)
    constructor_txt = txt

    mode_functions_txt = __get_mode_function_declaration(Modes, FriendF=False)
    friends_txt        = __get_mode_function_declaration(Modes, FriendF=True)

    return members_txt,        \
           constructor_txt,    \
           mode_functions_txt, \
           friends_txt
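# Illustration: for a hypothetical mode 'PROGRAM' without entry/exit handlers,
# __get_mode_init_call() produces (shown here with $$LEXER_CLASS_NAME$$
# already substituted by a lexer class 'MyLexer' in the later blue_print pass)
# a C call along the lines of:
#
#     quex_mode_init(&PROGRAM, this,
#                    LEX_ID_PROGRAM, "PROGRAM",
#                    MyLexer__PROGRAM_analyser_function,
#                    ...
#                    /* MyLexer__PROGRAM_on_entry = */ MyLexer_on_entry_exit_null_function,
#                    /* MyLexer__PROGRAM_on_exit = */ MyLexer_on_entry_exit_null_function
#                    ... );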