def __copy_files(FileTxt):
    """Copy a whitespace-separated list of support files (given in 'FileTxt',
    relative to the language's code base) from the quex installation directory
    (QUEX_PATH) into the configured output directory, creating any missing
    target directories on the way.

    FileTxt -- string of file names separated by whitespace.
    Returns nothing; writes the copied files as a side effect.
    """
    input_directory  = QUEX_PATH
    output_directory = Setup.output_directory

    # NOTE(review): names are relative to the language code base, e.g. 'code_base/...'.
    file_list = [Setup.language_db["$code_base"] + name.strip()
                 for name in FileTxt.split()]

    # Collect the set of required target directories.
    # (A set avoids the O(n^2) 'if x in list' duplicate check; 'file_name'
    #  avoids shadowing the 'file' builtin.)
    directory_set = set()
    for file_name in file_list:
        directory_set.add(path.dirname(output_directory + file_name))

    # Sort directories by length --> parent directories come before children.
    for directory in sorted(directory_set, key=len):
        if os.access(directory, os.F_OK):
            continue
        # 'makedirs' also creates missing parent directories.
        os.makedirs(directory)

    for file_name in file_list:
        # Read binary to copy content byte-for-byte.
        content = open_file_or_die(input_directory + file_name, "rb").read()
        write_safely_and_close(output_directory + file_name, content)
def UserCodeFragment_straighten_open_line_pragmas(filename, Language):
    """Rewrite 'open line pragma' marker lines in 'filename' so that they
    carry the real line number and normalized file name of the generated
    file. Does nothing for languages without a registered pragma pattern.

    filename -- file to patch in place.
    Language -- key into UserCodeFragment_OpenLinePragma (e.g. "C").
    """
    # Membership test directly on the dict; '.keys()' would build a list (py2).
    if Language not in UserCodeFragment_OpenLinePragma:
        return

    fh            = open_file_or_die(filename)
    norm_filename = get_file_reference(filename)

    LinePragmaInfoList = UserCodeFragment_OpenLinePragma[Language]
    # Collect lines and join once at the end; 'new_content += line' in a loop
    # is quadratic in file size.
    new_content = []
    line_n      = 0
    # Iterate the file object directly instead of materializing readlines().
    for line in fh:
        line_n += 1
        if Language == "C":
            for info in LinePragmaInfoList:
                # info[0] = marker to search for; info[1] = replacement template.
                if line.find(info[0]) == -1:
                    continue
                line = info[1]
                # Since by some definition, line number pragmas < 32768; let us avoid
                # compiler warnings by capping the emitted line number.
                line = line.replace("NUMBER", repr(int(min(line_n + 1, 32767))))
                # Even under Windows (tm), the '/' is accepted. Thus do not rely on 'normpath'
                line = line.replace("FILENAME", norm_filename)
                line = line + "\n"
        new_content.append(line)

    fh.close()
    write_safely_and_close(filename, "".join(new_content))
def do():
    """Write the buffer-codec conversion headers for the configured codec.

    No-op when no buffer codec is configured, or when the codec is a
    'state-split' variant (utf8/utf16), for which no transformation-info
    header is generated here.
    """
    if Setup.buffer_codec == "":
        return
    # Tuple membership test; no header is produced for state-split codecs
    # (presumably handled by the state machine generation itself -- TODO confirm).
    if Setup.buffer_codec_transformation_info in ("utf8-state-split", "utf16-state-split"):
        return
    # PEP 8: compare with None via 'is not', not '!='.
    assert Setup.buffer_codec_transformation_info is not None

    txt, txt_i = _do(Setup.buffer_codec_transformation_info, Setup.buffer_codec)
    write_safely_and_close(Setup.output_buffer_codec_header,   txt)
    write_safely_and_close(Setup.output_buffer_codec_header_i, txt_i)
def do():
    """Emit the buffer-codec conversion header pair, if a codec is configured.

    Skips generation entirely for an empty codec setting and for the
    utf8/utf16 'state-split' transformation variants.
    """
    codec = Setup.buffer_codec
    if codec == "":
        return

    trafo_info = Setup.buffer_codec_transformation_info
    if trafo_info in ["utf8-state-split", "utf16-state-split"]:
        return
    assert trafo_info != None

    # _do() yields the header text and its implementation counterpart.
    header_txt, header_i_txt = _do(trafo_info, codec)
    write_safely_and_close(Setup.output_buffer_codec_header,   header_txt)
    write_safely_and_close(Setup.output_buffer_codec_header_i, header_i_txt)
def do(setup, IndentationSupportF):
    """Creates a file of token-ids from a given set of names.
       Creates also a function:

       const string& $$token$$::map_id_to_name().
    """
    global file_str
    LanguageDB = Setup.language_db

    __propose_implicit_token_definitions()

    # All standard token ids (e.g. TERMINATION, UNINITIALIZED) must be present.
    for standard_token_id in standard_token_id_list:
        assert token_id_db.has_key(standard_token_id)

    assert lexer_mode.token_type_definition != None, \
           "Token type has not been defined yet, see $QUEX_PATH/quex/core.py how to\n" + \
           "handle this."

    # (*) Token ID File ________________________________________________________________
    #
    #     The token id file can either be specified as database of
    #     token-id names, or as a file that directly assigns the token-ids
    #     to variables. If the flag '--user-token-id-file' is defined, then
    #     then the token-id file is provided by the user. Otherwise, the
    #     token id file is created by the token-id maker.
    #
    #     The token id maker considers the file passed by the option '-t'
    #     as the database file and creates a C++ file with the output filestem
    #     plus the suffix "--token-ids". Note, that the token id file is a
    #     header file.
    #
    if len(token_id_db.keys()) == len(standard_token_id_list):
        token_id_str = "%sTERMINATION and %sUNINITIALIZED" % \
                       (setup.token_id_prefix_plain, setup.token_id_prefix_plain)
        # TERMINATION + UNINITIALIZED = 2 token ids. If they are the only ones nothing can be done.
        error_msg("Only token ids %s are defined.\n" % token_id_str + \
                  "Quex refuses to proceed. Please, use the 'token { ... }' section to\n" + \
                  "specify at least one other token id.")
    #______________________________________________________________________________________

    # Width of the widest token name; used to column-align the definitions.
    L = max(map(lambda name: len(name), token_id_db.keys()))

    # Padding so that all numeric values line up in one column.
    def space(Name):
        return " " * (L - len(Name))

    # -- define values for the token ids
    #    (C uses '#define'; otherwise a typed constant is emitted)
    def define_this(txt, token):
        if setup.language == "C":
            txt.append("#define %s%s %s((QUEX_TYPE_TOKEN_ID)%i)\n" \
                       % (setup.token_id_prefix_plain, token.name, space(token.name), token.number))
        else:
            txt.append("const QUEX_TYPE_TOKEN_ID %s%s%s = ((QUEX_TYPE_TOKEN_ID)%i);\n" \
                       % (setup.token_id_prefix_plain, token.name, space(token.name), token.number))

    if setup.token_id_foreign_definition_file != "":
        # Token ids come from a user-provided file; only include it.
        token_id_txt = ["#include \"%s\"\n" % get_file_reference(setup.token_id_foreign_definition_file)]
    else:
        if setup.language == "C":
            prolog = ""
            epilog = ""
        else:
            # Wrap the constants in the configured token-id namespace.
            prolog = LanguageDB["$namespace-open"](setup.token_id_prefix_name_space)
            epilog = LanguageDB["$namespace-close"](setup.token_id_prefix_name_space)

        token_id_txt = [prolog]

        # Assign values to tokens with no numeric identifier
        # NOTE: This has not to happen if token's are defined by the user's provided file.
        i = setup.token_id_counter_offset
        # Take the 'dummy_name' only to have the list sorted by name. The key 'dummy_name'
        # may contain '--' to indicate a unicode value, so do not use it as name.
        for dummy_name, token in sorted(token_id_db.items()):
            if token.number == None:
                # Find the next free numeric value.
                while __is_token_id_occupied(i):
                    i += 1
                token.number = i;

            define_this(token_id_txt, token)

        # Double check that no token id appears twice
        # Again, this can only happen, if quex itself produced the numeric values for the token
        token_list = token_id_db.values()
        for i, x in enumerate(token_list):
            for y in token_list[i+1:]:
                if x.number != y.number: continue
                # Report both colliding definitions; do not exit on the first.
                error_msg("Token id '%s'" % x.name, x.file_name, x.line_n, DontExitF=True)
                error_msg("and token id '%s' have same numeric value '%s'." \
                          % (y.name, x.number), y.file_name, y.line_n, DontExitF=True)

        token_id_txt.append(epilog)

    tc_descr = lexer_mode.token_type_definition

    # Fill the module-level template 'file_str' with the generated content.
    content = blue_print(file_str,
                         [["$$TOKEN_ID_DEFINITIONS$$",        "".join(token_id_txt)],
                          ["$$DATE$$",                        time.asctime()],
                          ["$$TOKEN_CLASS_DEFINITION_FILE$$", get_file_reference(lexer_mode.token_type_definition.get_file_name())],
                          ["$$INCLUDE_GUARD_EXT$$",           get_include_guard_extension(
                                                                  LanguageDB["$namespace-ref"](tc_descr.name_space)
                                                                  + "__" + tc_descr.class_name)],
                          ["$$TOKEN_PREFIX$$",                setup.token_id_prefix]])

    write_safely_and_close(setup.output_token_id_file, content)
def do():
    """Generates state machines for all modes. Each mode results into a separate
       state machine that is stuck into a virtual function of a class derived
       from class 'quex_mode'.
    """
    token_id_maker.prepare_default_standard_token_ids()

    mode_db = __get_mode_db(Setup)

    # Feature flags derived from the parsed modes.
    IndentationSupportF = lexer_mode.requires_indentation_count(mode_db)
    BeginOfLineSupportF = lexer_mode.requires_begin_of_line_condition_support(mode_db)

    # (*) Implement the 'quex' core class from a template
    # -- do the coding of the class framework
    header_engine_txt,           \
    constructor_and_memento_txt, \
    header_configuration_txt     = quex_class_out.do(mode_db, IndentationSupportF, BeginOfLineSupportF)

    mode_implementation_txt = mode_classes.do(mode_db)

    # (*) Generate the token ids
    #     (This needs to happen after the parsing of mode_db, since during that
    #      the token_id_db is developped.)
    token_id_maker.do(Setup, IndentationSupportF)
    map_id_to_name_function_implementation_txt = token_id_maker.do_map_id_to_name_function()

    # (*) [Optional] Make a customized token class
    token_class_h, token_class_txt = token_class_maker.do()

    # (*) [Optional] Generate a converter helper
    codec_converter_helper.do()

    # (*) implement the lexer mode-specific analyser functions
    inheritance_info_str = ""
    analyzer_code        = ""

    # (*) Get list of modes that are actually implemented
    #     (abstract modes only serve as common base)
    mode_list      = filter(lambda mode: mode.options["inheritable"] != "only", mode_db.values())
    mode_name_list = map(lambda mode: mode.name, mode_list)

    for mode in mode_list:
        # accumulate inheritance information for comment
        code = get_code_for_mode(mode, mode_name_list, IndentationSupportF, BeginOfLineSupportF)
        analyzer_code += code
        if Setup.comment_mode_patterns_f:
            inheritance_info_str += mode.get_documentation()

    # Bring the info about the patterns first
    if Setup.comment_mode_patterns_f:
        analyzer_code += Setup.language_db["$ml-comment"]("BEGIN: MODE PATTERNS\n" + \
                                                          inheritance_info_str + \
                                                          "\nEND: MODE PATTERNS")
        analyzer_code += "\n" # For safety: New content may have to start in a newline, e.g. "#ifdef ..."

    # generate frame for analyser code
    analyzer_code = generator.frame_this(analyzer_code)

    # Implementation (Potential Inline Functions)
    # NOTE(review): 'implemtation_txt' is misspelled ('implementation') but
    # used consistently below; left unchanged here.
    implemtation_txt =   constructor_and_memento_txt + "\n" \
                       + token_class_txt             + "\n"

    # Engine (Source Code)
    source_txt =   mode_implementation_txt                    + "\n" \
                 + analyzer_code                              + "\n" \
                 + map_id_to_name_function_implementation_txt + "\n"

    # (*) Write Files
    write_safely_and_close(Setup.output_configuration_file, header_configuration_txt)
    if Setup.language == "C":
        # C: inline implementation goes straight into the code file.
        write_safely_and_close(Setup.output_header_file, header_engine_txt)
        write_safely_and_close(Setup.output_code_file, source_txt + implemtation_txt)
    else:
        # C++: splice the implementation into the header's placeholder instead.
        header_txt = header_engine_txt.replace("$$ADDITIONAL_HEADER_CONTENT$$", implemtation_txt)
        write_safely_and_close(Setup.output_header_file, header_txt)
        write_safely_and_close(Setup.output_code_file, source_txt)

    if token_class_h != "":
        write_safely_and_close(lexer_mode.token_type_definition.get_file_name(), token_class_h)

    # Patch the 'open line pragma' markers with real line numbers/file names.
    UserCodeFragment_straighten_open_line_pragmas(Setup.output_header_file, "C")
    UserCodeFragment_straighten_open_line_pragmas(Setup.output_code_file, "C")
    # assert lexer_mode.token_type_definition != None
    UserCodeFragment_straighten_open_line_pragmas(lexer_mode.token_type_definition.get_file_name(), "C")

    if Setup.source_package_directory != "":
        source_package.do()