def prepare(command_line, argv):
    """Finalize the global 'Setup' object after command line parsing.

    RETURN: True,  if process needs to be started.
            False, if job is done.

    Side effect: mutates the global 'Setup' object extensively; may
    terminate via 'error.log()' on an invalid configuration (the
    'DontExitF' keyword below implies that 'error.log' exits by default).
    """
    # (*) Classes and their namespace
    __setup_analyzer_class(Setup)
    __setup_token_class(Setup)
    __setup_token_id_prefix(Setup)

    # (*) Line and Column number counting
    # NOTE(review): '__'-prefixed attributes are accessed from module level,
    # so no class-private name mangling applies -- presumably the attributes
    # are literally named with the leading double underscore; TODO confirm.
    if Setup.__no_count_line_and_column_f:
        Setup.count_line_number_f = False
        Setup.count_column_number_f = False

    # (*) Output programming language
    Setup.language = Setup.language.upper()
    error.verify_word_in_list(
        Setup.language, output_language_db.keys(),
        "Programming language '%s' is not supported." % Setup.language)
    Setup.language_db = output_language_db[Setup.language]()

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    Setup.language_db.extension_db = Setup.language_db.all_extension_db.get(
        Setup.output_file_naming_scheme)
    if Setup.language_db.extension_db is None:
        error.log("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (Setup.output_file_naming_scheme, Setup.language) + \
                  "Available schemes are: %s." % repr(sorted(Setup.language_db.all_extension_db.keys()))[1:-1])

    # 'wchar_t' may only be given as a buffer element TYPE, not as a SIZE.
    if Setup.__buffer_lexatom_size_in_byte == "wchar_t":
        error.log(
            "Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
            "with option '--buffer-element-size' or '-bes'. Please, specify\n"
            "'--buffer-element-type wchar_t' or '--bet'.")

    Setup.buffer_setup(Setup.__buffer_lexatom_type,
                       Setup.__buffer_lexatom_size_in_byte,
                       Setup.buffer_encoding_name,
                       Setup.buffer_encoding_file)

    # Warn (without exiting, see 'DontExitF') if the lexatom type's
    # well-known size (type_info[3]) contradicts the specified element size.
    type_info = global_character_type_db.get(Setup.lexatom.type)
    if type_info is not None and len(type_info) >= 4 \
       and type_info[3] != -1 and Setup.lexatom.size_in_byte != -1 \
       and type_info[3] != Setup.lexatom.size_in_byte:
        error.log("\nBuffer element type ('--bet' or '--buffer-element-type') was set to '%s'.\n" \
                  % Setup.lexatom.type \
                  + "It is well known to be of size %s[byte]. However, the buffer element size\n" \
                  % type_info[3] \
                  + "('-b' or '--buffer-element-type') was specified as '%s'.\n\n" \
                  % Setup.lexatom.size_in_byte \
                  + "Quex can continue, but the result is questionable.\n", \
                  DontExitF=True)

    # Foreign token-id specification: [file, region-begin-re, region-end-re],
    # at most three entries.
    if Setup.extern_token_id_specification:
        if len(Setup.extern_token_id_specification) > 3:
            error.log(
                "Option '--foreign-token-id-file' received > 3 followers.\n"
                "Found: %s" % str(Setup.extern_token_id_specification)[1:-1])
        if len(Setup.extern_token_id_specification) > 1:
            Setup.token_id_foreign_definition_file_region_begin_re = \
                __compile_regular_expression(Setup.extern_token_id_specification[1], "token id region begin")
        if len(Setup.extern_token_id_specification) > 2:
            Setup.token_id_foreign_definition_file_region_end_re = \
                __compile_regular_expression(Setup.extern_token_id_specification[2], "token id region end")
        Setup.extern_token_id_file = \
            Setup.extern_token_id_specification[0]

        token_id_file_parse(Setup.extern_token_id_file)  # AFTER: Setup.extern_token_id_file !!!

    Setup.prepare_output_directory()
    if Setup.language not in ["DOT"]:
        Setup.prepare_all_file_names()

    # (*) Compression Types -- collected together with their position on
    #     the command line, so the user-given order is preserved.
    compression_type_list = []
    for name, ctype in [
            ("compression_template_f", E_Compression.TEMPLATE),
            ("compression_template_uniform_f", E_Compression.TEMPLATE_UNIFORM),
            ("compression_path_f", E_Compression.PATH),
            ("compression_path_uniform_f", E_Compression.PATH_UNIFORM)
    ]:
        if command_line_args_defined(command_line, name):
            compression_type_list.append(
                (command_line_arg_position(name), ctype))

    # Sort by command-line position, then keep only the compression type.
    # (Python 2: 'map' returns a list.)
    compression_type_list.sort(key=itemgetter(0))
    Setup.compression_type_list = map(lambda x: x[1], compression_type_list)

    validation.do(Setup, command_line, argv)

    # (*) return Setup ___________________________________________________________________
    return True
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- global setup object holding all parsed parameters.
    command_line -- parsed command line (provides '.search()' and
                    '.unidentified_options()').
    argv         -- raw argument list; used to detect doubly given options.

    On any inconsistency an error is reported via 'error.log' (which,
    judging from the 'DontExitF'/'SuppressCode' keywords used elsewhere,
    terminates by default).
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory:
        # Check, if the output directory exists and is writeable.
        if os.access(setup.output_directory, os.F_OK) == False:
            error.log(
                "The directory %s was specified for output, but does not exists."
                % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error.log(
                "The directory %s was specified for output, but is not writeable."
                % setup.output_directory)

    # if the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error.log(
            "Character display must be either 'hex' or 'utf8'.\nFound: '%s'"
            % setup.character_display)

    # Ensure that options are not specified twice (list-type options may
    # legitimately appear more than once).
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST, SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort ignoring dashes so that short and long option forms group together.
    options.sort(lambda a, b: cmp(a.replace("-", ""), b.replace("-", "")))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error.log("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error.log(
            "The setting of '--buffer-element-size' (or '-b') can only be\n"
            "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error.log("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search(
            "--token-class", "--tc") == False:
        error.log(
            "The use of a manually written token class requires that the name of the class\n"
            "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search(
            "--token-queue-size"):
        error.log("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        # BUG FIX: compare against 'safety border + 1' (the limit of the
        # enclosing condition), so 'equal to' matches the message; also the
        # message no longer repeats the word 'is'.
        if setup.token_queue_size == setup.token_queue_safety_border + 1:
            cmp_str = "equal to"
        else:
            cmp_str = "less than"
        error.log("Token queue size %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) +
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    if len(setup.token_id_prefix_plain) != 0:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name",
                           "Derived class name")

    __check_file_name(setup, "token_class_file",
                      "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",
                      "file containing user derived lexer class")
    __check_file_name(
        setup, "token_id_foreign_definition_file",
        "file containing user token ids", 0,
        CommandLineOption=SETUP_INFO["token_id_foreign_definition"][0])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f: converter_n += 1
    if setup.converter_icu_f: converter_n += 1
    if len(setup.converter_user_new_func) != 0: converter_n += 1
    if converter_n > 1:
        error.log("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n" + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec.name != "unicode":
        # If the buffer codec is other than unicode, then no converter shall
        # be used to fill the buffer. Instead, the engine is transformed, so
        # that it works directly on the codec.
        error.log("An engine that is to be generated for a specific codec cannot rely\n" + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a converter has been specified and no bytes-element-size has been specified,
    # it defaults to '1 byte' which is most likely not what is desired for unicode.
    if converter_n == 1 \
       and setup.buffer_element_size == 1 \
       and not command_line_args_defined(command_line, "buffer_element_size") \
       and not command_line_args_defined(command_line, "buffer_element_type"):
        error.log("A converter has been specified, but the default buffer element size\n" + \
                  "is left to 1 byte. Consider %s or %s." \
                  % (command_line_args_string("buffer_element_size"),
                     command_line_args_string("buffer_element_type")))

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and setup.converter_ucs_coding_name == "" \
       and converter_n != 0:
        tc = setup.buffer_element_type
        error.log("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" + \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option %s." \
                  % command_line_args_string("converter_ucs_coding_name"))

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error.log("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # BUG FIX: this branch previously reported the wrong policy name
        # ('users_queue'); also 'has be' -> 'has been'.
        error.log(
            "Token policy 'users_token' has been deprecated since 0.49.1. Use\n"
            "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error.log(
            "Token policy 'users_queue' has been deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        # A codec with a fixed element width requires a matching buffer
        # element size; '-1' means the size is still undetermined.
        if setup.buffer_codec.name != CodecName: return
        elif setup.buffer_element_size == RequiredBufferElementSize: return
        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize,
                                                setup.buffer_element_size)
        error.log("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) +
                  "Consult command line argument %s" \
                  % command_line_args_string("buffer_element_size"))

    if setup.buffer_codec.name != "unicode":
        if not setup.buffer_codec_file:
            error.verify_word_in_list(
                setup.buffer_codec_name,
                codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                "Codec '%s' is not supported." % setup.buffer_codec.name)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    if setup.external_lexeme_null_object and setup.token_class_only_f:
        error.log(
            "Specifying an external lexeme null object signalizes an\n"
            "external token class implementation. The 'token class only\n"
            "flag' generates a token class considered to be externally\n"
            "shared. Both flags are mutually exclusive.")

    if setup.string_accumulator_f:
        error_n = NotificationDB.warning_on_no_token_class_take_text
        if error_n in setup.suppressed_notification_list:
            error.warning(
                "The warning upon missing 'take_text' in token type definition is de-\n"
                + "activated by '--suppress %i'. This is dangerous, if there is a string\n" % error_n
                + "accumulator. May be, use '--no-string-accumulator'.",
                -1,
                SuppressCode=NotificationDB.warning_on_no_warning_on_missing_take_text)
def prepare(command_line, argv):
    """Finalize the global 'Setup' object after command line parsing.

    RETURN: True,  if process needs to be started.
            False, if job is done.

    Side effect: mutates the global 'Setup' object extensively; may
    terminate via 'error.log()' on an invalid configuration.
    """
    global Setup

    # (*) Classes and their namespace
    __setup_analyzer_class(Setup)
    __setup_token_class(Setup)
    __setup_token_id_prefix(Setup)
    __setup_lexeme_null(Setup)  # Requires 'token_class_name_space'

    # (*) Output programming language
    Setup.language = Setup.language.upper()
    error.verify_word_in_list(Setup.language, output_language_db.keys(),
                              "Programming language '%s' is not supported." % Setup.language)
    Setup.language_db = output_language_db[Setup.language]
    Setup.extension_db = global_extension_db[Setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    if Setup.extension_db.has_key(Setup.output_file_naming_scheme) == False:
        error.log("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (Setup.output_file_naming_scheme, Setup.language) + \
                  "Available schemes are: %s." % repr(Setup.extension_db.keys())[1:-1])

    # Resolve '<system>' to the byte order of the machine running quex.
    if Setup.buffer_byte_order == "<system>":
        Setup.buffer_byte_order = sys.byteorder
        Setup.byte_order_is_that_of_current_system_f = True
    else:
        Setup.byte_order_is_that_of_current_system_f = False

    Setup.buffer_element_specification_prepare()

    # utf8/utf16 use dedicated state-split modules for codec preparation.
    if Setup.buffer_codec_name == "utf8":
        module = utf8_state_split
    elif Setup.buffer_codec_name == "utf16":
        module = utf16_state_split
    else:
        module = None
    Setup.buffer_codec_prepare(Setup.buffer_codec_name,
                               Setup.buffer_codec_file, module)

    # AFTER: Setup.buffer_codec_prepare() !!!
    if Setup.language not in ["DOT"]:
        prepare_file_names(Setup)

    # Warn (without exiting, see 'DontExitF') if the buffer element type's
    # well-known size (type_info[3]) contradicts the specified element size.
    type_info = global_character_type_db.get(Setup.buffer_element_type)
    if type_info is not None and len(type_info) >= 4 \
       and type_info[3] != -1 and Setup.buffer_element_size != -1 \
       and type_info[3] != Setup.buffer_element_size:
        error.log("\nBuffer element type ('--bet' or '--buffer-element-type') was set to '%s'.\n" \
                  % Setup.buffer_element_type \
                  + "It is well known to be of size %s[byte]. However, the buffer element size\n" \
                  % type_info[3] \
                  + "('-b' or '--buffer-element-type') was specified as '%s'.\n\n" \
                  % Setup.buffer_element_size \
                  + "Quex can continue, but the result is questionable.\n", \
                  DontExitF=True)

    # A converter is in play if any of iconv/icu/user-supplied is requested.
    Setup.converter_f = False
    if Setup.converter_iconv_f or Setup.converter_icu_f or len(Setup.converter_user_new_func) != 0:
        Setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF]) is used.
    Setup.converter_helper_required_f = True
    if Setup.converter_f == False and Setup.buffer_element_size == 1 and Setup.buffer_codec.name == "unicode":
        Setup.converter_helper_required_f = False

    validation.do(Setup, command_line, argv)

    # Derive the converter's UCS coding name from the buffer element type
    # and byte order (index 1 = little endian, 2 = big endian entry in the
    # character type db), unless given explicitly.
    if Setup.converter_ucs_coding_name == "":
        if global_character_type_db.has_key(Setup.buffer_element_type):
            if Setup.buffer_byte_order == "little":
                index = 1
            else:
                index = 2
            Setup.converter_ucs_coding_name = global_character_type_db[Setup.buffer_element_type][index]

    # Foreign token-id definition: [file, region-begin-re, region-end-re],
    # at most three entries.
    if len(Setup.token_id_foreign_definition) != 0:
        if len(Setup.token_id_foreign_definition) > 3:
            error.log("Option '--foreign-token-id-file' received > 3 followers.\n"
                      "Found: %s" % str(Setup.token_id_foreign_definition)[1:-1])
        if len(Setup.token_id_foreign_definition) > 1:
            Setup.token_id_foreign_definition_file_region_begin_re = \
                __compile_regular_expression(Setup.token_id_foreign_definition[1], "token id region begin")
        if len(Setup.token_id_foreign_definition) > 2:
            Setup.token_id_foreign_definition_file_region_end_re = \
                __compile_regular_expression(Setup.token_id_foreign_definition[2], "token id region end")
        Setup.token_id_foreign_definition_file = \
            Setup.token_id_foreign_definition[0]

        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        token_id_file_parse(Setup.token_id_foreign_definition_file,
                            CommentDelimiterList)

    # (*) Compression Types -- collected together with their position on
    #     the command line, so the user-given order is preserved.
    compression_type_list = []
    for name, ctype in [("compression_template_f", E_Compression.TEMPLATE),
                        ("compression_template_uniform_f", E_Compression.TEMPLATE_UNIFORM),
                        ("compression_path_f", E_Compression.PATH),
                        ("compression_path_uniform_f", E_Compression.PATH_UNIFORM)]:
        if command_line_args_defined(command_line, name):
            compression_type_list.append((command_line_arg_position(name), ctype))

    # Sort by command-line position, then keep only the compression type.
    # (Python 2: 'map' returns a list.)
    compression_type_list.sort(key=itemgetter(0))
    Setup.compression_type_list = map(lambda x: x[1], compression_type_list)

    # (*) return Setup ___________________________________________________________________
    return True
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- global setup object holding all parsed parameters.
    command_line -- parsed command line (provides '.search()' and
                    '.unidentified_options()').
    argv         -- raw argument list; used to detect doubly given options.

    On any inconsistency an error is reported via 'error.log' (which,
    judging from the 'DontExitF'/'SuppressCode' keywords used elsewhere,
    terminates by default).
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory:
        # Check, if the output directory exists and is writeable.
        if os.access(setup.output_directory, os.F_OK) == False:
            error.log(
                "The directory %s was specified for output, but does not exists."
                % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error.log(
                "The directory %s was specified for output, but is not writeable."
                % setup.output_directory)

    # if the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error.log(
            "Character display must be either 'hex' or 'utf8'.\nFound: '%s'"
            % setup.character_display)

    # Ensure that options are not specified twice (list-type options may
    # legitimately appear more than once).
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST, SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort ignoring dashes so that short and long option forms group together.
    options.sort(lambda a, b: cmp(a.replace("-", ""), b.replace("-", "")))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " + repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error.log("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error.log(
            "The setting of '--buffer-element-size' (or '-b') can only be\n"
            "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error.log("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search(
            "--token-class", "--tc") == False:
        error.log(
            "The use of a manually written token class requires that the name of the class\n"
            "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search(
            "--token-queue-size"):
        error.log("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        # BUG FIX: compare against 'safety border + 1' (the limit of the
        # enclosing condition), so 'equal to' matches the message; also the
        # message no longer repeats the word 'is'.
        if setup.token_queue_size == setup.token_queue_safety_border + 1:
            cmp_str = "equal to"
        else:
            cmp_str = "less than"
        error.log("Token queue size %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) +
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    if len(setup.token_id_prefix_plain) != 0:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name",
                           "Derived class name")

    __check_file_name(setup, "token_class_file",
                      "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",
                      "file containing user derived lexer class")
    __check_file_name(
        setup, "token_id_foreign_definition_file",
        "file containing user token ids", 0,
        CommandLineOption=SETUP_INFO["token_id_foreign_definition"][0])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f: converter_n += 1
    if setup.converter_icu_f: converter_n += 1
    if len(setup.converter_user_new_func) != 0: converter_n += 1
    if converter_n > 1:
        error.log("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n" + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec.name != "unicode":
        # If the buffer codec is other than unicode, then no converter shall
        # be used to fill the buffer. Instead, the engine is transformed, so
        # that it works directly on the codec.
        error.log("An engine that is to be generated for a specific codec cannot rely\n" + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a converter has been specified and no bytes-element-size has been specified,
    # it defaults to '1 byte' which is most likely not what is desired for unicode.
    if converter_n == 1 \
       and setup.buffer_element_size == 1 \
       and not command_line_args_defined(command_line, "buffer_element_size") \
       and not command_line_args_defined(command_line, "buffer_element_type"):
        error.log("A converter has been specified, but the default buffer element size\n" + \
                  "is left to 1 byte. Consider %s or %s." \
                  % (command_line_args_string("buffer_element_size"),
                     command_line_args_string("buffer_element_type")))

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and setup.converter_ucs_coding_name == "" \
       and converter_n != 0:
        tc = setup.buffer_element_type
        error.log("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" + \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option %s." \
                  % command_line_args_string("converter_ucs_coding_name"))

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error.log("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # BUG FIX: this branch previously reported the wrong policy name
        # ('users_queue'); also 'has be' -> 'has been'.
        error.log(
            "Token policy 'users_token' has been deprecated since 0.49.1. Use\n"
            "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error.log(
            "Token policy 'users_queue' has been deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        # A codec with a fixed element width requires a matching buffer
        # element size; '-1' means the size is still undetermined.
        if setup.buffer_codec.name != CodecName: return
        elif setup.buffer_element_size == RequiredBufferElementSize: return
        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize,
                                                setup.buffer_element_size)
        error.log("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) +
                  "Consult command line argument %s" \
                  % command_line_args_string("buffer_element_size"))

    if setup.buffer_codec.name != "unicode":
        if not setup.buffer_codec_file:
            error.verify_word_in_list(
                setup.buffer_codec_name,
                codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                "Codec '%s' is not supported." % setup.buffer_codec.name)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    if setup.external_lexeme_null_object and setup.token_class_only_f:
        error.log(
            "Specifying an external lexeme null object signalizes an\n"
            "external token class implementation. The 'token class only\n"
            "flag' generates a token class considered to be externally\n"
            "shared. Both flags are mutually exclusive.")

    if setup.string_accumulator_f:
        error_n = NotificationDB.warning_on_no_token_class_take_text
        if error_n in setup.suppressed_notification_list:
            error.warning(
                "The warning upon missing 'take_text' in token type definition is de-\n"
                + "activated by '--suppress %i'. This is dangerous, if there is a string\n" % error_n
                + "accumulator. May be, use '--no-string-accumulator'.",
                -1,
                SuppressCode=NotificationDB.warning_on_no_warning_on_missing_take_text)
def prepare(command_line, argv):
    """Finalize the global 'Setup' object after command line parsing.

    RETURN: True,  if process needs to be started.
            False, if job is done.

    NOTE(review): this file visibly contains several definitions of
    'prepare'; at module level the last definition wins -- these appear to
    be concatenated versions of the same module. TODO confirm intent.
    """
    global Setup

    # (*) Classes and their namespace
    __setup_analyzer_class(Setup)
    __setup_token_class(Setup)
    __setup_token_id_prefix(Setup)
    __setup_lexeme_null(Setup)  # Requires 'token_class_name_space'

    # (*) Output programming language
    Setup.language = Setup.language.upper()
    error.verify_word_in_list(
        Setup.language, output_language_db.keys(),
        "Programming language '%s' is not supported." % Setup.language)
    Setup.language_db = output_language_db[Setup.language]
    Setup.extension_db = global_extension_db[Setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    if Setup.extension_db.has_key(Setup.output_file_naming_scheme) == False:
        error.log("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (Setup.output_file_naming_scheme, Setup.language) + \
                  "Available schemes are: %s." % repr(Setup.extension_db.keys())[1:-1])

    # Resolve '<system>' to the byte order of the machine running quex.
    if Setup.buffer_byte_order == "<system>":
        Setup.buffer_byte_order = sys.byteorder
        Setup.byte_order_is_that_of_current_system_f = True
    else:
        Setup.byte_order_is_that_of_current_system_f = False

    Setup.buffer_element_specification_prepare()

    # utf8/utf16 use dedicated state-split modules for codec preparation.
    if Setup.buffer_codec_name == "utf8":
        module = utf8_state_split
    elif Setup.buffer_codec_name == "utf16":
        module = utf16_state_split
    else:
        module = None
    Setup.buffer_codec_prepare(Setup.buffer_codec_name,
                               Setup.buffer_codec_file, module)

    # AFTER: Setup.buffer_codec_prepare() !!!
    if Setup.language not in ["DOT"]:
        prepare_file_names(Setup)

    # Warn (without exiting, see 'DontExitF') if the buffer element type's
    # well-known size (type_info[3]) contradicts the specified element size.
    type_info = global_character_type_db.get(Setup.buffer_element_type)
    if type_info is not None and len(type_info) >= 4 \
       and type_info[3] != -1 and Setup.buffer_element_size != -1 \
       and type_info[3] != Setup.buffer_element_size:
        error.log("\nBuffer element type ('--bet' or '--buffer-element-type') was set to '%s'.\n" \
                  % Setup.buffer_element_type \
                  + "It is well known to be of size %s[byte]. However, the buffer element size\n" \
                  % type_info[3] \
                  + "('-b' or '--buffer-element-type') was specified as '%s'.\n\n" \
                  % Setup.buffer_element_size \
                  + "Quex can continue, but the result is questionable.\n", \
                  DontExitF=True)

    # A converter is in play if any of iconv/icu/user-supplied is requested.
    Setup.converter_f = False
    if Setup.converter_iconv_f or Setup.converter_icu_f or len(
            Setup.converter_user_new_func) != 0:
        Setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF]) is used.
    Setup.converter_helper_required_f = True
    if Setup.converter_f == False and Setup.buffer_element_size == 1 and Setup.buffer_codec.name == "unicode":
        Setup.converter_helper_required_f = False

    validation.do(Setup, command_line, argv)

    # Derive the converter's UCS coding name from the buffer element type
    # and byte order (index 1 = little endian, 2 = big endian entry in the
    # character type db), unless given explicitly.
    if Setup.converter_ucs_coding_name == "":
        if global_character_type_db.has_key(Setup.buffer_element_type):
            if Setup.buffer_byte_order == "little":
                index = 1
            else:
                index = 2
            Setup.converter_ucs_coding_name = global_character_type_db[
                Setup.buffer_element_type][index]

    # Foreign token-id definition: [file, region-begin-re, region-end-re],
    # at most three entries.
    if len(Setup.token_id_foreign_definition) != 0:
        if len(Setup.token_id_foreign_definition) > 3:
            error.log(
                "Option '--foreign-token-id-file' received > 3 followers.\n"
                "Found: %s" % str(Setup.token_id_foreign_definition)[1:-1])
        if len(Setup.token_id_foreign_definition) > 1:
            Setup.token_id_foreign_definition_file_region_begin_re = \
                __compile_regular_expression(Setup.token_id_foreign_definition[1], "token id region begin")
        if len(Setup.token_id_foreign_definition) > 2:
            Setup.token_id_foreign_definition_file_region_end_re = \
                __compile_regular_expression(Setup.token_id_foreign_definition[2], "token id region end")
        Setup.token_id_foreign_definition_file = \
            Setup.token_id_foreign_definition[0]

        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        token_id_file_parse(Setup.token_id_foreign_definition_file,
                            CommentDelimiterList)

    # (*) Compression Types -- collected together with their position on
    #     the command line, so the user-given order is preserved.
    compression_type_list = []
    for name, ctype in [
            ("compression_template_f", E_Compression.TEMPLATE),
            ("compression_template_uniform_f", E_Compression.TEMPLATE_UNIFORM),
            ("compression_path_f", E_Compression.PATH),
            ("compression_path_uniform_f", E_Compression.PATH_UNIFORM)
    ]:
        if command_line_args_defined(command_line, name):
            compression_type_list.append(
                (command_line_arg_position(name), ctype))

    # Sort by command-line position, then keep only the compression type.
    # (Python 2: 'map' returns a list.)
    compression_type_list.sort(key=itemgetter(0))
    Setup.compression_type_list = map(lambda x: x[1], compression_type_list)

    # (*) return Setup ___________________________________________________________________
    return True
def __perform_setup(command_line, argv):
    """Finalize the module-global 'setup' object after command-line parsing.

    Derives every dependent setting: output language database and file
    extension scheme, output directory, buffer codec transformation,
    byte order, buffer element type/size, converter flags, foreign
    token-id definitions and the compression type list.  Runs
    'validation.do()' on the result.

    RETURN: True, if process needs to be started. False, if job is done.
    """
    global setup

    # (*) Classes and their namespace
    __setup_analyzer_class(setup)
    __setup_token_class(setup)
    __setup_token_id_prefix(setup)
    __setup_lexeme_null(setup)  # Requires 'token_class_name_space'

    # (*) Output programming language
    setup.language = setup.language.upper()
    verify_word_in_list(setup.language,
                        quex_core_engine_generator_languages_db.keys(),
                        "Programming language '%s' is not supported." % setup.language)
    setup.language_db = quex_core_engine_generator_languages_db[setup.language]
    setup.extension_db = global_extension_db[setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    # FIX: 'dict.has_key()' was removed in Python 3; 'in' works in 2 and 3.
    if setup.output_file_naming_scheme not in setup.extension_db:
        error_msg("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (setup.output_file_naming_scheme, setup.language) + \
                  "Available schemes are: %s." % repr(setup.extension_db.keys())[1:-1])

    # Before file names can be prepared, determine the output directory
    # If 'source packaging' is enabled and no output directory is specified
    # then take the directory of the source packaging.
    if setup.source_package_directory != "" and setup.output_directory == "":
        setup.output_directory = setup.source_package_directory

    # (*) Buffer codec: derive the codec transformation information.
    if setup.buffer_codec in ["utf8", "utf16"]:
        setup.buffer_codec_transformation_info = setup.buffer_codec + "-state-split"
    elif setup.buffer_codec_file != "":
        # Codec name = base name of the codec file without its extension.
        # FIX: bare 'except:' also swallowed SystemExit/KeyboardInterrupt;
        #      catch only genuine errors.
        try:
            setup.buffer_codec = os.path.splitext(os.path.basename(setup.buffer_codec_file))[0]
        except Exception:
            error_msg("cannot interpret string following '--codec-file'")
        setup.buffer_codec_transformation_info = codec_db.get_codec_transformation_info(FileName=setup.buffer_codec_file)
    elif setup.buffer_codec != "unicode":
        setup.buffer_codec_transformation_info = codec_db.get_codec_transformation_info(setup.buffer_codec)

    # With a non-unicode codec, the codec dictates the element size.
    if setup.buffer_codec != "unicode":
        setup.buffer_element_size_irrelevant = True

    # (*) Output files
    if setup.language not in ["DOT"]:
        prepare_file_names(setup)

    if setup.buffer_byte_order == "<system>":
        setup.buffer_byte_order = sys.byteorder
        setup.byte_order_is_that_of_current_system_f = True
    else:
        setup.byte_order_is_that_of_current_system_f = False

    if setup.buffer_element_size == "wchar_t":
        error_msg("Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
                  "with option '--buffer-element-size' or '-bes'. Please, specify\n"
                  "'--buffer-element-type wchar_t' or '--bet'.")
    if setup.buffer_element_type == "wchar_t":
        setup.converter_ucs_coding_name = "WCHAR_T"

    make_numbers(setup)

    # (*) Determine buffer element type and size (in bytes)
    if setup.buffer_element_size == -1:
        if setup.buffer_element_type in global_character_type_db:
            setup.buffer_element_size = global_character_type_db[setup.buffer_element_type][3]
        elif setup.buffer_element_type == "":
            setup.buffer_element_size = 1
        else:
            # If the buffer element type is defined, then here we know that it is 'unknown'
            # and Quex cannot know its size on its own.
            setup.buffer_element_size = -1

    if setup.buffer_element_type == "":
        if setup.buffer_element_size in [1, 2, 4]:
            setup.buffer_element_type = {
                1: "uint8_t",
                2: "uint16_t",
                4: "uint32_t",
            }[setup.buffer_element_size]
        elif setup.buffer_element_size == -1:
            pass  # Size (and thus type) remains undetermined.
        else:
            error_msg("Buffer element type cannot be determined for size '%i' which\n" \
                      % setup.buffer_element_size + \
                      "has been specified by '-b' or '--buffer-element-size'.")

    # (*) Converters
    setup.converter_f = False
    if setup.converter_iconv_f or setup.converter_icu_f:
        setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF]) is used.
    setup.converter_helper_required_f = True
    if not setup.converter_f and setup.buffer_element_size == 1 and setup.buffer_codec == "unicode":
        setup.converter_helper_required_f = False

    validation.do(setup, command_line, argv)

    if setup.converter_ucs_coding_name == "":
        if setup.buffer_element_type in global_character_type_db:
            # Index [1]: little endian coding name, [2]: big endian
            # -- presumed layout of 'global_character_type_db' entries;
            #    TODO confirm against its definition.
            if setup.buffer_byte_order == "little":
                index = 1
            else:
                index = 2
            setup.converter_ucs_coding_name = global_character_type_db[setup.buffer_element_type][index]

    # (*) Foreign token id definitions
    if setup.token_id_foreign_definition_file != "":
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
        #
        parse_token_id_file(setup.token_id_foreign_definition_file,
                            setup.token_id_prefix,
                            CommentDelimiterList, IncludeRE)
        if setup.token_id_prefix_plain != setup.token_id_prefix:
            # The 'plain' name space less token indices are also supported
            parse_token_id_file(setup.token_id_foreign_definition_file,
                                setup.token_id_prefix_plain,
                                CommentDelimiterList, IncludeRE)

    # (*) Compression Types
    compression_type_list = []
    for name, ctype in [("compression_template_f",         E_Compression.TEMPLATE),
                        ("compression_template_uniform_f", E_Compression.TEMPLATE_UNIFORM),
                        ("compression_path_f",             E_Compression.PATH),
                        ("compression_path_uniform_f",     E_Compression.PATH_UNIFORM)]:
        if command_line_args_defined(command_line, name):
            compression_type_list.append((command_line_arg_position(name), ctype))
    # Keep the order in which the options appeared on the command line.
    compression_type_list.sort(key=itemgetter(0))
    # FIX: in Python 3 'map()' returns a lazy iterator; the list
    #      comprehension keeps the Python 2 list semantics.
    setup.compression_type_list = [ctype for position, ctype in compression_type_list]

    # (*) return setup ___________________________________________________________________
    return True
def __perform_setup(command_line, argv):
    """Finalize the module-global 'setup' object after command-line parsing.

    Computes all derived settings in order: language database and file
    extension scheme, output directory, buffer codec transformation,
    output file names, byte order, buffer element type/size, converter
    flags, foreign token-id definitions, and the compression type list.
    Validates the result via 'validation.do()'.

    RETURN: True, if process needs to be started. False, if job is done.
    """
    global setup

    # (*) Classes and their namespace
    __setup_analyzer_class(setup)
    __setup_token_class(setup)
    __setup_token_id_prefix(setup)
    __setup_lexeme_null(setup)  # Requires 'token_class_name_space'

    # (*) Output programming language
    setup.language = setup.language.upper()
    verify_word_in_list(setup.language,
                        quex_core_engine_generator_languages_db.keys(),
                        "Programming language '%s' is not supported." % setup.language)
    setup.language_db = quex_core_engine_generator_languages_db[setup.language]
    setup.extension_db = global_extension_db[setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    # FIX: 'dict.has_key()' does not exist in Python 3; 'in' is portable.
    if setup.output_file_naming_scheme not in setup.extension_db:
        error_msg("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (setup.output_file_naming_scheme, setup.language) + \
                  "Available schemes are: %s." % repr(setup.extension_db.keys())[1:-1])

    # Before file names can be prepared, determine the output directory
    # If 'source packaging' is enabled and no output directory is specified
    # then take the directory of the source packaging.
    if setup.source_package_directory != "" and setup.output_directory == "":
        setup.output_directory = setup.source_package_directory

    # (*) Buffer codec transformation information.
    if setup.buffer_codec in ["utf8", "utf16"]:
        setup.buffer_codec_transformation_info = setup.buffer_codec + "-state-split"
    elif setup.buffer_codec_file != "":
        # The codec name is the file's base name without extension.
        # FIX: replaced bare 'except:' (which also catches SystemExit and
        #      KeyboardInterrupt) with 'except Exception:'.
        try:
            setup.buffer_codec = os.path.splitext(os.path.basename(setup.buffer_codec_file))[0]
        except Exception:
            error_msg("cannot interpret string following '--codec-file'")
        setup.buffer_codec_transformation_info = codec_db.get_codec_transformation_info(FileName=setup.buffer_codec_file)
    elif setup.buffer_codec != "unicode":
        setup.buffer_codec_transformation_info = codec_db.get_codec_transformation_info(setup.buffer_codec)

    # A non-unicode codec dictates the buffer element size itself.
    if setup.buffer_codec != "unicode":
        setup.buffer_element_size_irrelevant = True

    # (*) Output files
    if setup.language not in ["DOT"]:
        prepare_file_names(setup)

    if setup.buffer_byte_order == "<system>":
        setup.buffer_byte_order = sys.byteorder
        setup.byte_order_is_that_of_current_system_f = True
    else:
        setup.byte_order_is_that_of_current_system_f = False

    if setup.buffer_element_size == "wchar_t":
        error_msg("Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
                  "with option '--buffer-element-size' or '-bes'. Please, specify\n"
                  "'--buffer-element-type wchar_t' or '--bet'.")
    if setup.buffer_element_type == "wchar_t":
        setup.converter_ucs_coding_name = "WCHAR_T"

    make_numbers(setup)

    # (*) Determine buffer element type and size (in bytes)
    if setup.buffer_element_size == -1:
        if setup.buffer_element_type in global_character_type_db:
            setup.buffer_element_size = global_character_type_db[setup.buffer_element_type][3]
        elif setup.buffer_element_type == "":
            setup.buffer_element_size = 1
        else:
            # If the buffer element type is defined, then here we know that it is 'unknown'
            # and Quex cannot know its size on its own.
            setup.buffer_element_size = -1

    if setup.buffer_element_type == "":
        if setup.buffer_element_size in [1, 2, 4]:
            setup.buffer_element_type = {
                1: "uint8_t",
                2: "uint16_t",
                4: "uint32_t",
            }[setup.buffer_element_size]
        elif setup.buffer_element_size == -1:
            pass  # Size (and thus type) could not be determined.
        else:
            error_msg("Buffer element type cannot be determined for size '%i' which\n" \
                      % setup.buffer_element_size + \
                      "has been specified by '-b' or '--buffer-element-size'.")

    # (*) Converters
    setup.converter_f = False
    if setup.converter_iconv_f or setup.converter_icu_f:
        setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF]) is used.
    setup.converter_helper_required_f = True
    if not setup.converter_f and setup.buffer_element_size == 1 and setup.buffer_codec == "unicode":
        setup.converter_helper_required_f = False

    validation.do(setup, command_line, argv)

    if setup.converter_ucs_coding_name == "":
        if setup.buffer_element_type in global_character_type_db:
            # Index [1] appears to hold the little endian, [2] the big
            # endian coding name -- TODO confirm against the definition
            # of 'global_character_type_db'.
            if setup.buffer_byte_order == "little":
                index = 1
            else:
                index = 2
            setup.converter_ucs_coding_name = global_character_type_db[setup.buffer_element_type][index]

    # (*) Foreign token id definitions
    if setup.token_id_foreign_definition_file != "":
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
        #
        parse_token_id_file(setup.token_id_foreign_definition_file,
                            setup.token_id_prefix,
                            CommentDelimiterList, IncludeRE)
        if setup.token_id_prefix_plain != setup.token_id_prefix:
            # The 'plain' name space less token indices are also supported
            parse_token_id_file(setup.token_id_foreign_definition_file,
                                setup.token_id_prefix_plain,
                                CommentDelimiterList, IncludeRE)

    # (*) Compression Types
    compression_type_list = []
    for name, ctype in [("compression_template_f",         E_Compression.TEMPLATE),
                        ("compression_template_uniform_f", E_Compression.TEMPLATE_UNIFORM),
                        ("compression_path_f",             E_Compression.PATH),
                        ("compression_path_uniform_f",     E_Compression.PATH_UNIFORM)]:
        if command_line_args_defined(command_line, name):
            compression_type_list.append((command_line_arg_position(name), ctype))
    # Preserve the command-line order of the compression options.
    compression_type_list.sort(key=itemgetter(0))
    # FIX: 'map()' is a lazy iterator in Python 3; use a list
    #      comprehension to retain the Python 2 list semantics.
    setup.compression_type_list = [ctype for position, ctype in compression_type_list]

    # (*) return setup ___________________________________________________________________
    return True