def get_option_db():
    """Build a map from command line option to its default value.

    Walks through SETUP_INFO. Entries whose name starts with "XX_"
    (deprecated) and entries whose value is not a list (derived setup
    options) are left out. Each remaining value is unpacked into the pair
    (option list, default); every option of the list is mapped to that
    default.

    RETURNS: dict -- option => default.
    """
    option_db = {}
    for key, entry in SETUP_INFO.iteritems():
        if key.startswith("XX_"):
            continue  # DEPRECATED
        if type(entry) != list:
            continue  # derived setup option
        option_names, default_value = entry
        for option_name in option_names:
            option_db[option_name] = default_value
    return option_db
Example #2
0
def __interpret_command_line(argv):
    """Parse 'argv' and write the found option values into the global 'setup'.

    The options '--version'/'-v' and '--help'/'-h' only print an information
    text; nothing is written into 'setup' in that case.

    For every entry of SETUP_INFO that is a list, 'info[0]' is passed to the
    command line parser as option name(s) and 'info[1]' selects the handling:

      SetupParTypes.FLAG         -- result of searching for the option.
      SetupParTypes.NEGATED_FLAG -- negated result of searching for the option.
      SetupParTypes.LIST         -- [] if the option is absent; else the
                                    arguments following the option, merged
                                    into an already existing list in 'setup'.
      anything else              -- the argument following the option. The
                                    setup variable is only touched, if the
                                    option is present on the command line.

    RETURNS: None          -- if version or help information was printed.
             GetPot object -- built from 'argv', otherwise.
    """
    command_line = GetPot(argv)

    # NOTE: Single-argument 'print(...)' behaves the same as a print statement
    #       and as a print function call.
    if command_line.search("--version", "-v"):
        print("Quex - Fast Universal Lexical Analyzer Generator")
        print("Version " + QUEX_VERSION)
        print("(C) 2005-2012 Frank-Rene Schaefer")
        print("ABSOLUTELY NO WARRANTY")
        return None

    if command_line.search("--help", "-h"):
        print("Quex - Fast Universal Lexical Analyzer Generator")
        print("Please, consult the quex documentation for further help, or")
        print("visit http://quex.org")
        print("(C) 2005-2012 Frank-Rene Schaefer")
        print("ABSOLUTELY NO WARRANTY")
        return None

    for variable_name, info in SETUP_INFO.items():
        # Some parameters are not set on the command line. Their entry is not associated
        # with a description list.
        if type(info) != list: continue

        if info[1] == SetupParTypes.FLAG:
            setup.__dict__[variable_name] = command_line.search(info[0])

        elif info[1] == SetupParTypes.NEGATED_FLAG:
            setup.__dict__[variable_name] = not command_line.search(info[0])

        elif info[1] == SetupParTypes.LIST:
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = []
            else:
                the_list = command_line.nominus_followers(info[0])
                if len(the_list) == 0:
                    error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1])

                if variable_name in setup.__dict__:
                    # Add only those elements which are not yet present.
                    # (Extending by the whole list for each missing element
                    #  would insert duplicates.)
                    present_list = setup.__dict__[variable_name]
                    for element in the_list:
                        if element not in present_list:
                            present_list.append(element)
                else:
                    setup.__dict__[variable_name] = list(set(the_list))

        elif command_line.search(info[0]):
            # Option is present => it must be followed by a value. "--EMPTY--"
            # is the default handed to 'follow()'; getting it back is treated
            # as 'no value given'.
            value = command_line.follow("--EMPTY--", info[0])
            if value == "--EMPTY--":
                error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1])
            setup.__dict__[variable_name] = value

    return command_line
Example #3
0
File: core.py Project: xxyzzzq/quex
def argv_ufo_detections(Cl):
    """Detects unidentified command line options.

    Cl -- command line object; it must provide 'unidentified_options()'.

    The known options are collected from the option lists ('info[0]') of
    all SETUP_INFO entries that are lists. If 'Cl' reports any option beyond
    those, the unknown options are passed to 'error.log()', one per line,
    together with the suppress code 'error_ufo_on_command_line_f'.

    RETURNS: None.
    """
    known_option_list = []
    for info in SETUP_INFO.values():
        if type(info) != list: continue
        known_option_list.extend(info[0])

    ufo_list = Cl.unidentified_options(known_option_list)
    if not ufo_list: return

    # One unknown option per line. (Applying '%' to the list as a whole
    # would only print the list's 'repr'.)
    option_str = "".join("%s\n" % ufo for ufo in ufo_list)
    error.log("Following command line options are unknown to current version of quex:\n" \
              + option_str,
              SuppressCode=NotificationDB.error_ufo_on_command_line_f)
Example #4
0
File: core.py Project: xxyzzzq/quex
def argv_interpret(argv):
    """Interpret the command line 'argv' and enter the values into 'setup'.

    Only those SETUP_INFO entries are considered which are lists and whose
    option names ('info[0]') are found on the command line. The second
    element ('info[1]') selects the 'argv_catch_*()' function that extracts
    the value, which is then stored by 'setup.set()'. At the end,
    unidentified options are handled by 'argv_ufo_detections()'.

    RETURNS: [0] query_f      -- outcome of the chained calls to
                                 'argv_is_query_option()'; None, if no known
                                 option was found. (Presumably True for query
                                 mode, False for code generation mode -- to be
                                 confirmed against 'argv_is_query_option()'.)
             [1] command_line -- the GetPot object built from 'argv'.
    """
    command_line = GetPot(argv, SectionsEnabledF=False)
    command_line.disable_loop()

    query_f = None
    for variable_name, info in SETUP_INFO.items():
        # Entries that are not lists are not set on the command line.
        if type(info) != list:
            continue

        # Start every search from the beginning of the command line.
        command_line.reset_cursor()
        option_list = info[0]
        if not command_line.search(option_list):
            continue

        query_f = argv_is_query_option(command_line, option_list,
                                       variable_name, query_f)

        par_type = info[1]
        if par_type == SetupParTypes.FLAG:
            value = argv_catch_flag(command_line, option_list, None)
        elif par_type == SetupParTypes.NEGATED_FLAG:
            value = argv_catch_negated_flag(command_line, option_list, None)
        elif par_type == SetupParTypes.INT_LIST:
            value = argv_catch_int_list(command_line, variable_name,
                                        option_list, [])
        elif par_type == SetupParTypes.LIST:
            value = argv_catch_list(command_line, option_list, [])
        elif isinstance(par_type, (int, long)):
            # An integer in place of the type is passed on as third argument.
            value = argv_catch_int(command_line, option_list, par_type)
        else:
            value = argv_catch_string(command_line, option_list, par_type)

        setup.set(variable_name, par_type, value)

    # Handle unidentified command line options.
    argv_ufo_detections(command_line)

    return query_f, command_line
Example #5
0
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- object carrying the interpreted setup parameters.
    command_line -- command line object; it must provide 'search()' and
                    'unidentified_options()'.
    argv         -- sequence of the raw command line arguments.

    Each detected inconsistency is reported by 'error.log()'. The checks are:

      -- dependencies between options,
      -- options given more than once (list-type options excepted),
      -- deprecated and unidentified options,
      -- validity of identifiers and file names,
      -- buffer encoding versus code unit size.
    """
    if setup.extern_token_id_file_show_f and not setup.extern_token_id_file:
        error.log("Option '%s' cannot be used without\n" %
                  _example_flag("extern_token_id_file_show_f") +
                  "option '%s'." % _example_flag("extern_token_id_file"))

    # if the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error.log(
            "Character display must be either 'hex' or 'utf8'.\nFound: '%s'" %
            setup.character_display)

    # ensure that options are not specified twice
    for info in SETUP_INFO.values():
        if type(info) != list: continue
        # All names in 'info[0]' belong to the same option; count them together.
        occurence_n = sum(argv.count(option) for option in info[0])
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST,
                                               SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    #     DEPRECATED: name => (comment, last version supporting the option)
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort alphabetically, disregarding the dashes of the option names.
    options.sort(key=lambda option: option.replace("-", ""))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " +  repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error.log("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    if setup.lexatom.size_in_byte not in [-1, 1, 2, 4]:
        example_flag = SETUP_INFO["__buffer_lexatom_size_in_byte"][0][0]
        error.log("The setting of '%s' can only be\n" % example_flag +
                  "1, 2, or 4 (found %s)." % repr(setup.lexatom.size_in_byte))

    # Manually written token class requires token class name to be specified
    if setup.extern_token_class_file and not setup.token_class:
        error.log(
            "The use of a manually written token class requires that the name of the class\n"
            "is specified on the command line via the '--token-class' option."
        )

    if setup.converter_only_f:
        if not setup.lexatom.type:
            error.log(
                "Lexatom type must be specific for converter generation.")
        if not _find_flag("buffer_encoding_name", argv):
            error.log(
                "Lexeme-converter-only-mode requires explicit definition of encoding.\n"
                "Example:  '%s unicode'." %
                _example_flag("buffer_encoding_name"))
        if not _find_flag("__buffer_lexatom_type", argv):
            error.log(
                "Lexeme-converter-only-mode requires explicit definition of the code unit type.\n"
                "Example: '%s uint8_t'." %
                _example_flag("__buffer_lexatom_type"))

    # Check that names are valid identifiers
    if setup.token_id_prefix_plain:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name",
                           "Derived class name")

    __check_file_name(setup, "extern_token_class_file",
                      "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",
                      "file containing user derived lexer class")
    # NOTE(review): The complete SETUP_INFO entry is passed here, not only
    #               its option list '[0]' -- confirm what '__check_file_name'
    #               expects as 'CommandLineOption'.
    __check_file_name(setup,
                      "extern_token_id_file",
                      "file containing user token ids",
                      0,
                      CommandLineOption=SETUP_INFO["extern_token_id_file"])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Internal engine character encoding
    if setup.buffer_encoding.name not in ("utf32", "unicode"):
        if not setup.buffer_encoding_file:
            # NOTE(review): The verified word is 'setup.buffer_encoding_name',
            #               while the message reports 'setup.buffer_encoding.name'
            #               -- presumably both carry the same name; confirm.
            error.verify_word_in_list(
                setup.buffer_encoding_name,
                codec_db.get_supported_codec_list() +
                ["utf8", "utf16", "utf32"],
                "Codec '%s' is not supported." % setup.buffer_encoding.name)
        # NOT: __check_codec_vs_buffer_lexatom_size_in_byte("utf8", 1)
        # BECAUSE: Code unit size is one. No type has a size of less than one byte!
        __check_codec_vs_buffer_lexatom_size_in_byte(setup, "utf16", 2)
Example #6
0
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- object carrying the interpreted setup parameters. Its
                    member 'output_directory' is replaced by its normalized
                    path.
    command_line -- command line object; it must provide 'search()' and
                    'unidentified_options()'.
    argv         -- sequence of the raw command line arguments.

    Each detected inconsistency is reported by 'error_msg()' (or by
    'verify_word_in_list()' for the codec name). The checks are:

      -- existence and writeability of the output directory,
      -- plot options,
      -- options given more than once,
      -- deprecated and unidentified options,
      -- buffer element size and byte order,
      -- token class, token queue, and token policy settings,
      -- validity of identifiers and file names,
      -- converter settings versus codec settings,
      -- mutual exclusion of the path compression flags.
    """

    # NOTE: 'os.path.normpath("")' delivers "."; thus an empty output directory
    #       is checked as the current directory.
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory != "":
        # Check, if the output directory exists
        if not os.access(setup.output_directory, os.F_OK):
            error_msg("The directory %s was specified for output, but does not exists." % setup.output_directory)
        if not os.access(setup.output_directory, os.W_OK):
            error_msg("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # if the mode is 'plotting', then check whether a graphic format is specified
    for plot_option in SETUP_INFO["plot_graphic_format"][0]:
        if plot_option in argv and setup.plot_graphic_format == "":
            error_msg("Option '%s' must be followed by a graphic format specifier (bmp, svg, jpg, ...)" % \
                      plot_option)

    if setup.plot_character_display not in ["hex", "utf8"]:
        error_msg("Plot character display must be either 'hex' or 'utf8'.\nFound: '%s'" % 
                  setup.plot_character_display)

    # ensure that options are not specified twice
    for info in SETUP_INFO.values():
        if type(info) != list: continue
        # All names in 'info[0]' belong to the same option; count them together.
        occurence_n = sum(argv.count(option) for option in info[0])
        if occurence_n > 1:
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    #     DEPRECATED: name => (comment, last version supporting the option)
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment                   = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] != None: options.extend(info[0])
    # Sort alphabetically, disregarding the dashes of the option names.
    options.sort(key=lambda option: option.replace("-", ""))

    ufos = command_line.unidentified_options(options)
    if ufos != []:
        error_msg("Unidentified option(s) = " +  repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error_msg("The setting of '--buffer-element-size' (or '-b') can only be\n" 
                  "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error_msg("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search("--token-class", "--tc") == False:
        error_msg("The use of a manually written token class requires that the name of the class\n"
                  "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search("--token-queue-size"):
        error_msg("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        # The message relates the size to the limit 'safety border + 1'.
        # => 'equal to' applies, if the size hits exactly that limit.
        if setup.token_queue_size == setup.token_queue_safety_border + 1: cmp_str = "equal to"
        else:                                                             cmp_str = "less than"
        error_msg("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) + 
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    __check_identifier(setup, "token_id_prefix_plain",    "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "": 
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")

    __check_file_name(setup, "token_class_file",            "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file", "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file", "file containing user token ids")
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f:             converter_n += 1
    if setup.converter_icu_f:               converter_n += 1 
    if setup.converter_user_new_func != "": converter_n += 1
    if converter_n > 1:
        error_msg("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n"    + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec != "":  
        error_msg("An engine that is to be generated for a specific codec cannot rely\n"      + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a user defined type is specified for 'engine character type' and 
    # a converter, then the name of the target type must be specified explicitly.
    if         setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and     setup.converter_ucs_coding_name == "" \
       and     converter_n != 0:
        tc = setup.buffer_element_type
        error_msg("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc  + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" +      \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option '--converter-ucs-coding-name' or '--cucn'.")

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error_msg("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # The message names the policy that has actually been specified.
        error_msg("Token policy 'users_token' has be deprecated since 0.49.1. Use\n"
                  "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error_msg("Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        """Report an error, if the buffer codec is 'CodecName' but the buffer
        element size differs from 'RequiredBufferElementSize'.
        """
        if   setup.buffer_codec        != CodecName:                 return
        elif setup.buffer_element_size == RequiredBufferElementSize: return

        if setup.buffer_element_size == -1: 
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize, setup.buffer_element_size)

        error_msg("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) + 
                  "Consult command line argument '--buffer-element-size'.")

    if setup.buffer_codec != "":
        verify_word_in_list(setup.buffer_codec,
                            codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                            "Codec '%s' is not supported." % setup.buffer_codec)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    # Path Compression
    if setup.compression_path_uniform_f and setup.compression_path_f:
        error_msg("Both flags for path compression were set: '--path-compression' and\n" 
                  "'--path-compression-uniform'. Please, choose only one!")
Example #7
0
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- object carrying the interpreted setup parameters. Its
                    member 'output_directory' is replaced by its normalized
                    path.
    command_line -- command line object; it must provide 'search()' and
                    'unidentified_options()'.
    argv         -- sequence of the raw command line arguments.

    Each detected inconsistency is reported by 'error.log()' (or by
    'error.verify_word_in_list()' for the codec name); the hint on the
    suppressed 'take_text' warning is issued by 'error.warning()'. The
    checks are:

      -- existence and writeability of the output directory,
      -- character display option,
      -- options given more than once (list-type options excepted),
      -- deprecated and unidentified options,
      -- buffer element size and byte order,
      -- token class, token queue, and token policy settings,
      -- validity of identifiers and file names,
      -- converter settings versus codec and buffer element settings,
      -- external lexeme null object versus 'token class only',
      -- string accumulator versus suppressed 'take_text' warning.
    """

    # NOTE: 'os.path.normpath("")' delivers "."; thus an empty output directory
    #       is checked as the current directory.
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory:
        # Check, if the output directory exists
        if not os.access(setup.output_directory, os.F_OK):
            error.log("The directory %s was specified for output, but does not exists." % setup.output_directory)
        if not os.access(setup.output_directory, os.W_OK):
            error.log("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # if the mode is '--language dot' => check character display options. 
    if setup.character_display not in ["hex", "utf8"]:
        error.log("Character display must be either 'hex' or 'utf8'.\nFound: '%s'" % 
                  setup.character_display)

    # ensure that options are not specified twice
    for info in SETUP_INFO.values():
        if type(info) != list: continue
        # All names in 'info[0]' belong to the same option; count them together.
        occurence_n = sum(argv.count(option) for option in info[0])
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST, SetupParTypes.INT_LIST):
            error.log("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    #     DEPRECATED: name => (comment, last version supporting the option)
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment                   = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error.log("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort alphabetically, disregarding the dashes of the option names.
    options.sort(key=lambda option: option.replace("-", ""))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error.log("Unidentified option(s) = " +  repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error.log("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error.log("The setting of '--buffer-element-size' (or '-b') can only be\n" 
                  "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error.log("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search("--token-class", "--tc") == False:
        error.log("The use of a manually written token class requires that the name of the class\n"
                  "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search("--token-queue-size"):
        error.log("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        # The message relates the size to the limit 'safety border + 1'.
        # => 'equal to' applies, if the size hits exactly that limit.
        if setup.token_queue_size == setup.token_queue_safety_border + 1: cmp_str = "equal to"
        else:                                                             cmp_str = "less than"
        error.log("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) + 
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    if len(setup.token_id_prefix_plain) != 0:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "": 
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")

    __check_file_name(setup, "token_class_file",                 "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",      "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file", "file containing user token ids", 0,
                      CommandLineOption=SETUP_INFO["token_id_foreign_definition"][0])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f:                 converter_n += 1
    if setup.converter_icu_f:                   converter_n += 1 
    if len(setup.converter_user_new_func) != 0: converter_n += 1
    if converter_n > 1:
        error.log("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n"    + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec.name != "unicode":  
        # If the buffer codec is other than unicode, then no converter shall
        # be used to fill the buffer. Instead, the engine is transformed, so 
        # that it works directly on the codec.
        error.log("An engine that is to be generated for a specific codec cannot rely\n"      + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a converter has been specified and no bytes-element-size has been specified,
    # it defaults to '1 byte' which is most likely not what is desired for unicode.
    if     converter_n == 1 \
       and setup.buffer_element_size == 1 \
       and not command_line_args_defined(command_line, "buffer_element_size") \
       and not command_line_args_defined(command_line, "buffer_element_type"):
        error.log("A converter has been specified, but the default buffer element size\n" + \
                  "is left to 1 byte. Consider %s or %s." \
                  % (command_line_args_string("buffer_element_size"),
                     command_line_args_string("buffer_element_type")))

    # If a user defined type is specified for 'engine character type' and 
    # a converter, then the name of the target type must be specified explicitly.
    if         setup.buffer_element_type != "" \
       and not global_character_type_db.has_key(setup.buffer_element_type) \
       and     setup.converter_ucs_coding_name == "" \
       and     converter_n != 0:
        tc = setup.buffer_element_type
        error.log("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc  + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" +      \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option %s." \
                  % command_line_args_string("converter_ucs_coding_name"))

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error.log("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # The message names the policy that has actually been specified.
        error.log("Token policy 'users_token' has be deprecated since 0.49.1. Use\n"
                  "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error.log("Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        """Report an error, if the buffer codec is 'CodecName' but the buffer
        element size differs from 'RequiredBufferElementSize'.
        """
        if   setup.buffer_codec.name   != CodecName:                 return
        elif setup.buffer_element_size == RequiredBufferElementSize: return

        if setup.buffer_element_size == -1: 
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize, setup.buffer_element_size)

        error.log("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) + 
                  "Consult command line argument %s" \
                  % command_line_args_string("buffer_element_size"))

    if setup.buffer_codec.name != "unicode":
        if not setup.buffer_codec_file:
            # NOTE(review): The verified word is 'setup.buffer_codec_name',
            #               while the message reports 'setup.buffer_codec.name'
            #               -- presumably both carry the same name; confirm.
            error.verify_word_in_list(setup.buffer_codec_name,
                                      codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                                      "Codec '%s' is not supported." % setup.buffer_codec.name)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    if setup.external_lexeme_null_object and setup.token_class_only_f:
        error.log("Specifying an external lexeme null object signalizes an\n"
                  "external token class implementation. The 'token class only\n"
                  "flag' generates a token class considered to be externally\n"
                  "shared. Both flags are mutually exclusive.")

    if setup.string_accumulator_f:
        error_n = NotificationDB.warning_on_no_token_class_take_text
        if error_n in setup.suppressed_notification_list: 
            error.warning("The warning upon missing 'take_text' in token type definition is de-\n"
                          + "activated by '--suppress %i'. This is dangerous, if there is a string\n" % error_n
                          + "accumulator. May be, use '--no-string-accumulator'.", -1,
                          SuppressCode=NotificationDB.warning_on_no_warning_on_missing_take_text)
Example #8
0
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    ARGUMENTS:
        setup        -- object carrying the interpreted settings as
                        attributes. Only 'output_directory' is modified
                        (it is replaced by its normalized path).
        command_line -- command line parser; queried by 'search()' and
                        'unidentified_options()'.
        argv         -- plain list of command line arguments; used to count
                        the occurrences of each option.

    Every inconsistency is reported through 'error_msg()', which is defined
    elsewhere. NOTE(review): presumably 'error_msg()' terminates the program
    unless 'DontExitF=True' is passed -- confirm.
    """

    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory:
        # Check, if the output directory exists
        if not os.access(setup.output_directory, os.F_OK):
            error_msg(
                "The directory %s was specified for output, but does not exists."
                % setup.output_directory)
        if not os.access(setup.output_directory, os.W_OK):
            error_msg(
                "The directory %s was specified for output, but is not writeable."
                % setup.output_directory)

    # if the mode is '--language dot' => check character display options.
    if setup.character_display not in ["hex", "utf8"]:
        error_msg(
            "Character display must be either 'hex' or 'utf8'.\nFound: '%s'" %
            setup.character_display)

    # ensure that options are not specified twice
    # (options of list type may legitimately appear more than once)
    for info in SETUP_INFO.values():
        if type(info) != list: continue
        occurence_n = sum(argv.count(option) for option in info[0])
        if occurence_n > 1 and info[1] not in (SetupParTypes.LIST,
                                               SetupParTypes.INT_LIST):
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    # Collect all known, non-deprecated options; anything else on the command
    # line is reported as unidentified.
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort alphabetically, disregarding the dashes of the option names.
    options.sort(key=lambda option: option.replace("-", ""))

    ufos = command_line.unidentified_options(options)
    if len(ufos) != 0:
        error_msg("Unidentified option(s) = " +  repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # '-1' is accepted here; it is reported below as 'undetermined' if a
    # codec requires a specific size.
    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error_msg(
            "The setting of '--buffer-element-size' (or '-b') can only be\n"
            "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error_msg("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search(
            "--token-class", "--tc") == False:
        error_msg(
            "The use of a manually written token class requires that the name of the class\n"
            "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search(
            "--token-queue-size"):
        error_msg("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        # The message relates the size to 'safety border + 1'; the wording
        # 'equal to'/'less than' must be chosen against the same value.
        if setup.token_queue_size == setup.token_queue_safety_border + 1:
            cmp_str = "equal to"
        else:
            cmp_str = "less than"
        error_msg("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) +
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    if len(setup.token_id_prefix_plain) != 0:
        __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name",
                           "Derived class name")

    __check_file_name(setup, "token_class_file",
                      "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",
                      "file containing user derived lexer class")
    __check_file_name(
        setup,
        "token_id_foreign_definition_file",
        "file containing user token ids",
        0,
        CommandLineOption=SETUP_INFO["token_id_foreign_definition"][0])
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f: converter_n += 1
    if setup.converter_icu_f: converter_n += 1
    if len(setup.converter_user_new_func) != 0: converter_n += 1
    if converter_n > 1:
        error_msg("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n"    + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec.name != "unicode":
        # If the buffer codec is other than unicode, then no converter shall
        # be used to fill the buffer. Instead, the engine is transformed, so
        # that it works directly on the codec.
        error_msg("An engine that is to be generated for a specific codec cannot rely\n"      + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a converter has been specified and no bytes-element-size has been specified,
    # it defaults to '1 byte' which is most likely not what is desired for unicode.
    if     converter_n == 1 \
       and setup.buffer_element_size == 1 \
       and not command_line_args_defined(command_line, "buffer_element_size") \
       and not command_line_args_defined(command_line, "buffer_element_type"):
        error_msg("A converter has been specified, but the default buffer element size\n" + \
                  "is left to 1 byte. Consider %s or %s." \
                  % (command_line_args_string("buffer_element_size"),
                     command_line_args_string("buffer_element_type")))

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if         setup.buffer_element_type != "" \
       and not setup.buffer_element_type in global_character_type_db \
       and     setup.converter_ucs_coding_name == "" \
       and     converter_n != 0:
        tc = setup.buffer_element_type
        error_msg("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc  + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" +      \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option %s." \
                  % command_line_args_string("converter_ucs_coding_name"))

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error_msg("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # The message must name the policy that has actually been chosen.
        error_msg(
            "Token policy 'users_token' has be deprecated since 0.49.1. Use\n"
            "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error_msg(
            "Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        """Reports an error, if the buffer codec is 'CodecName' but the
        buffer element size differs from 'RequiredBufferElementSize'.
        """
        if setup.buffer_codec.name != CodecName: return
        elif setup.buffer_element_size == RequiredBufferElementSize: return

        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize,
                                                setup.buffer_element_size)

        error_msg("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) +
                  "Consult command line argument %s" \
                  % command_line_args_string("buffer_element_size"))

    if setup.buffer_codec.name != "unicode":
        # The codec name is only checked against the list of supported
        # codecs if no codec file has been specified.
        if not setup.buffer_codec_file:
            verify_word_in_list(
                setup.buffer_codec_name,
                codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                "Codec '%s' is not supported." % setup.buffer_codec.name)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    if setup.external_lexeme_null_object and setup.token_class_only_f:
        error_msg(
            "Specifying an external lexeme null object signalizes an\n"
            "external token class implementation. The 'token class only\n"
            "flag' generates a token class considered to be externally\n"
            "shared. Both flags are mutually exclusive.")

    if setup.string_accumulator_f:
        error_n = NotificationDB.warning_on_no_token_class_take_text
        if error_n in setup.suppressed_notification_list:
            # Issued with 'WarningF=True' and 'DontExitF=True'.
            error_msg(
                "The warning upon missing 'take_text' in token type definition is de-\n"
                + "activated by '--suppress %i'. This is dangerous, if there is a string\n" % error_n
                + "accumulator. May be, use '--no-string-accumulator'.",
                DontExitF=True,
                WarningF=True,
                SuppressCode=NotificationDB.warning_on_no_warning_on_missing_take_text)
Example #9
0
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    ARGUMENTS:
        setup        -- object carrying the interpreted settings as
                        attributes. Only 'output_directory' is modified
                        (it is replaced by its normalized path).
        command_line -- command line parser; queried by 'search()' and
                        'unidentified_options()'.
        argv         -- plain list of command line arguments; used to count
                        the occurrences of each option.

    Every inconsistency is reported through 'error_msg()', which is defined
    elsewhere. NOTE(review): presumably 'error_msg()' terminates the program
    -- confirm.
    """

    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory != "":
        # Check, if the output directory exists
        if not os.access(setup.output_directory, os.F_OK):
            error_msg(
                "The directory %s was specified for output, but does not exists."
                % setup.output_directory)
        if not os.access(setup.output_directory, os.W_OK):
            error_msg(
                "The directory %s was specified for output, but is not writeable."
                % setup.output_directory)

    # if the mode is 'plotting', then check whether a graphic format is specified
    for plot_option in SETUP_INFO["plot_graphic_format"][0]:
        if plot_option in argv and setup.plot_graphic_format == "":
            error_msg("Option '%s' must be followed by a graphic format specifier (bmp, svg, jpg, ...)" % \
                      plot_option)

    if setup.plot_character_display not in ["hex", "utf8"]:
        error_msg(
            "Plot character display must be either 'hex' or 'utf8'.\nFound: '%s'"
            % setup.plot_character_display)

    # ensure that options are not specified twice
    for info in SETUP_INFO.values():
        if type(info) != list: continue
        occurence_n = sum(argv.count(option) for option in info[0])
        if occurence_n > 1:
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    # Collect all known, non-deprecated options; anything else on the command
    # line is reported as unidentified.
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort alphabetically, disregarding the dashes of the option names.
    options.sort(key=lambda option: option.replace("-", ""))

    ufos = command_line.unidentified_options(options)
    if ufos != []:
        error_msg("Unidentified option(s) = " +  repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # '-1' is accepted here; it is reported below as 'undetermined' if a
    # codec requires a specific size.
    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error_msg(
            "The setting of '--buffer-element-size' (or '-b') can only be\n"
            "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error_msg("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search(
            "--token-class", "--tc") == False:
        error_msg(
            "The use of a manually written token class requires that the name of the class\n"
            "is specified on the command line via the '--token-class' option.")

    # Token queue
    if setup.token_policy != "queue" and command_line.search(
            "--token-queue-size"):
        error_msg("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        # The message relates the size to 'safety border + 1'; the wording
        # 'equal to'/'less than' must be chosen against the same value.
        if setup.token_queue_size == setup.token_queue_safety_border + 1:
            cmp_str = "equal to"
        else:
            cmp_str = "less than"
        error_msg("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) +
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name",
                           "Derived class name")

    __check_file_name(setup, "token_class_file",
                      "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",
                      "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file",
                      "file containing user token ids")
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f: converter_n += 1
    if setup.converter_icu_f: converter_n += 1
    if setup.converter_user_new_func != "": converter_n += 1
    if converter_n > 1:
        error_msg("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n"    + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec != "":
        error_msg("An engine that is to be generated for a specific codec cannot rely\n"      + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "`--converter-new`.")

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if         setup.buffer_element_type != "" \
       and not setup.buffer_element_type in global_character_type_db \
       and     setup.converter_ucs_coding_name == "" \
       and     converter_n != 0:
        tc = setup.buffer_element_type
        error_msg("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc  + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" +      \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option '--converter-ucs-coding-name' or '--cucn'.")

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        error_msg("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # The message must name the policy that has actually been chosen.
        error_msg(
            "Token policy 'users_token' has be deprecated since 0.49.1. Use\n"
            "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error_msg(
            "Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        """Reports an error, if the buffer codec is 'CodecName' but the
        buffer element size differs from 'RequiredBufferElementSize'.
        """
        if setup.buffer_codec != CodecName: return
        elif setup.buffer_element_size == RequiredBufferElementSize: return

        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize,
                                                setup.buffer_element_size)

        error_msg("Using codec '%s' while buffer element size %s.\n" %
                  (CodecName, msg_str) +
                  "Consult command line argument '--buffer-element-size'.")

    if setup.buffer_codec != "":
        verify_word_in_list(
            setup.buffer_codec,
            codec_db.get_supported_codec_list() + ["utf8", "utf16"],
            "Codec '%s' is not supported." % setup.buffer_codec)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    # Path Compression
    if setup.compression_path_uniform_f and setup.compression_path_f:
        error_msg(
            "Both flags for path compression were set: '--path-compression' and\n"
            "'--path-compression-uniform'. Please, choose only one!")
Example #10
0
def do(argv):
    """Interprets the command line 'argv' and stores the result in the
    global 'setup' object. Derived settings (name spaces, output language,
    file names, buffer element type and size, converter flags) are computed
    and 'validation.do()' is called for a consistency check.

    RETURN:  True, if process needs to be started.
             False, if job is done.

    The job is done if only the version, the help text, or the list of plot
    formats was requested.
    """
    global setup

    # (*) Interpret Command Line (A) _____________________________________________________
    command_line = GetPot(argv)

    if command_line.search("--version", "-v"):
        print("Quex - Fast Universal Lexical Analyzer Generator")
        print("Version " + QUEX_VERSION)
        print("(C) 2006-2010 Frank-Rene Schaefer")
        print("ABSOLUTELY NO WARRANTY")
        return False

    if command_line.search("--help", "-h"):
        print("Quex - Fast Universal Lexical Analyzer Generator")
        print("Please, consult the quex documentation for further help, or")
        print("visit http://quex.org")
        print("(C) 2006-2010 Frank-Rene Schaefer")
        print("ABSOLUTELY NO WARRANTY")
        return False

    if command_line.search("--plot-format-list"):
        print(quex.output.graphviz.interface.get_supported_graphic_format_description())
        return False

    for variable_name, info in SETUP_INFO.items():
        # Some parameters are not set on the command line. Their entry is not associated
        # with a description list.
        if type(info) != list: continue

        if info[1] == FLAG:
            setup.__dict__[variable_name] = command_line.search(info[0])

        elif info[1] == NEGATED_FLAG:
            setup.__dict__[variable_name] = not command_line.search(info[0])

        elif info[1] == LIST:
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = []
            else:
                the_list = command_line.nominus_followers(info[0])
                if not the_list:
                    error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1])

                if variable_name in setup.__dict__:
                    setup.__dict__[variable_name].extend(the_list)
                else:
                    setup.__dict__[variable_name] = the_list

        elif command_line.search(info[0]):
            # The option is present on the command line => take the value
            # that follows it. If the option is absent, the setting is left
            # untouched.
            value = command_line.follow("--EMPTY--", info[0])
            if value == "--EMPTY--":
                error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1])
            setup.__dict__[variable_name] = value

    # (*) Classes and their namespace
    setup.analyzer_class_name, \
    setup.analyzer_name_space, \
    setup.analyzer_name_safe   = \
         read_namespaced_name(setup.analyzer_class_name,
                              "analyzer engine (options -o, --engine, --analyzer-class)")

    setup.analyzer_derived_class_name,       \
    setup.analyzer_derived_class_name_space, \
    setup.analyzer_derived_class_name_safe = \
         read_namespaced_name(setup.analyzer_derived_class_name,
                              "derived analyzer class (options --derived-class, --dc)",
                              AllowEmptyF=True)

    if setup.analyzer_name_space == []:
        setup.analyzer_name_space = ["quex"]

    if setup.token_class_name == "":
        # Default token class: 'Token' inside the analyzer's name space.
        setup.token_class_name = "%s::Token" % "::".join(setup.analyzer_name_space)

    # Token classes and derived classes have the freedom not to open a namespace,
    # thus no check 'if namespace == []'.
    setup.token_class_name,       \
    setup.token_class_name_space, \
    setup.token_class_name_safe = \
         read_namespaced_name(setup.token_class_name,
                              "token class (options --token-class, --tc)")

    if setup.token_class_file != "":
        lexer_mode.token_type_definition = \
                ManualTokenClassSetup(setup.token_class_file,
                                      setup.token_class_name,
                                      setup.token_class_name_space,
                                      setup.token_class_name_safe,
                                      setup.token_id_type)

    if setup.token_class_name_space == []:
        setup.token_class_name_space = deepcopy(setup.analyzer_name_space)

    setup.token_id_prefix_plain,      \
    setup.token_id_prefix_name_space, \
    dummy                           = \
         read_namespaced_name(setup.token_id_prefix,
                              "token prefix (options --token-prefix)")

    if len(setup.token_id_prefix_name_space) != 0 and setup.language.upper() == "C":
        error_msg("Token id prefix cannot contain a namespaces if '--language' is set to 'C'.")

    # (*) Output programming language
    setup.language = setup.language.upper()
    verify_word_in_list(setup.language,
                        quex_core_engine_generator_languages_db.keys(),
                        "Programming language '%s' is not supported." % setup.language)
    setup.language_db = quex_core_engine_generator_languages_db[setup.language]
    setup.extension_db = global_extension_db[setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    if setup.output_file_naming_scheme not in setup.extension_db:
        error_msg("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (setup.output_file_naming_scheme, setup.language) + \
                  "Available schemes are: %s." % repr(setup.extension_db.keys())[1:-1])

    # Before file names can be prepared, determine the output directory
    # If 'source packaging' is enabled and no output directory is specified
    # then take the directory of the source packaging.
    if setup.source_package_directory != "" and setup.output_directory == "":
        setup.output_directory = setup.source_package_directory

    # (*) Output files
    prepare_file_names(setup)

    if setup.buffer_byte_order == "<system>":
        setup.buffer_byte_order = sys.byteorder
        setup.byte_order_is_that_of_current_system_f = True
    else:
        setup.byte_order_is_that_of_current_system_f = False

    # This check happens before 'make_numbers()' is called, i.e. the
    # setting is still compared as a string.
    if setup.buffer_element_size == "wchar_t":
        error_msg("Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
                  "with option '--buffer-element-size' or '-bes'. Please, specify\n"
                  "'--buffer-element-type wchar_t' or '--bet'.")

    if setup.buffer_element_type == "wchar_t":
        setup.converter_ucs_coding_name = "WCHAR_T"

    if setup.buffer_codec != "":
        setup.buffer_element_size_irrelevant = True

    make_numbers(setup)

    # (*) Determine buffer element type and size (in bytes)
    if setup.buffer_element_size == -1:
        if setup.buffer_element_type in global_character_type_db:
            setup.buffer_element_size = global_character_type_db[setup.buffer_element_type][3]
        elif setup.buffer_element_type == "":
            setup.buffer_element_size = 1
        else:
            # If the buffer element type is defined, then here we know that it is 'unknown'
            # and Quex cannot know its size on its own.
            setup.buffer_element_size = -1

    if setup.buffer_element_type == "":
        if setup.buffer_element_size in [1, 2, 4]:
            setup.buffer_element_type = {
                1: "uint8_t", 2: "uint16_t", 4: "uint32_t",
            }[setup.buffer_element_size]
        elif setup.buffer_element_size == -1:
            pass
        else:
            error_msg("Buffer element type cannot be determined for size '%i' which\n" \
                      % setup.buffer_element_size +
                      "has been specified by '-b' or '--buffer-element-size'.")

    if setup.buffer_codec in ["utf8", "utf16"]:
        setup.buffer_codec_transformation_info = setup.buffer_codec + "-state-split"
    elif setup.buffer_codec != "":
        setup.buffer_codec_transformation_info = codec_db.get_codec_transformation_info(setup.buffer_codec)

    setup.converter_f = False
    if setup.converter_iconv_f or setup.converter_icu_f:
        setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF]) is used.
    setup.converter_helper_required_f = True
    if setup.converter_f == False and setup.buffer_element_size == 1 and setup.buffer_codec == "":
        setup.converter_helper_required_f = False

    validation.do(setup, command_line, argv)

    if setup.converter_ucs_coding_name == "":
        if setup.buffer_element_type in global_character_type_db:
            # Index 1 is taken for 'little' byte order, index 2 otherwise.
            if setup.buffer_byte_order == "little": index = 1
            else:                                   index = 2
            setup.converter_ucs_coding_name = global_character_type_db[setup.buffer_element_type][index]

    if setup.token_id_foreign_definition_file != "":
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE            = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
        #
        parse_token_id_file(setup.token_id_foreign_definition_file,
                            setup.token_id_prefix,
                            CommentDelimiterList, IncludeRE)
        if setup.token_id_prefix_plain != setup.token_id_prefix:
            # The 'plain' name space less token indices are also supported
            parse_token_id_file(setup.token_id_foreign_definition_file,
                                setup.token_id_prefix_plain,
                                CommentDelimiterList, IncludeRE)


    # (*) return setup ___________________________________________________________________
    return True
Example #11
0
def validate(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    Problems are reported through 'error_msg()'. The reports passed with
    'DontExitF=True' are the ones this function itself words as warnings
    ("Maybe it works."); presumably all other reports abort the program.

    setup        -- object holding the option values as attributes. It is
                    adapted in place:
                      * 'output_directory' is passed through
                        'os.path.normpath()',
                      * 'bytes_per_ucs_code_point' becomes an integer, unless
                        it is "wchar_t",
                      * a 'byte_order' of "<system>" is replaced by
                        'sys.byteorder'.
    command_line -- command line parser object; used here through 'search()'
                    and 'unidentified_options()'.
    argv         -- the raw list of command line arguments.
    """
    # NOTE(review): 'os.path.normpath("")' yields ".", so the comparison with
    # "" below is expected to hold always -- confirm whether an unset output
    # directory was meant to skip the checks.
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory != "":
        # Check, if the output directory exists and is writeable
        if not os.access(setup.output_directory, os.F_OK):
            error_msg(
                "The directory %s was specified for output, but does not exists."
                % setup.output_directory)
        if not os.access(setup.output_directory, os.W_OK):
            error_msg(
                "The directory %s was specified for output, but is not writeable."
                % setup.output_directory)

    # If a plot option appears on the command line, then check whether a
    # graphic format is specified.
    for plot_option in SETUP_INFO["plot_graphic_format"][0]:
        if plot_option in argv and setup.plot_graphic_format == "":
            error_msg("Option '%s' must be followed by a graphic format specifier (bmp, svg, jpg, ...)" % \
                      plot_option)

    # Ensure that options are not specified twice. All spellings listed in
    # 'info[0]' count as the same option.
    for info in SETUP_INFO.values():
        occurrence_n = sum(argv.count(option) for option in info[0])
        if occurrence_n > 1:
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment = info[0]
        last_supported_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % last_supported_version + \
                          "http://quex.sourceforge.net for download---Or use a more advanced approach.\n" + \
                          comment)

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Order the options by their name without dashes. A key function gives
    # the same (stable) order as the former 'cmp' based comparison.
    options.sort(key=lambda option: option.replace("-", ""))

    ufos = command_line.unidentified_options(options)
    if ufos != []:
        error_msg("Unidentified option(s) = " +  repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.input_derived_class_name != "" and \
       setup.input_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.input_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # Check validity of the code point size. The value arrives as a string
    # and is converted to an integer once it is known to be acceptable.
    bpc = setup.bytes_per_ucs_code_point
    if bpc != "wchar_t":
        if bpc not in ["1", "2", "4"]:
            # The line break separates the offending value from the
            # explanation; without it both ran into each other.
            error_msg("choice for --bytes-per-ucs-code-point: %s\n" % bpc + \
                      "quex only supports 1, 2, or 4 bytes per character in internal engine")
            sys.exit(-1)
        else:
            setup.bytes_per_ucs_code_point = int(
                setup.bytes_per_ucs_code_point)

    if setup.byte_order == "<system>":
        setup.byte_order = sys.byteorder
    elif setup.byte_order not in ["little", "big"]:
        error_msg("Byte order (option --endian) must be 'little' or 'big'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Token offset and the special token ids: equality with the offset is
    # reported as an error, the remaining findings only as warnings.
    if setup.input_token_counter_offset == setup.token_id_termination:
        error_msg(
            "Token id offset (--token-offset) == token id for termination (--token-id-termination)\n"
        )
    if setup.input_token_counter_offset == setup.token_id_uninitialized:
        error_msg(
            "Token id offset (--token-offset) == token id for uninitialized (--token-id-uninitialized)\n"
        )
    if setup.token_id_termination == setup.token_id_uninitialized:
        error_msg("Token id for termination (--token-id-termination) and uninitialized (--token-id-uninitialized)\n" + \
                  "are chosen to be the same. Maybe it works.", DontExitF=True)
    if setup.input_token_counter_offset < setup.token_id_uninitialized:
        error_msg("Token id offset (--token-offset) < token id uninitialized (--token-id-uninitialized).\n" + \
                  "Maybe it works.", DontExitF=True)
    if setup.input_token_counter_offset < setup.token_id_termination:
        error_msg("Token id offset (--token-offset) < token id termination (--token-id-termination).\n" + \
                  "Maybe it works.", DontExitF=True)

    # Check that names are valid identifiers. The optional names are only
    # checked if they have been specified.
    __check_identifier(setup, "input_token_id_prefix", "Token prefix")
    __check_identifier(setup, "output_engine_name", "Engine name")
    if setup.input_derived_class_name != "":
        __check_identifier(setup, "input_derived_class_name",
                           "Derived class name")
    if setup.input_token_class_name != "":
        __check_identifier(setup, "input_token_class_name", "Token class name")

    # '--token-class' and '--token-class-file' need to appear together
    if setup.input_token_class_name != "" and setup.input_token_class_file == "":
        error_msg("User defined token class '%s':\n" % setup.input_token_class_name + \
                  "Specifying a user-defined token class via '--token-class' requires\n" + \
                  "that the token class file, also, needs to be specified via '--token-class-file'.")
    if setup.input_token_class_file != "" and setup.input_token_class_name == "":
        error_msg("User defined token class file '%s':\n" % setup.input_token_class_file + \
                  "Specifying a user-defined token class file via '--token-class-file' requires\n" + \
                  "that the token class, also, needs to be specified via '--token-class'.")

    # __check_identifier("token_id_termination",     "Token id for termination")
    # __check_identifier("token_id_uninitialized",   "Token id for uninitialized")
    __check_file_name(setup, "input_token_class_file",
                      "file containing user defined token class")
    __check_file_name(setup, "input_derived_class_file",
                      "file containing user derived lexer class")

    __check_file_name(setup, "input_foreign_token_id_file",
                      "file containing user token ids")
    __check_file_name(setup, "input_user_token_id_file",
                      "file containing user token ids")

    __check_file_name(setup, "input_mode_files", "quex source file")
Example #12
0
def do(argv):
    """Interpret the command line 'argv' and store the result in the global 'setup'.

    If '--version'/'-v' or '--help'/'-h' is found, a short notice is printed
    and the program is left through 'sys.exit(0)'. Otherwise, each entry of
    SETUP_INFO is read from the command line into 'setup', the output file
    names and the integer codes are derived, and the result is passed to
    'validate()'. Nothing is returned.
    """
    global setup

    # (*) Interpret Command Line (A) _____________________________________________________
    command_line = GetPot(argv)

    if command_line.search("--version", "-v"):
        print("Quex - A Mode Oriented Lexical Analyser")
        print("Version " + QUEX_VERSION)
        print("(C) 2006-2008 Frank-Rene Schaefer")
        sys.exit(0)

    if command_line.search("--help", "-h"):
        print("Quex - A Mode Oriented Lexical Analyser")
        print("Please, consult the quex documentation for further help, or")
        print("visit http://quex.sourceforge.net.")
        print("(C) 2006-2008 Frank-Rene Schaefer")
        sys.exit(0)

    # Each SETUP_INFO entry names its command line options in element 0;
    # element 1 decides how the options are read.
    setup_db = setup.__dict__
    for variable_name, info in SETUP_INFO.items():
        kind = info[1]
        option_list = info[0]

        if kind == LIST:
            # Lists accumulate: what is already stored under the name is extended.
            followers = command_line.nominus_followers(option_list)
            if variable_name in setup_db:
                setup_db[variable_name].extend(followers)
            else:
                setup_db[variable_name] = followers
            continue

        if kind == FLAG:
            setup_db[variable_name] = command_line.search(option_list)
            continue

        # Neither list nor flag: element 1 itself is handed to 'follow()' as
        # its first argument (presumably the value used if the option is absent).
        setup_db[variable_name] = command_line.follow(kind, option_list)

    setup.QUEX_VERSION = QUEX_VERSION
    setup.QUEX_INSTALLATION_DIR = QUEX_INSTALLATION_DIR
    setup.QUEX_TEMPLATE_DB_DIR = QUEX_TEMPLATE_DB_DIR

    # (*) Output files
    setup.output_file_stem = __prepare_file_name(setup, "")
    setup.output_token_id_file = __prepare_file_name(setup, "-token_ids")
    setup.output_header_file = __prepare_file_name(setup, "-internal.h")
    setup.output_code_file = __prepare_file_name(setup, ".cpp")
    setup.output_core_engine_file = __prepare_file_name(setup, "-core-engine.cpp")

    # (*) Values that have to be integers
    setup.buffer_limit_code = __get_integer(setup.buffer_limit_code, "--buffer-limit")
    setup.control_character_code_list = [setup.buffer_limit_code]

    for attribute, option_name in (
            ("input_token_counter_offset", "--token-offset"),
            ("token_id_termination", "--token-id-termination"),
            ("token_id_uninitialized", "--token-id-uninitialized")):
        setattr(setup, attribute,
                __get_integer(getattr(setup, attribute), option_name))

    validate(setup, command_line, argv)

    foreign_token_id_file = setup.input_foreign_token_id_file
    if foreign_token_id_file != "":
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
        parse_token_id_file(foreign_token_id_file,
                            setup.input_token_id_prefix,
                            CommentDelimiterList,
                            IncludeRE)

    # (*) Default values
    #     (Please, do not change this, otherwise no 'empty' options can be detected.)
    #     They are filled in only here, i.e. after 'validate()' has seen the
    #     values as given on the command line.
    for attribute in ("input_token_class_file", "input_token_class_name"):
        if getattr(setup, attribute) == "":
            setattr(setup, attribute, SETUP_INFO[attribute][2])

    # (*) return setup ___________________________________________________________________
    return
Example #13
0
def do(argv):
    """Interpret the command line 'argv' and fill the global 'setup' object.

    RETURN:  True, if process needs to be started.
             False, if job is done.

    The job is done (False) if only the version notice, the help notice, or
    the list of supported plot formats was requested; it is printed and
    nothing else happens. Otherwise, the options described in SETUP_INFO are
    read into 'setup', derived settings are computed (name spaces, output
    language, buffer element type and size, converter flags), the setup is
    checked by 'validation.do()', and a foreign token id file is parsed if
    one was specified.
    """
    global setup

    # (*) Interpret Command Line (A) _____________________________________________________
    command_line = GetPot(argv)

    if command_line.search("--version", "-v"):
        print("Quex - Fast Universal Lexical Analyzer Generator")
        print("Version " + QUEX_VERSION)
        print("(C) 2006-2010 Frank-Rene Schaefer")
        print("ABSOLUTELY NO WARRANTY")
        return False

    if command_line.search("--help", "-h"):
        print("Quex - Fast Universal Lexical Analyzer Generator")
        print("Please, consult the quex documentation for further help, or")
        print("visit http://quex.org")
        print("(C) 2006-2010 Frank-Rene Schaefer")
        print("ABSOLUTELY NO WARRANTY")
        return False

    if command_line.search("--plot-format-list"):
        print(quex.output.graphviz.interface.get_supported_graphic_format_description())
        return False

    for variable_name, info in SETUP_INFO.items():
        # Some parameters are not set on the command line. Their entry is not associated
        # with a description list.
        if not isinstance(info, list): continue

        if info[1] == FLAG:
            setup.__dict__[variable_name] = command_line.search(info[0])

        elif info[1] == NEGATED_FLAG:
            setup.__dict__[variable_name] = not command_line.search(info[0])

        elif info[1] == LIST:
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = []
            else:
                the_list = command_line.nominus_followers(info[0])
                if the_list == []:
                    error_msg("Option %s\nnot followed by anything." %
                              repr(info[0])[1:-1])

                if variable_name in setup.__dict__:
                    setup.__dict__[variable_name].extend(the_list)
                else:
                    setup.__dict__[variable_name] = the_list

        elif command_line.search(info[0]):
            # The option is present, so it must be followed by a value.
            # "--EMPTY--" is the marker that 'follow()' hands back if there
            # is none. If the option is absent, the value already stored in
            # 'setup' is left untouched.
            # NOTE(review): this branch formerly repeated the search and
            # assigned 'info[1]' if the repeated search failed. That
            # assignment could only happen if two identical searches in a
            # row disagree -- confirm against GetPot.
            value = command_line.follow("--EMPTY--", info[0])
            if value == "--EMPTY--":
                error_msg("Option %s\nnot followed by anything." %
                          repr(info[0])[1:-1])
            setup.__dict__[variable_name] = value

    # (*) Classes and their namespace
    setup.analyzer_class_name, \
    setup.analyzer_name_space, \
    setup.analyzer_name_safe   = \
         read_namespaced_name(setup.analyzer_class_name,
                              "analyzer engine (options -o, --engine, --analyzer-class)")

    setup.analyzer_derived_class_name,       \
    setup.analyzer_derived_class_name_space, \
    setup.analyzer_derived_class_name_safe = \
         read_namespaced_name(setup.analyzer_derived_class_name,
                              "derived analyzer class (options --derived-class, --dc)",
                              AllowEmptyF=True)

    if setup.analyzer_name_space == []:
        setup.analyzer_name_space = ["quex"]

    if setup.token_class_name == "":
        # Default token class: 'Token' inside the analyzer's name space. The
        # name space is not empty at this point (see the default above).
        analyzer_scope = "::".join(setup.analyzer_name_space)
        setup.token_class_name = "%s::Token" % analyzer_scope

    # Token classes and derived classes have the freedom not to open a namespace,
    # thus no check 'if namespace == []'.
    setup.token_class_name,       \
    setup.token_class_name_space, \
    setup.token_class_name_safe = \
         read_namespaced_name(setup.token_class_name,
                              "token class (options --token-class, --tc)")

    if setup.token_class_file != "":
        # A token class file given on the command line means that the token
        # class is set up manually.
        lexer_mode.token_type_definition = \
                ManualTokenClassSetup(setup.token_class_file,
                                      setup.token_class_name,
                                      setup.token_class_name_space,
                                      setup.token_class_name_safe,
                                      setup.token_id_type)

    if setup.token_class_name_space == []:
        setup.token_class_name_space = deepcopy(setup.analyzer_name_space)

    setup.token_id_prefix_plain,      \
    setup.token_id_prefix_name_space, \
    dummy                           = \
         read_namespaced_name(setup.token_id_prefix,
                              "token prefix (options --token-prefix)")

    if len(setup.token_id_prefix_name_space) != 0 and setup.language.upper() == "C":
        error_msg(
            "Token id prefix cannot contain a namespaces if '--language' is set to 'C'."
        )

    # (*) Output programming language
    setup.language = setup.language.upper()
    verify_word_in_list(
        setup.language, quex_core_engine_generator_languages_db.keys(),
        "Programming language '%s' is not supported." % setup.language)
    setup.language_db = quex_core_engine_generator_languages_db[setup.language]
    setup.extension_db = global_extension_db[setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    if setup.output_file_naming_scheme not in setup.extension_db:
        error_msg("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (setup.output_file_naming_scheme, setup.language) + \
                  "Available schemes are: %s." % repr(setup.extension_db.keys())[1:-1])

    # Before file names can be prepared, determine the output directory
    # If 'source packaging' is enabled and no output directory is specified
    # then take the directory of the source packaging.
    if setup.source_package_directory != "" and setup.output_directory == "":
        setup.output_directory = setup.source_package_directory

    # (*) Output files
    prepare_file_names(setup)

    if setup.buffer_byte_order == "<system>":
        setup.buffer_byte_order = sys.byteorder
        setup.byte_order_is_that_of_current_system_f = True
    else:
        setup.byte_order_is_that_of_current_system_f = False

    if setup.buffer_element_size == "wchar_t":
        error_msg(
            "Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
            "with option '--buffer-element-size' or '-bes'. Please, specify\n"
            "'--buffer-element-type wchar_t' or '--bet'.")

    if setup.buffer_element_type == "wchar_t":
        setup.converter_ucs_coding_name = "WCHAR_T"

    if setup.buffer_codec != "":
        setup.buffer_element_size_irrelevant = True

    make_numbers(setup)

    # (*) Determine buffer element type and size (in bytes)
    #     A size of -1 is treated as 'not specified'.
    if setup.buffer_element_size == -1:
        if setup.buffer_element_type in global_character_type_db:
            setup.buffer_element_size = global_character_type_db[
                setup.buffer_element_type][3]
        elif setup.buffer_element_type == "":
            setup.buffer_element_size = 1
        else:
            # If the buffer element type is defined, then here we know that it is 'unknown'
            # and Quex cannot know its size on its own.
            setup.buffer_element_size = -1

    if setup.buffer_element_type == "":
        if setup.buffer_element_size in [1, 2, 4]:
            setup.buffer_element_type = {
                1: "uint8_t",
                2: "uint16_t",
                4: "uint32_t",
            }[setup.buffer_element_size]
        elif setup.buffer_element_size == -1:
            pass
        else:
            error_msg("Buffer element type cannot be determined for size '%i' which\n" \
                      % setup.buffer_element_size +
                      "has been specified by '-b' or '--buffer-element-size'.")

    if setup.buffer_codec in ["utf8", "utf16"]:
        setup.buffer_codec_transformation_info = setup.buffer_codec + "-state-split"
    elif setup.buffer_codec != "":
        setup.buffer_codec_transformation_info = codec_db.get_codec_transformation_info(
            setup.buffer_codec)

    # A converter is in use, if either of the two converter flags is set.
    setup.converter_f = False
    if setup.converter_iconv_f or setup.converter_icu_f:
        setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF]) is used.
    setup.converter_helper_required_f = True
    if setup.converter_f == False and setup.buffer_element_size == 1 and setup.buffer_codec == "":
        setup.converter_helper_required_f = False

    validation.do(setup, command_line, argv)

    if setup.converter_ucs_coding_name == "":
        if setup.buffer_element_type in global_character_type_db:
            # Entry 1 of the type's record is taken for little endian,
            # entry 2 for any other byte order.
            if setup.buffer_byte_order == "little": index = 1
            else: index = 2
            setup.converter_ucs_coding_name = global_character_type_db[
                setup.buffer_element_type][index]

    if setup.token_id_foreign_definition_file != "":
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
        #
        parse_token_id_file(setup.token_id_foreign_definition_file,
                            setup.token_id_prefix, CommentDelimiterList,
                            IncludeRE)
        if setup.token_id_prefix_plain != setup.token_id_prefix:
            # The 'plain' name space less token indices are also supported
            parse_token_id_file(setup.token_id_foreign_definition_file,
                                setup.token_id_prefix_plain,
                                CommentDelimiterList, IncludeRE)

    # (*) return setup ___________________________________________________________________
    return True
Example #14
0
def validate(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    Findings are handed to 'error_msg()'. Those passed with 'DontExitF=True'
    are worded by this function as warnings ("Maybe it works."); presumably
    every other finding terminates the program.

    setup        -- object with the option values as attributes; modified in
                    place ('output_directory' is normalized,
                    'bytes_per_ucs_code_point' is converted to an integer
                    unless it is "wchar_t", a 'byte_order' of "<system>" is
                    replaced by 'sys.byteorder').
    command_line -- command line parser object, used through 'search()' and
                    'unidentified_options()'.
    argv         -- the raw list of command line arguments.
    """
    # NOTE(review): 'os.path.normpath("")' yields ".", so the test against ""
    # below is expected to be true always -- confirm the intent.
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory != "":
        # Check, if the output directory exists and is writeable
        if not os.access(setup.output_directory, os.F_OK):
            error_msg("The directory %s was specified for output, but does not exists." % setup.output_directory)
        if not os.access(setup.output_directory, os.W_OK):
            error_msg("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # If a plot option appears on the command line, then check whether a
    # graphic format is specified.
    for plot_option in SETUP_INFO["plot_graphic_format"][0]:
        if plot_option in argv and setup.plot_graphic_format == "":
            error_msg("Option '%s' must be followed by a graphic format specifier (bmp, svg, jpg, ...)" % \
                      plot_option)

    # Ensure that options are not specified twice. The spellings listed in
    # 'info[0]' are counted together.
    for info in SETUP_INFO.values():
        occurrence_n = sum(argv.count(option) for option in info[0])
        if occurrence_n > 1:
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options   = SETUP_INFO[name][0]
        comment                = info[0]
        last_supported_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % last_supported_version + \
                          "http://quex.sourceforge.net for download---Or use a more advanced approach.\n" + \
                          comment)

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort by the option name without dashes; the key function produces the
    # same (stable) order as the former 'cmp' based comparison.
    options.sort(key=lambda option: option.replace("-", ""))

    ufos = command_line.unidentified_options(options)
    if ufos != []:
        error_msg("Unidentified option(s) = " +  repr(ufos) + "\n" + \
                  __get_supported_command_line_option_description(options))

    if setup.input_derived_class_name != "" and \
       setup.input_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.input_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    # Check validity of the code point size. It arrives as a string and is
    # converted to an integer once it is known to be acceptable.
    bpc = setup.bytes_per_ucs_code_point
    if bpc != "wchar_t":
        if bpc not in ["1", "2", "4"]:
            # The line break keeps the offending value apart from the
            # explanation; formerly both were glued together.
            error_msg("choice for --bytes-per-ucs-code-point: %s\n" % bpc + \
                      "quex only supports 1, 2, or 4 bytes per character in internal engine")
            sys.exit(-1)
        else:
            setup.bytes_per_ucs_code_point = int(setup.bytes_per_ucs_code_point)

    if setup.byte_order == "<system>":
        setup.byte_order = sys.byteorder
    elif setup.byte_order not in ["little", "big"]:
        error_msg("Byte order (option --endian) must be 'little' or 'big'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Token offset and the special token ids: equality with the offset is an
    # error, the remaining findings are only warnings.
    if setup.input_token_counter_offset == setup.token_id_termination:
        error_msg("Token id offset (--token-offset) == token id for termination (--token-id-termination)\n")
    if setup.input_token_counter_offset == setup.token_id_uninitialized:
        error_msg("Token id offset (--token-offset) == token id for uninitialized (--token-id-uninitialized)\n")
    if setup.token_id_termination == setup.token_id_uninitialized:
        error_msg("Token id for termination (--token-id-termination) and uninitialized (--token-id-uninitialized)\n" + \
                  "are chosen to be the same. Maybe it works.", DontExitF=True)
    if setup.input_token_counter_offset < setup.token_id_uninitialized:
        error_msg("Token id offset (--token-offset) < token id uninitialized (--token-id-uninitialized).\n" + \
                  "Maybe it works.", DontExitF=True)
    if setup.input_token_counter_offset < setup.token_id_termination:
        error_msg("Token id offset (--token-offset) < token id termination (--token-id-termination).\n" + \
                  "Maybe it works.", DontExitF=True)

    # Check that names are valid identifiers. Optional names are only checked
    # if they have been specified.
    __check_identifier(setup, "input_token_id_prefix", "Token prefix")
    __check_identifier(setup, "output_engine_name",    "Engine name")
    if setup.input_derived_class_name != "":
        __check_identifier(setup, "input_derived_class_name", "Derived class name")
    if setup.input_token_class_name != "":
        __check_identifier(setup, "input_token_class_name",   "Token class name")

    # '--token-class' and '--token-class-file' need to appear together
    if setup.input_token_class_name != "" and setup.input_token_class_file == "":
        error_msg("User defined token class '%s':\n" % setup.input_token_class_name + \
                  "Specifying a user-defined token class via '--token-class' requires\n" + \
                  "that the token class file, also, needs to be specified via '--token-class-file'.")
    if setup.input_token_class_file != "" and setup.input_token_class_name == "":
        error_msg("User defined token class file '%s':\n" % setup.input_token_class_file + \
                  "Specifying a user-defined token class file via '--token-class-file' requires\n" + \
                  "that the token class, also, needs to be specified via '--token-class'.")

    # __check_identifier("token_id_termination",     "Token id for termination")
    # __check_identifier("token_id_uninitialized",   "Token id for uninitialized")
    __check_file_name(setup, "input_token_class_file", "file containing user defined token class")
    __check_file_name(setup, "input_derived_class_file", "file containing user derived lexer class")

    __check_file_name(setup, "input_foreign_token_id_file", "file containing user token ids")
    __check_file_name(setup, "input_user_token_id_file",    "file containing user token ids")

    __check_file_name(setup, "input_mode_files", "quex source file")
Example #15
0
def do(argv):
    """Read the command line 'argv' into the global 'setup' object.

    A request for the version ('--version', '-v') or for help ('--help',
    '-h') is answered by a short notice followed by 'sys.exit(0)'. In any
    other case the options described by SETUP_INFO are stored in 'setup',
    the output file names and integer codes are derived, 'validate()' is
    called, and finally the token class defaults are filled in. There is no
    return value.
    """
    global setup

    # (*) Interpret Command Line (A) _____________________________________________________
    command_line = GetPot(argv)

    if command_line.search("--version", "-v"):
        print("Quex - A Mode Oriented Lexical Analyser")
        print("Version " + QUEX_VERSION)
        print("(C) 2006-2008 Frank-Rene Schaefer")
        sys.exit(0)

    if command_line.search("--help", "-h"):
        print("Quex - A Mode Oriented Lexical Analyser")
        print("Please, consult the quex documentation for further help, or")
        print("visit http://quex.sourceforge.net.")
        print("(C) 2006-2008 Frank-Rene Schaefer")
        sys.exit(0)

    # Element 0 of an entry lists the option names, element 1 tells how the
    # options are to be read.
    for variable_name, info in SETUP_INFO.items():
        if info[1] == LIST:
            the_list = command_line.nominus_followers(info[0])
            if variable_name not in setup.__dict__:
                setup.__dict__[variable_name] = the_list
            else:
                # Something is already stored under this name: append to it.
                setup.__dict__[variable_name].extend(the_list)
        elif info[1] == FLAG:
            found_f = command_line.search(info[0])
            setup.__dict__[variable_name] = found_f
        else:
            # Element 1 goes to 'follow()' as first argument (presumably the
            # value to be used if the option does not appear).
            value = command_line.follow(info[1], info[0])
            setup.__dict__[variable_name] = value

    setup.QUEX_VERSION          = QUEX_VERSION
    setup.QUEX_INSTALLATION_DIR = QUEX_INSTALLATION_DIR
    setup.QUEX_TEMPLATE_DB_DIR  = QUEX_TEMPLATE_DB_DIR

    # (*) Output files: attribute name --> suffix handed to '__prepare_file_name()'
    for attribute, suffix in (("output_file_stem",        ""),
                              ("output_token_id_file",    "-token_ids"),
                              ("output_header_file",      "-internal.h"),
                              ("output_code_file",        ".cpp"),
                              ("output_core_engine_file", "-core-engine.cpp")):
        setattr(setup, attribute, __prepare_file_name(setup, suffix))

    # (*) Integer values; the option name is handed along with the value.
    buffer_limit_code = __get_integer(setup.buffer_limit_code, "--buffer-limit")
    setup.buffer_limit_code           = buffer_limit_code
    setup.control_character_code_list = [buffer_limit_code]

    setup.input_token_counter_offset = \
        __get_integer(setup.input_token_counter_offset, "--token-offset")
    setup.token_id_termination = \
        __get_integer(setup.token_id_termination, "--token-id-termination")
    setup.token_id_uninitialized = \
        __get_integer(setup.token_id_uninitialized, "--token-id-uninitialized")

    validate(setup, command_line, argv)

    if setup.input_foreign_token_id_file != "":
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE            = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        parse_token_id_file(setup.input_foreign_token_id_file,
                            setup.input_token_id_prefix,
                            CommentDelimiterList,
                            IncludeRE)

    # (*) Default values
    #     (Please, do not change this, otherwise no 'empty' options can be detected.)
    token_class_file_default = "input_token_class_file"
    token_class_name_default = "input_token_class_name"
    if setup.input_token_class_file == "":
        setup.input_token_class_file = SETUP_INFO[token_class_file_default][2]
    if setup.input_token_class_name == "":
        setup.input_token_class_name = SETUP_INFO[token_class_name_default][2]

    # (*) return setup ___________________________________________________________________
    return