예제 #1
def __start_mode(applicable_mode_name_list, mode_name_list):
    """Determine and validate the start mode.

    If more than one mode is defined, then that requires an explicit
    definition 'start = mode'.

    applicable_mode_name_list -- names of the modes that can be instantiated;
                                 must not be empty.
    mode_name_list            -- names of all defined modes.

    If no start mode has been specified, the first applicable mode is stored
    in 'lexer_mode.initial_mode'. Problems are reported via 'error_msg' and
    'verify_word_in_list'.
    """
    assert len(applicable_mode_name_list) != 0

    start_mode = lexer_mode.initial_mode.get_pure_code()
    if start_mode == "":
        # No explicit start mode: choose an applicable mode as start mode
        start_mode = applicable_mode_name_list[0]
        lexer_mode.initial_mode = CodeFragment(start_mode)
        if len(applicable_mode_name_list) > 1:
            error_msg("No initial mode defined via 'start' while more than one applicable mode exists.\n" + \
                      "Use for example 'start = %s;' in the quex source file to define an initial mode." \
                      % start_mode)
        # This Branch: start mode is applicable and present

    else:
        # An explicitly specified start mode must be checked: it has to be
        # defined at all, and it has to be one of the applicable modes.
        FileName = lexer_mode.initial_mode.filename
        LineN = lexer_mode.initial_mode.line_n
        # Start mode present and applicable?
        verify_word_in_list(start_mode, mode_name_list,
                            "Start mode '%s' is not defined." % start_mode,
                            FileName, LineN)
        verify_word_in_list(start_mode, applicable_mode_name_list,
                            "Start mode '%s' is inheritable only and cannot be instantiated." % start_mode,
                            FileName, LineN)
예제 #2
def __start_mode(applicable_mode_name_list, mode_name_list):
    """Determine and validate the start mode.

    If more than one mode is defined, then that requires an explicit
    definition 'start = mode'.

    applicable_mode_name_list -- names of the modes that can be instantiated;
                                 must not be empty.
    mode_name_list            -- names of all defined modes.

    If no start mode has been specified, the first applicable mode is stored
    in 'lexer_mode.initial_mode'. Problems are reported via 'error_msg' and
    'verify_word_in_list'.
    """
    assert len(applicable_mode_name_list) != 0

    start_mode = lexer_mode.initial_mode.get_pure_code()
    if start_mode == "":
        # No explicit start mode: choose an applicable mode as start mode
        start_mode              = applicable_mode_name_list[0]
        lexer_mode.initial_mode = CodeFragment(start_mode)
        if len(applicable_mode_name_list) > 1:
            error_msg("No initial mode defined via 'start' while more than one applicable mode exists.\n" + \
                      "Use for example 'start = %s;' in the quex source file to define an initial mode." \
                      % start_mode)
        # This Branch: start mode is applicable and present

    else:
        # An explicitly specified start mode must be checked: it has to be
        # defined at all, and it has to be one of the applicable modes.
        FileName = lexer_mode.initial_mode.filename
        LineN    = lexer_mode.initial_mode.line_n
        # Start mode present and applicable?
        verify_word_in_list(start_mode, mode_name_list,
                            "Start mode '%s' is not defined." % start_mode,
                            FileName, LineN)
        verify_word_in_list(start_mode, applicable_mode_name_list,
                            "Start mode '%s' is inheritable only and cannot be instantiated." % start_mode,
                            FileName, LineN)
예제 #3
def snap_set_term(stream, PatternDict):
    """Parse a character set term from 'stream'.

    A term is either a set operation ('union', 'intersection', 'difference',
    'inverse') applied to a list of sets, the name of a special character
    set, or -- if no identifier can be read -- a set expression.

    stream      -- input stream; must support 'tell()' and 'seek()'.
    PatternDict -- passed through to 'snap_set_list' and
                   'snap_set_expression'.

    Returns the value of '__debug_exit(result, stream)'. Raises
    RegularExpressionException if 'union', 'intersection' or 'difference'
    receives fewer than two sets.
    """
    __debug_entry("set_term", stream)

    operation_list = ["union", "intersection", "difference", "inverse"]
    # list(...) so that the concatenation below also works where 'keys()'
    # returns a view rather than a list.
    character_set_list = list(special_character_set_db().keys())

    # Remember the position, so that the stream can be rewound if what
    # follows is not an identifier.
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        set_list = snap_set_list(stream, word, PatternDict)
        # if an error occurs during set_list parsing, an exception is thrown about syntax error

        L = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            if L > 1:
                for character_set in set_list[1:]:
                    # NOTE(review): the loop body was missing in the file;
                    # 'unite_with' is assumed by analogy with 'intersect_with'
                    # -- confirm against the character set class.
                    result.unite_with(character_set)
            result = result.inverse()
            if Setup.get_character_value_limit() != -1:
                # Restrict the inverse to the admissible character value range
                result.intersect_with(
                    Interval(0, Setup.get_character_value_limit()))
            return __debug_exit(result, stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # NOTE(review): the three loop bodies below were missing in the file;
        # 'unite_with' and 'subtract' are assumed method names -- confirm.
        if word == "union":
            for character_set in set_list[1:]:
                result.unite_with(character_set)
        elif word == "intersection":
            for character_set in set_list[1:]:
                result.intersect_with(character_set)
        elif word == "difference":
            for character_set in set_list[1:]:
                result.subtract(character_set)

    elif word in character_set_list:
        result = special_character_set_db()[word]

    elif word != "":
        # An identifier that is neither an operation nor a known set name.
        # NOTE(review): 'verify_word_in_list' is expected not to return for
        # an unknown word; otherwise 'result' is unbound below -- confirm.
        verify_word_in_list(word, character_set_list + operation_list,
                            "Unknown keyword '%s'." % word, stream)
    else:
        # No identifier: rewind and parse a plain set expression.
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
예제 #4
def __get_distinct_codec_name_for_alias(CodecAlias, FH=-1, LineN=None):
    """Map a codec alias onto the distinct codec name it stands for.

    CodecAlias -- alias or distinct name of a codec; must not be empty.
    FH, LineN  -- handed to 'verify_word_in_list' unchanged; they correspond
                  to the arguments of error_msg.

    Returns the first element of the first record from 'get_codec_list_db()'
    whose alias collection (second element) contains 'CodecAlias' or whose
    first element equals it. If no record matches, the alias is checked
    against 'get_supported_codec_list()' via 'verify_word_in_list'.
    """
    assert len(CodecAlias) != 0

    for entry in get_codec_list_db():
        is_alias = CodecAlias in entry[1]
        if not is_alias and CodecAlias != entry[0]:
            continue
        return entry[0]

    error_text = "Character encoding '%s' unknown to current version of quex." % CodecAlias
    verify_word_in_list(CodecAlias, get_supported_codec_list(), error_text, FH, LineN)
예제 #5
def snap_set_term(stream, PatternDict):
    """Parse a character set term from 'stream'.

    A term is either a set operation ('union', 'intersection', 'difference',
    'inverse') applied to a list of sets, the name of a special character
    set, or -- if no identifier can be read -- a set expression.

    stream      -- input stream; must support 'tell()' and 'seek()'.
    PatternDict -- passed through to 'snap_set_list' and
                   'snap_set_expression'.

    Returns the value of '__debug_exit(result, stream)'. Raises
    RegularExpressionException if 'union', 'intersection' or 'difference'
    receives fewer than two sets.
    """
    __debug_entry("set_term", stream)

    operation_list     = [ "union", "intersection", "difference", "inverse"]
    # list(...) so that the concatenation below also works where 'keys()'
    # returns a view rather than a list.
    character_set_list = list(special_character_set_db().keys())

    # Remember the position, so that the stream can be rewound if what
    # follows is not an identifier.
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        set_list = snap_set_list(stream, word, PatternDict)
        # if an error occurs during set_list parsing, an exception is thrown about syntax error

        L      = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            if L > 1:
                for character_set in set_list[1:]:
                    # NOTE(review): the loop body was missing in the file;
                    # 'unite_with' is assumed by analogy with 'intersect_with'
                    # -- confirm against the character set class.
                    result.unite_with(character_set)
            result = result.inverse()
            if Setup.get_character_value_limit() != -1:
                # Restrict the inverse to the admissible character value range
                result.intersect_with(Interval(0, Setup.get_character_value_limit()))
            return __debug_exit(result, stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        # NOTE(review): the three loop bodies below were missing in the file;
        # 'unite_with' and 'subtract' are assumed method names -- confirm.
        if   word == "union":
            for character_set in set_list[1:]:
                result.unite_with(character_set)
        elif word == "intersection":
            for character_set in set_list[1:]:
                result.intersect_with(character_set)
        elif word == "difference":
            for character_set in set_list[1:]:
                result.subtract(character_set)

    elif word in character_set_list:
        result = special_character_set_db()[word]

    elif word != "":
        # An identifier that is neither an operation nor a known set name.
        # NOTE(review): 'verify_word_in_list' is expected not to return for
        # an unknown word; otherwise 'result' is unbound below -- confirm.
        verify_word_in_list(word, character_set_list + operation_list,
                            "Unknown keyword '%s'." % word, stream)
    else:
        # No identifier: rewind and parse a plain set expression.
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
예제 #6
def __entry_exit_transitions(mode, mode_name_list):
    """Check that the 'exit' and 'entry' options of 'mode' agree with the
    options of the modes they refer to.

    mode           -- mode to check; provides 'name', 'filename', 'line_n',
                      'options' and 'get_base_mode_sequence()'.
    mode_name_list -- names of all existing modes.

    For every exit target that restricts its entries, 'mode' or one of the
    modes in its base mode sequence must be listed as an entry; the same is
    checked for entries versus the exits of the other mode. Violations are
    reported via 'error_msg'.
    """
    FileName = mode.filename
    LineN = mode.line_n
    for mode_name in mode.options["exit"]:
        # Does that mode exist?
        verify_word_in_list(mode_name, mode_name_list,
                            "Mode '%s' allows exit to\nmode '%s' but no such mode exists." % \
                            (mode.name, mode_name), FileName, LineN)

        that_mode = lexer_mode.mode_description_db[mode_name]

        # Other mode allows all entries => don't worry.
        if len(that_mode.options["entry"]) == 0: continue

        # Other mode restricts the entries from other modes
        # => check if this mode or one of the base modes can enter
        for base_mode in mode.get_base_mode_sequence():
            if base_mode.name in that_mode.options["entry"]: break
        else:
            # Only reached if the loop was not left by 'break', i.e. none of
            # the modes in the sequence is admitted. The first message does
            # not exit, so that both locations are reported.
            error_msg("Mode '%s' has an exit to mode '%s' but" % (mode.name, mode_name),
                      FileName, LineN, DontExitF=True, WarningF=False)
            error_msg("mode '%s' has no entry for mode '%s'\n" % (mode_name, mode.name) + \
                      "or any of its base modes.",
                      that_mode.filename, that_mode.line_n)

    for mode_name in mode.options["entry"]:
        # Does that mode exist?
        verify_word_in_list(mode_name, mode_name_list,
                            "Mode '%s' allows entry from\nmode '%s' but no such mode exists." % \
                            (mode.name, mode_name), FileName, LineN)

        that_mode = lexer_mode.mode_description_db[mode_name]
        # Other mode allows all exits => don't worry.
        if len(that_mode.options["exit"]) == 0: continue

        # Other mode restricts the exits to other modes
        # => check if this mode or one of the base modes can be reached
        for base_mode in mode.get_base_mode_sequence():
            if base_mode.name in that_mode.options["exit"]: break
        else:
            # Only reached if none of the modes in the sequence is a
            # permitted exit target of the other mode.
            error_msg("Mode '%s' has an entry for mode '%s' but" % (mode.name, mode_name),
                      FileName, LineN, DontExitF=True, WarningF=False)
            error_msg("mode '%s' has no exit to mode '%s'\n" % (mode_name, mode.name) + \
                      "or any of its base modes.",
                      that_mode.filename, that_mode.line_n)
예제 #7
def __get_distinct_codec_name_for_alias(CodecAlias, FH=-1, LineN=None):
    """Map a codec alias onto the distinct codec name it stands for.

    CodecAlias -- alias or distinct name of a codec; must not be empty.
    FH, LineN  -- correspond to the arguments of error_msg; handed to
                  'verify_word_in_list' unchanged.

    Returns the first element of the first record from 'get_codec_list_db()'
    whose alias collection (second element) contains 'CodecAlias' or whose
    first element equals it. If no record matches, the alias is checked
    against 'get_supported_codec_list()' via 'verify_word_in_list'.
    """
    assert len(CodecAlias) != 0

    for record in get_codec_list_db():
        if CodecAlias in record[1] or CodecAlias == record[0]:
            return record[0]

    # No record matched: let the word verification produce the error report.
    verify_word_in_list(
        CodecAlias, get_supported_codec_list(),
        "Character encoding '%s' unknown to current version of quex." %
        CodecAlias, FH, LineN)
예제 #8
def get_codecs_for_language(Language):
    """Collect the supported codecs that are associated with 'Language'.

    Language -- language name; looked up in the third element of each record
                from 'get_codec_list_db()'.

    Returns a list of codec names (first element of each matching record)
    that are also contained in 'get_supported_codec_list()'. If the list is
    empty, 'Language' is checked against 'get_supported_language_list()' via
    'verify_word_in_list'.
    """
    # Loop invariant: fetch the supported codecs once, not per record.
    supported_codec_list = get_supported_codec_list()

    result = []
    for record in get_codec_list_db():
        codec = record[0]
        if codec not in supported_codec_list: continue
        if Language in record[2]:
            result.append(codec)
    if result == []:
        verify_word_in_list(Language, get_supported_language_list(),
                "No codec found for language '%s'." % Language)
    return result
예제 #9
def get_codecs_for_language(Language):
    """Collect the supported codecs that are associated with 'Language'.

    Language -- language name; looked up in the third element of each record
                from 'get_codec_list_db()'.

    Returns a list of codec names (first element of each matching record)
    that are also contained in 'get_supported_codec_list()'. If the list is
    empty, 'Language' is checked against 'get_supported_language_list()' via
    'verify_word_in_list'.
    """
    # Loop invariant: fetch the supported codecs once, not per record.
    supported_codec_list = get_supported_codec_list()

    result = []
    for record in get_codec_list_db():
        codec = record[0]
        if codec not in supported_codec_list: continue
        if Language in record[2]:
            result.append(codec)
    if result == []:
        verify_word_in_list(Language, get_supported_language_list(),
                            "No codec found for language '%s'." % Language)
    return result
예제 #10
    def __determine_base_mode_sequence(self, ModeDescr, InheritancePath):
        r"""Determine the sequence of base modes. The type of sequencing determines
           also the pattern precedence. The 'deep first' scheme is chosen here. For
           example a mode hierarchy of

                                       A
                                     /   \
                                    B     C
                                   / \   / \
                                  D  E  F   G

           results in a sequence: (A, B, D, E, C, F, G).reverse()

           This means, that patterns and event handlers of 'E' have precedence over
           'C' because they are the childs of a preceding base mode.

           This function detects circular inheritance.

           ModeDescr       -- description of the mode whose base modes are to
                              be sequenced.
           InheritancePath -- list of mode names on the path that led to
                              'ModeDescr'; used for the circularity check.

           Returns 'self.__base_mode_sequence'.
        """
        if ModeDescr.name in InheritancePath:
            # The mode appears on its own inheritance path => report the cycle
            msg = "mode '%s'\n" % InheritancePath[0]
            for mode_name in InheritancePath[InheritancePath.index(ModeDescr.
                                                                   name) + 1:]:
                msg += "   inherits mode '%s'\n" % mode_name
            msg += "   inherits mode '%s'" % ModeDescr.name

            error_msg("circular inheritance detected:\n" + msg,
                      ModeDescr.filename, ModeDescr.line_n)

        base_mode_name_list_reversed = deepcopy(ModeDescr.base_modes)
        for name in base_mode_name_list_reversed:
            # -- does mode exist?
            verify_word_in_list(
                name, list(mode_description_db.keys()),
                "Mode '%s' inherits mode '%s' which does not exist." %
                (ModeDescr.name, name), ModeDescr.filename, ModeDescr.line_n)

            # -- a base mode that is already sequenced is not treated twice
            if name in [m.name for m in self.__base_mode_sequence]:
                continue

            # -- grab the mode description
            mode_descr = mode_description_db[name]
            self.__determine_base_mode_sequence(
                mode_descr, InheritancePath + [ModeDescr.name])

        # NOTE(review): this statement was missing in the file. Without it
        # nothing is ever added to the sequence that is returned and searched
        # above -- confirm against the original implementation.
        self.__base_mode_sequence.append(ModeDescr)

        return self.__base_mode_sequence
예제 #11
def __entry_exit_transitions(mode, mode_name_list):
    """Check that the 'exit' and 'entry' options of 'mode' agree with the
    options of the modes they refer to.

    mode           -- mode to check; provides 'name', 'filename', 'line_n',
                      'options' and 'get_base_mode_sequence()'.
    mode_name_list -- names of all existing modes.

    For every exit target that restricts its entries, 'mode' or one of the
    modes in its base mode sequence must be listed as an entry; the same is
    checked for entries versus the exits of the other mode. Violations are
    reported via 'error_msg'.
    """
    FileName = mode.filename
    LineN    = mode.line_n
    for mode_name in mode.options["exit"]:
        # Does that mode exist?
        verify_word_in_list(mode_name, mode_name_list,
                            "Mode '%s' allows exit to\nmode '%s' but no such mode exists." % \
                            (mode.name, mode_name), FileName, LineN)

        that_mode = lexer_mode.mode_description_db[mode_name]

        # Other mode allows all entries => don't worry.
        if len(that_mode.options["entry"]) == 0: continue

        # Other mode restricts the entries from other modes
        # => check if this mode or one of the base modes can enter
        for base_mode in mode.get_base_mode_sequence():
            if base_mode.name in that_mode.options["entry"]: break
        else:
            # Only reached if the loop was not left by 'break', i.e. none of
            # the modes in the sequence is admitted. The first message does
            # not exit, so that both locations are reported.
            error_msg("Mode '%s' has an exit to mode '%s' but" % (mode.name, mode_name),
                      FileName, LineN, DontExitF=True, WarningF=False)
            error_msg("mode '%s' has no entry for mode '%s'\n" % (mode_name, mode.name) + \
                      "or any of its base modes.",
                      that_mode.filename, that_mode.line_n)

    for mode_name in mode.options["entry"]:
        # Does that mode exist?
        verify_word_in_list(mode_name, mode_name_list,
                            "Mode '%s' allows entry from\nmode '%s' but no such mode exists." % \
                            (mode.name, mode_name), FileName, LineN)

        that_mode = lexer_mode.mode_description_db[mode_name]
        # Other mode allows all exits => don't worry.
        if len(that_mode.options["exit"]) == 0: continue

        # Other mode restricts the exits to other modes
        # => check if this mode or one of the base modes can be reached
        for base_mode in mode.get_base_mode_sequence():
            if base_mode.name in that_mode.options["exit"]: break
        else:
            # Only reached if none of the modes in the sequence is a
            # permitted exit target of the other mode.
            error_msg("Mode '%s' has an entry for mode '%s' but" % (mode.name, mode_name),
                      FileName, LineN, DontExitF=True, WarningF=False)
            error_msg("mode '%s' has no exit to mode '%s'\n" % (mode_name, mode.name) + \
                      "or any of its base modes.",
                      that_mode.filename, that_mode.line_n)
예제 #12
    def __determine_base_mode_sequence(self, ModeDescr, InheritancePath):
        r"""Determine the sequence of base modes. The type of sequencing determines
           also the pattern precedence. The 'deep first' scheme is chosen here. For
           example a mode hierarchy of

                                       A
                                     /   \
                                    B     C
                                   / \   / \
                                  D  E  F   G

           results in a sequence: (A, B, D, E, C, F, G).reverse()

           This means, that patterns and event handlers of 'E' have precedence over
           'C' because they are the childs of a preceding base mode.

           This function detects circular inheritance.

           ModeDescr       -- description of the mode whose base modes are to
                              be sequenced.
           InheritancePath -- list of mode names on the path that led to
                              'ModeDescr'; used for the circularity check.

           Returns 'self.__base_mode_sequence'.
        """
        if ModeDescr.name in InheritancePath:
            # The mode appears on its own inheritance path => report the cycle
            msg = "mode '%s'\n" % InheritancePath[0]
            for mode_name in InheritancePath[InheritancePath.index(ModeDescr.name) + 1:]:
                msg += "   inherits mode '%s'\n" % mode_name
            msg += "   inherits mode '%s'" % ModeDescr.name

            error_msg("circular inheritance detected:\n" + msg, ModeDescr.filename, ModeDescr.line_n)

        base_mode_name_list_reversed = deepcopy(ModeDescr.base_modes)
        for name in base_mode_name_list_reversed:
            # -- does mode exist?
            verify_word_in_list(name, list(mode_description_db.keys()),
                                "Mode '%s' inherits mode '%s' which does not exist." % (ModeDescr.name, name),
                                ModeDescr.filename, ModeDescr.line_n)

            # -- a base mode that is already sequenced is not treated twice
            if name in [m.name for m in self.__base_mode_sequence]: continue

            # -- grab the mode description
            mode_descr = mode_description_db[name]
            self.__determine_base_mode_sequence(mode_descr, InheritancePath + [ModeDescr.name])

        # NOTE(review): this statement was missing in the file. Without it
        # nothing is ever added to the sequence that is returned and searched
        # above -- confirm against the original implementation.
        self.__base_mode_sequence.append(ModeDescr)

        return self.__base_mode_sequence
예제 #13
def do(fh):
    r"""Parses pattern definitions of the form:

          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words the right hand side *must* be a character set.

       fh -- file handle to read from; also handed to 'error_msg' to report
             the position of a problem.

       Parsing ends at '>'. Returns the 'IndentationSetup' object that was
       filled with the parsed definitions.
    """
    indentation_setup = IndentationSetup(fh)

    # NOTE: Catching of EOF happens in caller: parse_section(...)

    while True:

        if check(fh, ">"):
            return indentation_setup

        # A regular expression state machine
        pattern_str, state_machine = regular_expression.parse(fh)

        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.",
                      fh)

        verify_word_in_list(
            identifier, ["space", "grid", "bad", "newline", "suppressor"],
            "Unrecognized indentation specifier '%s'." % identifier, fh)

        trigger_set = None
        if identifier in ["space", "bad", "grid"]:
            # These elements are defined by a single character set, i.e. a
            # state machine with exactly one transition.
            if len(state_machine.states) != 2:
                error_msg("For indentation '%s' only patterns are addmissible which\n" % identifier + \
                          "can be matched by a single character, e.g. \" \" or [a-z].", fh)
            # NOTE(review): the end of this statement was missing in the
            # file; '.get_map()' is assumed since the result is used like a
            # dictionary below -- confirm.
            transition_map = state_machine.get_init_state().transitions(
            ).get_map()
            assert len(transition_map) == 1
            trigger_set = list(transition_map.values())[0]

        if identifier == "space":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_space(pattern_str, trigger_set,
                                                value, fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set,
                                                    variable, fh)
                else:
                    # neither number nor variable => a count of 1 is used
                    indentation_setup.specify_space(pattern_str, trigger_set,
                                                    1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_grid(pattern_str, trigger_set, value,
                                               fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set,
                                                   variable, fh)
                else:
                    error_msg(
                        "Missing integer or variable name after keyword 'grid'.",
                        fh)

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, state_machine, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, state_machine,
                                                 fh)

        else:
            # 'verify_word_in_list' above admits only the handled identifiers
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg(
                "Missing ';' after indentation '%s' specification." %
                identifier, fh)
예제 #14
def do(setup, command_line, argv):
    """Does a consistency check for setup and the command line.

    setup        -- object holding the settings; its 'output_directory'
                    attribute is normalized in place.
    command_line -- command line object; queried via 'search()' and
                    'unidentified_options()'.
    argv         -- sequence of command line arguments; searched with 'in'
                    and 'count()'.

    Inconsistencies are reported via 'error_msg'.
    """
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory != "":
        # Check, if the output directory exists
        if os.access(setup.output_directory, os.F_OK) == False:
            error_msg("The directory %s was specified for output, but does not exists." % setup.output_directory)
        if os.access(setup.output_directory, os.W_OK) == False:
            error_msg("The directory %s was specified for output, but is not writeable." % setup.output_directory)

    # if the mode is 'plotting', then check whether a graphic format is specified
    for plot_option in SETUP_INFO["plot_graphic_format"][0]:
        if plot_option in argv and setup.plot_graphic_format == "":
            error_msg("Option '%s' must be followed by a graphic format specifier (bmp, svg, jpg, ...)" % \
                      plot_option)

    if setup.plot_character_display not in ["hex", "utf8"]:
        error_msg("Plot character display must be either 'hex' or 'utf8'.\nFound: '%s'" %
                  setup.plot_character_display)

    # ensure that options are not specified twice
    for parameter, info in SETUP_INFO.items():
        if type(info) != list: continue
        occurence_n = 0
        for option in info[0]:
            occurence_n += argv.count(option)
        if occurence_n > 1:
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment                   = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if type(info) != list: continue
        if key in DEPRECATED: continue
        if info[1] != None: options.extend(info[0])
    # Sort by option name disregarding the dashes. A key function yields the
    # same order as the former comparison function and works without 'cmp'.
    options.sort(key=lambda option: option.replace("-", ""))

    ufos = command_line.unidentified_options(options)
    if ufos != []:
        # NOTE(review): the end of this message was missing in the file;
        # listing the known options is a reconstruction -- confirm the
        # original text.
        error_msg("Unidentified option(s) = " +  repr(ufos) + "\n" + \
                  "Supported options:\n" + \
                  "".join(["    %s\n" % option for option in options]))

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
            error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                      setup.analyzer_derived_class_name + \
                      "specified which file contains the definition of it.\n" + \
                      "use command line option '--derived-class-file'.\n")

    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error_msg("The setting of '--buffer-element-size' (or '-b') can only be\n"
                  "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error_msg("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search("--token-class", "--tc") == False:
        error_msg("The use of a manually written token class requires that the name of the class\n"
                  "is specified on the command line via the '--token-class' option.")
    # Token queue
    if setup.token_policy != "queue" and command_line.search("--token-queue-size"):
        error_msg("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        # The message refers to 'safety border + 1', so the wording must be
        # chosen by comparing against that same value.
        if setup.token_queue_size == setup.token_queue_safety_border + 1: cmp_str = "equal to"
        else:                                                             cmp_str = "less than"
        error_msg("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) +
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    __check_identifier(setup, "token_id_prefix_plain",    "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name", "Derived class name")
    __check_file_name(setup, "token_class_file",            "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file", "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file", "file containing user token ids")
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f:             converter_n += 1
    if setup.converter_icu_f:               converter_n += 1
    if setup.converter_user_new_func != "": converter_n += 1
    if converter_n > 1:
        error_msg("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n"    + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec != "":
        # NOTE(review): the last line of this message was missing in the
        # file; completed with the third converter option named in the
        # message above -- confirm the original text.
        error_msg("An engine that is to be generated for a specific codec cannot rely\n"      + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "'--converter-new'.")

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if         setup.buffer_element_type != "" \
       and     setup.buffer_element_type not in global_character_type_db \
       and     setup.converter_ucs_coding_name == "" \
       and     converter_n != 0:
        tc = setup.buffer_element_type
        error_msg("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc  + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" +      \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option '--converter-ucs-coding-name' or '--cucn'.")

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        # NOTE(review): the end of this message was missing in the file;
        # the list is rendered like the option list further above -- confirm.
        error_msg("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        error_msg("Token policy 'users_token' has be deprecated since 0.49.1. Use\n"
                  "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error_msg("Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        # Report an error if the codec 'CodecName' is in use while the buffer
        # element size differs from what that codec requires.
        if   setup.buffer_codec        != CodecName:                 return
        elif setup.buffer_element_size == RequiredBufferElementSize: return

        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize, setup.buffer_element_size)

        error_msg("Using codec '%s' while buffer element size %s.\n" % (CodecName, msg_str) +
                  "Consult command line argument '--buffer-element-size'.")

    if setup.buffer_codec != "":
        verify_word_in_list(setup.buffer_codec,
                            codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                            "Codec '%s' is not supported." % setup.buffer_codec)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    # Path Compression
    if setup.compression_path_uniform_f and setup.compression_path_f:
        error_msg("Both flags for path compression were set: '--path-compression' and\n"
                  "'--path-compression-uniform'. Please, choose only one!")
예제 #15
def do(setup, command_line, argv):
    """Check the setup and the command line for consistency.

    Every violated constraint is reported through 'error_msg()'. The function
    returns nothing; its only change to 'setup' is the normalization of
    'setup.output_directory'.

    setup        -- object carrying the settings as attributes.
    command_line -- command line object; '.search()' and
                    '.unidentified_options()' are called on it.
    argv         -- list of the raw command line arguments.

    NOTE(review): This function was restored from a copy that had lost
    several lines (docstring end, 'error_msg(' call heads, 'else:' branches,
    closing arguments). Restored operands that are not backed by other
    visible code are marked 'NOTE(review)' below -- please confirm them.
    """
    # NOTE: os.path.normpath("") yields ".", so the directory checks below
    #       are also executed if no output directory has been specified.
    setup.output_directory = os.path.normpath(setup.output_directory)
    if setup.output_directory != "":
        # Check, if the output directory exists
        if not os.access(setup.output_directory, os.F_OK):
            error_msg("The directory %s was specified for output, but does not exists."
                      % setup.output_directory)
        if not os.access(setup.output_directory, os.W_OK):
            error_msg("The directory %s was specified for output, but is not writeable."
                      % setup.output_directory)

    # If the mode is 'plotting', then check whether a graphic format is specified
    for plot_option in SETUP_INFO["plot_graphic_format"][0]:
        if plot_option in argv and setup.plot_graphic_format == "":
            error_msg("Option '%s' must be followed by a graphic format specifier (bmp, svg, jpg, ...)" % \
                      plot_option)

    if setup.plot_character_display not in ["hex", "utf8"]:
        error_msg("Plot character display must be either 'hex' or 'utf8'.\nFound: '%s'"
                  % setup.plot_character_display)

    # Ensure that options are not specified twice
    for info in SETUP_INFO.values():
        # Entries which are not lists do not describe command line options.
        if not isinstance(info, list): continue
        occurence_n = sum(argv.count(option) for option in info[0])
        if occurence_n > 1:
            error_msg("Received more than one of the following options:\n" + \
                      "%s" % repr(info[0])[1:-1])

    # (*) Check for 'Deprecated' Options ___________________________________________________
    for name, info in DEPRECATED.items():
        command_line_options = SETUP_INFO[name][0]
        comment = info[0]
        depreciated_since_version = info[1]
        for option in command_line_options:
            if command_line.search(option):
                error_msg("Command line option '%s' is ignored.\n" % option + \
                          comment + "\n" + \
                          "Last version of Quex supporting this option is version %s. Please, visit\n" % \
                          depreciated_since_version + \
                          "http://quex.org for further information.")

    # (*) Check for 'Straying' Options ___________________________________________________
    options = []
    for key, info in SETUP_INFO.items():
        if not isinstance(info, list): continue
        if key in DEPRECATED: continue
        if info[1] is not None: options.extend(info[0])
    # Sort by option name, disregarding the dashes.
    options.sort(key=lambda option: option.replace("-", ""))

    ufos = command_line.unidentified_options(options)
    if ufos != []:
        # NOTE(review): The original message was continued by a further
        #               operand (presumably a description of the supported
        #               options) which is missing from the available copy.
        error_msg("Unidentified option(s) = " + repr(ufos) + "\n")

    if setup.analyzer_derived_class_name != "" and \
       setup.analyzer_derived_class_file == "":
        error_msg("Specified derived class '%s' on command line, but it was not\n" % \
                  setup.analyzer_derived_class_name + \
                  "specified which file contains the definition of it.\n" + \
                  "use command line option '--derived-class-file'.\n")

    if setup.buffer_element_size not in [-1, 1, 2, 4]:
        error_msg("The setting of '--buffer-element-size' (or '-b') can only be\n"
                  "1, 2, or 4 (found %s)." % repr(setup.buffer_element_size))

    if setup.buffer_byte_order not in ["<system>", "little", "big"]:
        error_msg("Byte order (option --endian) must be 'little', 'big', or '<system>'.\n" + \
                  "Note, that this option is only interesting for cross plattform development.\n" + \
                  "By default, quex automatically chooses the endian type of your system.")

    # Manually written token class requires token class name to be specified
    if setup.token_class_file != "" and command_line.search(
            "--token-class", "--tc") == False:
        error_msg("The use of a manually written token class requires that the name of the class\n"
                  "is specified on the command line via the '--token-class' option.")

    # Token queue
    # NOTE(review): The argument of 'search()' was lost; '--token-queue-size'
    #               is taken from the error message -- confirm.
    if setup.token_policy != "queue" and command_line.search("--token-queue-size"):
        error_msg("Option --token-queue-size determines a fixed token queue size. This makes\n" + \
                  "only sense in conjunction with '--token-policy queue'.\n")
    if setup.token_queue_size <= setup.token_queue_safety_border + 1:
        # The message relates the queue size to 'safety border + 1', so the
        # comparison string must be derived from the very same quantity.
        if setup.token_queue_size == setup.token_queue_safety_border + 1:
            cmp_str = "equal to"
        else:
            cmp_str = "less than"
        error_msg("Token queue size is %i is %s token queue safety border %i + 1.\n" % \
                  (setup.token_queue_size, cmp_str, setup.token_queue_safety_border) +
                  "Set appropriate values with --token-queue-size and --token-queue-safety-border.")

    # Check that names are valid identifiers
    __check_identifier(setup, "token_id_prefix_plain", "Token prefix")
    __check_identifier(setup, "analyzer_class_name", "Engine name")
    if setup.analyzer_derived_class_name != "":
        __check_identifier(setup, "analyzer_derived_class_name",
                           "Derived class name")

    __check_file_name(setup, "token_class_file",
                      "file containing token class definition")
    __check_file_name(setup, "analyzer_derived_class_file",
                      "file containing user derived lexer class")
    __check_file_name(setup, "token_id_foreign_definition_file",
                      "file containing user token ids")
    __check_file_name(setup, "input_mode_files", "quex source file")

    # Check that not more than one converter is specified
    converter_n = 0
    if setup.converter_iconv_f: converter_n += 1
    if setup.converter_icu_f: converter_n += 1
    if setup.converter_user_new_func != "": converter_n += 1
    if converter_n > 1:
        error_msg("More than one character converter has been specified. Note, that the\n" + \
                  "options '--icu', '--iconv', and '--converter-new' (or '--cn') are\n"    + \
                  "to be used mutually exclusively.")
    if converter_n == 1 and setup.buffer_codec != "":
        # NOTE(review): The last line of this message was lost; it has been
        #               completed in analogy to the message above -- confirm.
        error_msg("An engine that is to be generated for a specific codec cannot rely\n"      + \
                  "on converters. Do no use '--codec' together with '--icu', '--iconv', or\n" + \
                  "'--converter-new'.")

    # If a user defined type is specified for 'engine character type' and
    # a converter, then the name of the target type must be specified explicitly.
    if         setup.buffer_element_type != "" \
       and     setup.buffer_element_type not in global_character_type_db \
       and     setup.converter_ucs_coding_name == "" \
       and     converter_n != 0:
        tc = setup.buffer_element_type
        error_msg("A character code converter has been specified. It is supposed to convert\n" + \
                  "incoming data into an internal buffer of unicode characters. The size of\n" + \
                  "each character is determined by '%s' which is a user defined type.\n" % tc  + \
                  "\n" + \
                  "Quex cannot determine automatically the name that the converter requires\n" +      \
                  "to produce unicode characters for type '%s'. It must be specified by the\n" % tc + \
                  "command line option '--converter-ucs-coding-name' or '--cucn'.")

    # Token transmission policy
    token_policy_list = ["queue", "single", "users_token", "users_queue"]
    if setup.token_policy not in token_policy_list:
        # NOTE(review): The operand listing the policies was lost; restored
        #               in the style of the other option listings -- confirm.
        error_msg("Token policy '%s' not supported. Use one of the following:\n" % setup.token_policy + \
                  repr(token_policy_list)[1:-1])
    elif setup.token_policy == "users_token":
        # The message must name the policy that has actually been detected.
        error_msg("Token policy 'users_token' has be deprecated since 0.49.1. Use\n"
                  "equivalent policy 'single'.")
    elif setup.token_policy == "users_queue":
        error_msg("Token policy 'users_queue' has be deprecated since 0.49.1\n")

    # Internal engine character encoding
    def __codec_vs_buffer_element_size(CodecName, RequiredBufferElementSize):
        """Report an error if codec 'CodecName' is in use while the buffer
        element size differs from 'RequiredBufferElementSize'.
        """
        if   setup.buffer_codec        != CodecName:                 return
        elif setup.buffer_element_size == RequiredBufferElementSize: return

        if setup.buffer_element_size == -1:
            msg_str = "undetermined (found type '%s')" % setup.buffer_element_type
        else:
            msg_str = "is not %i (found %i)" % (RequiredBufferElementSize,
                                                setup.buffer_element_size)

        error_msg("Using codec '%s' while buffer element size %s.\n" %
                  (CodecName, msg_str) +
                  "Consult command line argument '--buffer-element-size'.")

    if setup.buffer_codec != "":
        verify_word_in_list(setup.buffer_codec,
                            codec_db.get_supported_codec_list() + ["utf8", "utf16"],
                            "Codec '%s' is not supported." % setup.buffer_codec)
        __codec_vs_buffer_element_size("utf8", 1)
        __codec_vs_buffer_element_size("utf16", 2)

    # Path Compression
    if setup.compression_path_uniform_f and setup.compression_path_f:
        error_msg("Both flags for path compression were set: '--path-compression' and\n"
                  "'--path-compression-uniform'. Please, choose only one!")
예제 #16
def do(argv):
    """Interpret the command line and fill the global 'setup' object.

    argv -- list of command line arguments; it is passed to 'GetPot' and
            later on to 'validation.do()'.

    RETURN:  True, if process needs to be started.
             False, if job is done.

    NOTE(review): This copy of the function is incomplete. Several lines
    (call heads, 'else:' branches, closing brackets) are missing; the places
    are marked with 'NOTE(review)' comments below. The code itself has been
    left untouched.
    """
    global setup

    # (*) Interpret Command Line (A) _____________________________________________________
    command_line = GetPot(argv)

    # Informational options: print and report 'job is done'.
    if command_line.search("--version", "-v"):
        print "Quex - Fast Universal Lexical Analyzer Generator"
        print "Version " + QUEX_VERSION
        print "(C) 2006-2010 Frank-Rene Schaefer"
        return False

    if command_line.search("--help", "-h"):
        print "Quex - Fast Universal Lexical Analyzer Generator"
        print "Please, consult the quex documentation for further help, or"
        print "visit http://quex.org"
        print "(C) 2006-2010 Frank-Rene Schaefer"
        return False

    if command_line.search("--plot-format-list"):
        print quex.output.graphviz.interface.get_supported_graphic_format_description()
        return False

    # Transfer the command line settings into 'setup'. For list entries,
    # info[0] holds the option names and info[1] the kind (FLAG,
    # NEGATED_FLAG, LIST) or the default value.
    for variable_name, info in SETUP_INFO.items():
        # Some parameters are not set on the command line. Their entry is not associated
        # with a description list.
        if type(info) != list: continue

        if info[1] == FLAG:
            setup.__dict__[variable_name] = command_line.search(info[0])        

        elif info[1] == NEGATED_FLAG:
            setup.__dict__[variable_name] = not command_line.search(info[0])        

        elif info[1] == LIST:
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = []
                # NOTE(review): An 'else:' seems to be missing before the next
                #               line; as written, the followers are only read
                #               if the option was NOT found -- TODO confirm.
                the_list = command_line.nominus_followers(info[0])
                if the_list == []:
                    error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1])

                if setup.__dict__.has_key(variable_name):
                    setup.__dict__[variable_name] = the_list

        # NOTE(review): The inner condition below negates the condition of this
        #               'elif'. Presumably this was a plain 'else:' with an
        #               inner 'if/else' -- TODO confirm against the original.
        elif command_line.search(info[0]):
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = info[1]
                value = command_line.follow("--EMPTY--", info[0])
                if value == "--EMPTY--":
                    error_msg("Option %s\nnot followed by anything." % repr(info[0])[1:-1])
                setup.__dict__[variable_name] = value

    # (*) Classes and their namespace
    # NOTE(review): In the four assignments of this section that end in a
    #               dangling string argument, the head of the call on the right
    #               hand side is missing. Each call is expected to deliver
    #               three values (name, name space, safe name).
    setup.analyzer_class_name, \
    setup.analyzer_name_space, \
    setup.analyzer_name_safe   = \
                              "analyzer engine (options -o, --engine, --analyzer-class)")

    setup.analyzer_derived_class_name,       \
    setup.analyzer_derived_class_name_space, \
    setup.analyzer_derived_class_name_safe = \
                              "derived analyzer class (options --derived-class, --dc)",

    # Default name space of the analyzer: 'quex'
    if setup.analyzer_name_space == []:
        setup.analyzer_name_space = ["quex"]

    # Default token class: 'Token' inside the analyzer's name space
    if setup.token_class_name == "":
        setup.token_class_name = "%s::Token" % reduce(lambda a, b: a + "::" + b, setup.analyzer_name_space)

    # Token classes and derived classes have the freedom not to open a namespace,
    # thus no check 'if namespace == []'.
    setup.token_class_name,       \
    setup.token_class_name_space, \
    setup.token_class_name_safe = \
                              "token class (options --token-class, --tc)")

    # NOTE(review): The right hand side of the following assignment is missing.
    if setup.token_class_file != "":
        lexer_mode.token_type_definition = \

    # Without an own name space the token class lives in the analyzer's one.
    if setup.token_class_name_space == []:
        setup.token_class_name_space = deepcopy(setup.analyzer_name_space)

    setup.token_id_prefix_plain,      \
    setup.token_id_prefix_name_space, \
    dummy                           = \
                              "token prefix (options --token-prefix)")

    if len(setup.token_id_prefix_name_space) != 0 and setup.language.upper() == "C":
         error_msg("Token id prefix cannot contain a namespaces if '--language' is set to 'C'.")

    # (*) Output programming language
    # NOTE(review): The head of the call that owns the message string below
    #               is missing.
    setup.language = setup.language.upper()
                        "Programming language '%s' is not supported." % setup.language)
    setup.language_db = quex_core_engine_generator_languages_db[setup.language]
    setup.extension_db = global_extension_db[setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    if setup.extension_db.has_key(setup.output_file_naming_scheme) == False:
        error_msg("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (setup.output_file_naming_scheme, setup.language) + \
                  "Available schemes are: %s." % repr(setup.extension_db.keys())[1:-1])

    # Before file names can be prepared, determine the output directory
    # If 'source packaging' is enabled and no output directory is specified
    # then take the directory of the source packaging.
    if setup.source_package_directory != "" and setup.output_directory == "":
        setup.output_directory = setup.source_package_directory

    # (*) Output files
    # NOTE(review): No statement follows this headline; presumably the line
    #               preparing the file names is missing -- TODO confirm.

    # NOTE(review): An 'else:' seems to be missing before the assignment of
    #               'False'; as written the flag is always overwritten.
    if setup.buffer_byte_order == "<system>": 
        setup.buffer_byte_order = sys.byteorder 
        setup.byte_order_is_that_of_current_system_f = True
        setup.byte_order_is_that_of_current_system_f = False

    if setup.buffer_element_size == "wchar_t":
        error_msg("Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
                  "with option '--buffer-element-size' or '-bes'. Please, specify\n"
                  "'--buffer-element-type wchar_t' or '--bet'.")

    if setup.buffer_element_type == "wchar_t":
        setup.converter_ucs_coding_name = "WCHAR_T"

    if setup.buffer_codec != "":
        setup.buffer_element_size_irrelevant = True

    # (*) Determine buffer element type and size (in bytes)
    # NOTE(review): An 'else:' seems to be missing before the comment
    #               'If the buffer element type is defined ...'.
    if setup.buffer_element_size == -1:
        if global_character_type_db.has_key(setup.buffer_element_type):
            setup.buffer_element_size = global_character_type_db[setup.buffer_element_type][3]
        elif setup.buffer_element_type == "":
            setup.buffer_element_size = 1
            # If the buffer element type is defined, then here we know that it is 'unknown'
            # and Quex cannot know its size on its own.
            setup.buffer_element_size = -1

    # No type given: derive it from the size in bytes.
    # NOTE(review): The end of the dictionary literal (closing brace and,
    #               presumably, the subscript with the element size) is missing.
    if setup.buffer_element_type == "":
        if setup.buffer_element_size in [1, 2, 4]:
            setup.buffer_element_type = { 
                1: "uint8_t", 2: "uint16_t", 4: "uint32_t",
        elif setup.buffer_element_size == -1:
            error_msg("Buffer element type cannot be determined for size '%i' which\n" \
                      % setup.buffer_element_size + 
                      "has been specified by '-b' or '--buffer-element-size'.")

    # 'utf8' and 'utf16' are marked by a '-state-split' tag; for any other
    # codec the transformation info is taken from 'codec_db'.
    if setup.buffer_codec in ["utf8", "utf16"]:
        setup.buffer_codec_transformation_info = setup.buffer_codec + "-state-split"
    elif setup.buffer_codec != "":
        setup.buffer_codec_transformation_info = codec_db.get_codec_transformation_info(setup.buffer_codec)

    setup.converter_f = False
    if setup.converter_iconv_f or setup.converter_icu_f:
        setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF] is used.
    setup.converter_helper_required_f = True
    if setup.converter_f == False and setup.buffer_element_size == 1 and setup.buffer_codec == "":
        setup.converter_helper_required_f = False

    # Consistency check of the settings collected so far
    validation.do(setup, command_line, argv)

    # No explicit coding name for the converter: look it up by element type,
    # index 1 for byte order 'little', index 2 otherwise.
    if setup.converter_ucs_coding_name == "": 
        if global_character_type_db.has_key(setup.buffer_element_type):
            if setup.buffer_byte_order == "little": index = 1
            else:                                   index = 2
            setup.converter_ucs_coding_name = global_character_type_db[setup.buffer_element_type][index]

    # NOTE(review): The heads of the two calls which receive
    #               'CommentDelimiterList, IncludeRE' are missing below.
    if setup.token_id_foreign_definition_file != "": 
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE            = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
                            CommentDelimiterList, IncludeRE)
        if setup.token_id_prefix_plain != setup.token_id_prefix:
            # The 'plain' name space less token indices are also supported
                                CommentDelimiterList, IncludeRE)

    # (*) return setup ___________________________________________________________________
    return True
예제 #17
def do(fh):
    r"""Parses pattern definitions of the form:

          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words the right hand side *must* be a character set.

       fh -- handle of the input being parsed; it is passed on to all
             reading functions and to 'error_msg()'.

       RETURNS: The 'IndentationSetup' object filled with the parsed
                definitions, as soon as the closing '>' is found.

       NOTE(review): Restored from a copy which had lost the end of the
       docstring, the 'else:' lines, and the heads/tails of two calls.
       The restored 'fh' arguments follow the other calls in this function;
       please confirm against the original.
    """
    indentation_setup = IndentationSetup(fh)

    # NOTE: Catching of EOF happens in caller: parse_section(...)

    while True:

        if check(fh, ">"):
            return indentation_setup

        # A regular expression state machine
        pattern_str, state_machine = regular_expression.parse(fh)

        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.", fh)

        verify_word_in_list(identifier,
                            ["space", "grid", "bad", "newline", "suppressor"],
                            "Unrecognized indentation specifier '%s'." % identifier,
                            fh)

        # 'space', 'bad', and 'grid' are defined by a character set, i.e. the
        # pattern must consist of a single transition between two states.
        trigger_set = None
        if identifier in ["space", "bad", "grid"]:
            if len(state_machine.states) != 2:
                error_msg("For indentation '%s' only patterns are addmissible which\n" % identifier
                          + 'can be matched by a single character, e.g. " " or [a-z].',
                          fh)
            transition_map = state_machine.get_init_state().transitions().get_map()
            assert len(transition_map) == 1
            trigger_set = list(transition_map.values())[0]

        if identifier == "space":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_space(pattern_str, trigger_set, value, fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set, variable, fh)
                else:
                    # Neither number nor name: a space counts as '1'.
                    indentation_setup.specify_space(pattern_str, trigger_set, 1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_grid(pattern_str, trigger_set, value, fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set, variable, fh)
                else:
                    error_msg("Missing integer or variable name after keyword 'grid'.", fh)

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, state_machine, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, state_machine, fh)

        else:
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg("Missing ';' after indentation '%s' specification." % identifier, fh)
예제 #18
def do(argv):
    """Interpret the command line and fill the global 'setup' object.

    argv -- list of command line arguments; it is passed to 'GetPot' and
            later on to 'validation.do()'.

    RETURN:  True, if process needs to be started.
             False, if job is done.

    NOTE(review): This copy of the function is incomplete. Several lines
    (call heads, 'else:' branches, closing brackets) are missing; the places
    are marked with 'NOTE(review)' comments below. The code itself has been
    left untouched.
    """
    global setup

    # (*) Interpret Command Line (A) _____________________________________________________
    command_line = GetPot(argv)

    # Informational options: print and report 'job is done'.
    if command_line.search("--version", "-v"):
        print "Quex - Fast Universal Lexical Analyzer Generator"
        print "Version " + QUEX_VERSION
        print "(C) 2006-2010 Frank-Rene Schaefer"
        return False

    if command_line.search("--help", "-h"):
        print "Quex - Fast Universal Lexical Analyzer Generator"
        print "Please, consult the quex documentation for further help, or"
        print "visit http://quex.org"
        print "(C) 2006-2010 Frank-Rene Schaefer"
        return False

    # NOTE(review): The closing parenthesis of the call in the 'print'
    #               statement below is missing.
    if command_line.search("--plot-format-list"):
        print quex.output.graphviz.interface.get_supported_graphic_format_description(
        return False

    # Transfer the command line settings into 'setup'. For list entries,
    # info[0] holds the option names and info[1] the kind (FLAG,
    # NEGATED_FLAG, LIST) or the default value.
    for variable_name, info in SETUP_INFO.items():
        # Some parameters are not set on the command line. Their entry is not associated
        # with a description list.
        if type(info) != list: continue

        if info[1] == FLAG:
            setup.__dict__[variable_name] = command_line.search(info[0])

        elif info[1] == NEGATED_FLAG:
            setup.__dict__[variable_name] = not command_line.search(info[0])

        # NOTE(review): In this branch an 'else:' seems to be missing before
        #               'the_list = ...', and the operand of the '%' in the
        #               'error_msg' call is missing -- TODO confirm.
        elif info[1] == LIST:
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = []
                the_list = command_line.nominus_followers(info[0])
                if the_list == []:
                    error_msg("Option %s\nnot followed by anything." %

                if setup.__dict__.has_key(variable_name):
                    setup.__dict__[variable_name] = the_list

        # NOTE(review): The inner condition below negates the condition of this
        #               'elif'. Presumably this was a plain 'else:' with an
        #               inner 'if/else'. The operand of the '%' in the
        #               'error_msg' call is missing, too -- TODO confirm.
        elif command_line.search(info[0]):
            if not command_line.search(info[0]):
                setup.__dict__[variable_name] = info[1]
                value = command_line.follow("--EMPTY--", info[0])
                if value == "--EMPTY--":
                    error_msg("Option %s\nnot followed by anything." %
                setup.__dict__[variable_name] = value

    # (*) Classes and their namespace
    # NOTE(review): In the four assignments of this section that end in a
    #               dangling string argument, the head of the call on the right
    #               hand side is missing. Each call is expected to deliver
    #               three values (name, name space, safe name).
    setup.analyzer_class_name, \
    setup.analyzer_name_space, \
    setup.analyzer_name_safe   = \
                              "analyzer engine (options -o, --engine, --analyzer-class)")

    setup.analyzer_derived_class_name,       \
    setup.analyzer_derived_class_name_space, \
    setup.analyzer_derived_class_name_safe = \
                              "derived analyzer class (options --derived-class, --dc)",

    # Default name space of the analyzer: 'quex'
    if setup.analyzer_name_space == []:
        setup.analyzer_name_space = ["quex"]

    # Default token class: 'Token' inside the analyzer's name space
    if setup.token_class_name == "":
        setup.token_class_name = "%s::Token" % reduce(
            lambda a, b: a + "::" + b, setup.analyzer_name_space)

    # Token classes and derived classes have the freedom not to open a namespace,
    # thus no check 'if namespace == []'.
    setup.token_class_name,       \
    setup.token_class_name_space, \
    setup.token_class_name_safe = \
                              "token class (options --token-class, --tc)")

    # NOTE(review): The right hand side of the following assignment is missing.
    if setup.token_class_file != "":
        lexer_mode.token_type_definition = \

    # Without an own name space the token class lives in the analyzer's one.
    if setup.token_class_name_space == []:
        setup.token_class_name_space = deepcopy(setup.analyzer_name_space)

    setup.token_id_prefix_plain,      \
    setup.token_id_prefix_name_space, \
    dummy                           = \
                              "token prefix (options --token-prefix)")

    # NOTE(review): The message string below is dangling; the call which
    #               reports it (head and closing parenthesis) is missing.
    if len(setup.token_id_prefix_name_space) != 0 and setup.language.upper(
    ) == "C":
            "Token id prefix cannot contain a namespaces if '--language' is set to 'C'."

    # (*) Output programming language
    # NOTE(review): The head of the call that owns the three arguments below
    #               is missing.
    setup.language = setup.language.upper()
        setup.language, quex_core_engine_generator_languages_db.keys(),
        "Programming language '%s' is not supported." % setup.language)
    setup.language_db = quex_core_engine_generator_languages_db[setup.language]
    setup.extension_db = global_extension_db[setup.language]

    # Is the output file naming scheme provided by the extension database
    # (Validation must happen immediately)
    if setup.extension_db.has_key(setup.output_file_naming_scheme) == False:
        error_msg("File extension scheme '%s' is not provided for language '%s'.\n" \
                  % (setup.output_file_naming_scheme, setup.language) + \
                  "Available schemes are: %s." % repr(setup.extension_db.keys())[1:-1])

    # Before file names can be prepared, determine the output directory
    # If 'source packaging' is enabled and no output directory is specified
    # then take the directory of the source packaging.
    if setup.source_package_directory != "" and setup.output_directory == "":
        setup.output_directory = setup.source_package_directory

    # (*) Output files
    # NOTE(review): No statement follows this headline; presumably the line
    #               preparing the file names is missing -- TODO confirm.

    # NOTE(review): An 'else:' seems to be missing before the assignment of
    #               'False'; as written the flag is always overwritten.
    if setup.buffer_byte_order == "<system>":
        setup.buffer_byte_order = sys.byteorder
        setup.byte_order_is_that_of_current_system_f = True
        setup.byte_order_is_that_of_current_system_f = False

    # NOTE(review): The head of the call reporting the message below is missing.
    if setup.buffer_element_size == "wchar_t":
            "Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
            "with option '--buffer-element-size' or '-bes'. Please, specify\n"
            "'--buffer-element-type wchar_t' or '--bet'.")

    if setup.buffer_element_type == "wchar_t":
        setup.converter_ucs_coding_name = "WCHAR_T"

    if setup.buffer_codec != "":
        setup.buffer_element_size_irrelevant = True


    # (*) Determine buffer element type and size (in bytes)
    # NOTE(review): The subscript expression after 'global_character_type_db['
    #               is cut off, and an 'else:' seems to be missing before the
    #               comment 'If the buffer element type is defined ...'.
    if setup.buffer_element_size == -1:
        if global_character_type_db.has_key(setup.buffer_element_type):
            setup.buffer_element_size = global_character_type_db[
        elif setup.buffer_element_type == "":
            setup.buffer_element_size = 1
            # If the buffer element type is defined, then here we know that it is 'unknown'
            # and Quex cannot know its size on its own.
            setup.buffer_element_size = -1

    # No type given: derive it from the size in bytes.
    # NOTE(review): The end of the dictionary literal (closing brace and,
    #               presumably, the subscript with the element size) is missing.
    if setup.buffer_element_type == "":
        if setup.buffer_element_size in [1, 2, 4]:
            setup.buffer_element_type = {
                1: "uint8_t",
                2: "uint16_t",
                4: "uint32_t",
        elif setup.buffer_element_size == -1:
            error_msg("Buffer element type cannot be determined for size '%i' which\n" \
                      % setup.buffer_element_size +
                      "has been specified by '-b' or '--buffer-element-size'.")

    # 'utf8' and 'utf16' are marked by a '-state-split' tag; for any other
    # codec the transformation info is taken from 'codec_db'.
    # NOTE(review): The argument list of the 'codec_db' call is cut off.
    if setup.buffer_codec in ["utf8", "utf16"]:
        setup.buffer_codec_transformation_info = setup.buffer_codec + "-state-split"
    elif setup.buffer_codec != "":
        setup.buffer_codec_transformation_info = codec_db.get_codec_transformation_info(

    setup.converter_f = False
    if setup.converter_iconv_f or setup.converter_icu_f:
        setup.converter_f = True

    # The only case where no converter helper is required is where ASCII
    # (Unicode restricted to [0, FF] is used.
    setup.converter_helper_required_f = True
    if setup.converter_f == False and setup.buffer_element_size == 1 and setup.buffer_codec == "":
        setup.converter_helper_required_f = False

    # Consistency check of the settings collected so far
    validation.do(setup, command_line, argv)

    # No explicit coding name for the converter: look it up by element type,
    # index 1 for byte order 'little', index 2 otherwise.
    # NOTE(review): The subscript expression after 'global_character_type_db['
    #               is cut off.
    if setup.converter_ucs_coding_name == "":
        if global_character_type_db.has_key(setup.buffer_element_type):
            if setup.buffer_byte_order == "little": index = 1
            else: index = 2
            setup.converter_ucs_coding_name = global_character_type_db[

    # NOTE(review): The heads (and partly the tails) of the two calls which
    #               receive 'CommentDelimiterList' are missing below.
    if setup.token_id_foreign_definition_file != "":
        CommentDelimiterList = [["//", "\n"], ["/*", "*/"]]
        # Regular expression to find '#include <something>' and extract the 'something'
        # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
        IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"
                            setup.token_id_prefix, CommentDelimiterList,
        if setup.token_id_prefix_plain != setup.token_id_prefix:
            # The 'plain' name space less token indices are also supported
                                CommentDelimiterList, IncludeRE)

    # (*) return setup ___________________________________________________________________
    return True