Example #1
def parse_token_id_definitions(fh, NamesOnlyF=False):
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    token_prefix       = Setup.token_id_prefix
    token_prefix_plain = Setup.token_id_prefix_plain # i.e. without name space included

    if NamesOnlyF: db = {}
    else:          db = blackboard.token_id_db

    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("missing opening '{' for after 'token' section identifier.\n", fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True)

        if candidate == "":
            error_msg("Missing valid token identifier." % candidate, fh)

        # -- check the name; if it starts with the token prefix, post a warning
        if candidate.find(token_prefix) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)
        elif candidate.find(token_prefix_plain) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)

        skip_whitespace(fh)

        if NamesOnlyF:
            db[token_prefix + candidate] = True
            if check(fh, ";") == False:
                error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                          "This is mandatory since Quex version 0.50.1.", fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error_msg("Missing number after '=' for token identifier '%s'." % candidate, fh)

        if check(fh, ";") == False:
            error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                      "This is mandatory since Quex version 0.50.1.", fh)

        db[candidate] = TokenInfo(candidate, numeric_value, Filename=fh.name, LineN=get_current_line_info_number(fh))

    if NamesOnlyF:
        result = db.keys()
        result.sort()
        return result
Example #2
def parse_state_statistics(fh):
    skip_whitespace(fh)
    check_or_die(fh, "{")

    check_or_die(fh, "mode:")
    skip_whitespace(fh)
    mode_name = read_until_letter(fh, ";")

    check_or_die(fh, "state:")
    skip_whitespace(fh)
    state_index = read_integer(fh)
    check_or_die(fh, ";")

    check_or_die(fh, "{")
    skip_whitespace(fh)
    boundary_list_str = read_until_letter(fh, ";")
    skip_whitespace(fh)
    counter_list_str = read_until_letter(fh, ";")
    skip_whitespace(fh)
    check_or_die(fh, "}")
    check_or_die(fh, "}")

    def help(X):
        # Convert a whitespace-separated list of numbers into a list of integers.
        return [int(x) for x in X.strip().split()]

    return StateStatistics(mode_name, state_index, help(boundary_list_str), help(counter_list_str))
Example #3
def parse_state_statistics(fh):
    skip_whitespace(fh)
    check_or_die(fh, "{")

    check_or_die(fh, "mode:")
    skip_whitespace(fh)
    mode_name = read_until_letter(fh, ";")

    check_or_die(fh, "state:")
    skip_whitespace(fh)
    state_index = read_integer(fh)
    check_or_die(fh, ";")

    check_or_die(fh, "{")
    skip_whitespace(fh)
    boundary_list_str = read_until_letter(fh, ";")
    skip_whitespace(fh)
    counter_list_str = read_until_letter(fh, ";")
    skip_whitespace(fh)
    check_or_die(fh, "}")
    check_or_die(fh, "}")

    def help(X):
        # Convert a whitespace-separated list of numbers into a list of integers.
        return [int(x) for x in X.strip().split()]

    return StateStatistics(mode_name, state_index, help(boundary_list_str),
                           help(counter_list_str))
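
The statistics block consumed here is not shown on this page; judging from the check_or_die calls above, it presumably has the shape sketched below (an assumption, not taken from the Quex sources). The two semicolon-terminated fields inside the inner braces are whitespace-separated integers, which the corrected help() turns into integer lists.

# Hypothetical input block for parse_state_statistics(), inferred from the
# check_or_die calls above:
example_block = """
{
    mode:  EXAMPLE_MODE;
    state: 42;
    { 0 10 20 ;  5 3 1 ; }
}
"""
# With the corrected help(), "0 10 20" becomes [0, 10, 20] and "5 3 1"
# becomes [5, 3, 1] before being handed to StateStatistics.
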
Example #4
def get_codec_transformation_info(Codec=None, FileName=None, FH=-1, LineN=None):
    """Provides the information about the relation of character codes in a particular 
       coding to unicode character codes. It is provided in the following form:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin), 
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin), 
         ... 
       ]

       Arguments FH and LineN correspond to the arguments of error_msg.
    """
    assert Codec is not None or FileName is not None

    if FileName is not None:
        file_name = FileName
    else:
        distinct_codec = __get_distinct_codec_name_for_alias(Codec)
        file_name      = __codec_db_path + "/%s.dat" % distinct_codec

    fh = open_file_or_die(file_name, "rb")

    # Read coding into data structure
    transformation_list = []
    try:
        while 1 + 1 == 2:
            skip_whitespace(fh)
            source_begin = read_integer(fh)
            if source_begin is None:
                error_msg("Missing integer (source interval begin) in codec file.", fh)
            skip_whitespace(fh)
            source_size = read_integer(fh)
            if source_size is None:
                error_msg("Missing integer (source interval size) in codec file.", fh)
            skip_whitespace(fh)
            target_begin = read_integer(fh)
            if target_begin is None:
                error_msg("Missing integer (target interval begin) in codec file.", fh)

            source_end = source_begin + source_size
            transformation_list.append([source_begin, source_end, target_begin])
    except EndOfStreamException:
        pass

    return transformation_list
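
The triplet format described in the docstring lends itself to a straightforward lookup. The helper below is a minimal sketch, not part of Quex, showing how a transformation list in that format can map a single codec value to its unicode code point.

def codec_to_unicode(transformation_list, code):
    # Each entry covers the half-open source range [begin, end) and maps it
    # onto a target range starting at target_begin.
    for source_begin, source_end, target_begin in transformation_list:
        if source_begin <= code < source_end:
            return target_begin + (code - source_begin)
    return None  # value not covered by the codec

# Made-up two-interval table, purely for illustration:
example = [(0x00, 0x80, 0x0000), (0xA0, 0x100, 0x0390)]
assert codec_to_unicode(example, 0x41) == 0x41
assert codec_to_unicode(example, 0xA1) == 0x0391
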
Example #5
def do(section_list, fh):
    """Parses a codec information file. The described codec can only be
    a 'static character length' encoding. That is every character in the
    code occupies the same number of bytes.

    RETURNS: [0] Set of characters in unicode which are covered by the
                 described codec.
             [1] Range of values in the codec elements.
    """
    source_set = NumberSet()
    drain_set = NumberSet()

    error_str = None

    try:
        while error_str is None:
            skip_whitespace(fh)
            source_begin = read_integer(fh)
            if source_begin is None:
                error_str = "Missing integer (source interval begin) in codec file."
                continue

            skip_whitespace(fh)
            source_size = read_integer(fh)
            if source_size is None:
                error_str = "Missing integer (source interval size) in codec file."
                continue

            skip_whitespace(fh)
            target_begin = read_integer(fh)
            if target_begin is None:
                error_str = "Missing integer (target interval begin) in codec file."
                continue

            source_end = source_begin + source_size
            section_list.append([source_begin, source_end, target_begin])

            source_set.add_interval(Interval(source_begin, source_end))
            drain_set.add_interval(
                Interval(target_begin, target_begin + source_size))

    except EndOfStreamException:
        pass

    return source_set, drain_set, error_str
Example #6
def do(section_list, fh):
    """Parses a codec information file. The described codec can only be
    a 'static character length' encoding. That is every character in the
    code occupies the same number of bytes.

    RETURNS: [0] Set of characters in unicode which are covered by the
                 described codec.
             [1] Range of values in the codec elements.
    """
    source_set = NumberSet()
    drain_set  = NumberSet()

    error_str = None

    try:
        while error_str is None:
            skip_whitespace(fh)
            source_begin = read_integer(fh)
            if source_begin is None:
                error_str = "Missing integer (source interval begin) in codec file."
                continue

            skip_whitespace(fh)
            source_size = read_integer(fh)
            if source_size is None:
                error_str = "Missing integer (source interval size) in codec file." 
                continue

            skip_whitespace(fh)
            target_begin = read_integer(fh)
            if target_begin is None:
                error_str = "Missing integer (target interval begin) in codec file."
                continue

            source_end = source_begin + source_size
            section_list.append([source_begin, source_end, target_begin])

            source_set.add_interval(Interval(source_begin, source_end))
            drain_set.add_interval(Interval(target_begin, target_begin + source_size))

    except EndOfStreamException:
        pass

    return source_set, drain_set, error_str
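
The loop implies a codec file made of whitespace-separated triplets 'source_begin source_size target_begin'. The stand-alone sketch below mirrors what do() accumulates in section_list, using plain int(x, 0) in place of Quex's read_integer (whose exact number syntax is not reproduced here) and omitting the NumberSet/Interval bookkeeping.

def parse_codec_text(text):
    # Interpret whitespace-separated triplets: begin, size, target.
    numbers = [int(x, 0) for x in text.split()]
    section_list = []
    for i in range(0, len(numbers) - 2, 3):
        begin, size, target = numbers[i:i + 3]
        section_list.append([begin, begin + size, target])
    return section_list

assert parse_codec_text("0x00 0x80 0x00   0xA0 0x60 0x390") == \
       [[0x00, 0x80, 0x00], [0xA0, 0x100, 0x390]]
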
Example #7
def read_character_code(fh):
    # NOTE: This function is tested with the regression test for feature request 2251359.
    #       See directory $QUEX_PATH/TEST/2251359.
    pos = fh.tell()
    
    start = fh.read(1)
    if start == "":  
        fh.seek(pos); return -1

    elif start == "'": 
        # read a utf-8 char and get the token-id
        # Example: '+'
        if check(fh, "\\"):
            # snap_backslashed_character throws an exception if 'backslashed char' is nonsense.
            character_code = snap_backslashed_character.do(fh, ReducedSetOfBackslashedCharactersF=True)
        else:
            character_code = __read_one_utf8_code_from_stream(fh)

        if character_code is None:
            error.log("Missing utf8-character for definition of character code by character.", 
                      fh)

        elif fh.read(1) != '\'':
            error.log("Missing closing ' for definition of character code by character.", 
                      fh)

        return character_code

    if start == "U":
        if fh.read(1) != "C": fh.seek(pos); return -1
        # read Unicode Name 
        # Example: UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE
        skip_whitespace(fh)
        ucs_name = __read_token_identifier(fh)
        if ucs_name == "": fh.seek(pos); return -1
        # Get the character set related to the given name. Note, the size of the set
        # is supposed to be one.
        character_code = ucs_property_db.get_character_set("Name", ucs_name)
        if type(character_code) in [str, unicode]:
            error.verify_word_in_list(ucs_name, ucs_property_db["Name"].code_point_db,
                                      "The string %s\ndoes not identify a known unicode character." % ucs_name, 
                                      fh)
        elif type(character_code) not in [int, long]:
            error.log("%s relates to more than one character in unicode database." % ucs_name, 
                      fh) 
        return character_code

    fh.seek(pos)
    character_code = read_integer(fh)
    if character_code is not None: return character_code

    # Try to interpret it as something else ...
    fh.seek(pos)
    return -1               
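
read_character_code() accepts three forms of input: a quoted character such as '+', a unicode character name introduced by 'UC', or a plain integer; anything else yields -1. The stand-alone sketch below reproduces that dispatch with the standard library's unicodedata in place of Quex's ucs_property_db, purely for illustration (backslash escapes inside the quotes are not handled here).

import unicodedata

def sketch_read_character_code(text):
    text = text.strip()
    if len(text) >= 3 and text[0] == "'" and text[-1] == "'":
        return ord(text[1])                       # quoted character, e.g. '+'
    if text.startswith("UC "):
        name = text[3:].strip().replace("_", " ")
        try:
            return ord(unicodedata.lookup(name))  # unicode name
        except KeyError:
            return -1
    try:
        return int(text, 0)                       # plain integer, e.g. 0x41
    except ValueError:
        return -1

assert sketch_read_character_code("'+'") == 43
assert sketch_read_character_code("UC LATIN_SMALL_LETTER_A") == 0x61
assert sketch_read_character_code("0x41") == 0x41
assert sketch_read_character_code("nonsense") == -1
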
Example #8
def _read_value_specifier(fh, Keyword, Default=None):
    skip_whitespace(fh)
    value = read_integer(fh)
    if value is not None:     return value

    # not a number received, is it an identifier?
    variable = read_identifier(fh)
    if   variable != "":      return variable
    elif Default is not None: return Default

    error.log("Missing integer or variable name after keyword '%s'." % Keyword, fh) 
Example #9
def __get_integer(MemberName):
    ValueStr = setup.__dict__[MemberName]
    if type(ValueStr) == int: return ValueStr
    result = read_integer(StringIO(ValueStr))
    if result is None:
        option_name = repr(SETUP_INFO[MemberName][0])[1:-1]
        error_msg("Cannot convert '%s' into an integer for '%s'.\n" % (ValueStr, option_name) + \
                  "Use prefix '0x' for hexadecimal numbers.\n" + \
                  "           '0o' for octal numbers.\n"       + \
                  "           '0b' for binary numbers.\n"      + \
                  "           '0r' for roman numbers.\n"      + \
                  "           and no prefix for decimal numbers.")
    return result
Example #10
def __get_integer(MemberName):
    ValueStr = setup.__dict__[MemberName]
    if type(ValueStr) == int: return ValueStr
    result = read_integer(StringIO(ValueStr))
    if result is None:
        option_name = repr(SETUP_INFO[MemberName][0])[1:-1]
        error_msg("Cannot convert '%s' into an integer for '%s'.\n" % (ValueStr, option_name) + \
                  "Use prefix '0x' for hexadecimal numbers.\n" + \
                  "           '0o' for octal numbers.\n"       + \
                  "           '0b' for binary numbers.\n"      + \
                  "           '0r' for roman numbers.\n"      + \
                  "           and no prefix for decimal numbers.")
    return result
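
The error message lists the number prefixes read_integer understands: '0x' for hexadecimal, '0o' for octal, '0b' for binary, '0r' for roman numerals, and no prefix for decimal. The converter below illustrates all of these except '0r'; it is a sketch, not Quex's read_integer.

def sketch_to_int(text):
    # '0r' (roman numerals) is accepted by Quex's read_integer but is not
    # reproduced in this sketch.
    for prefix, base in (("0x", 16), ("0o", 8), ("0b", 2)):
        if text.startswith(prefix):
            return int(text[len(prefix):], base)
    return int(text)

assert sketch_to_int("0x1A")  == 26
assert sketch_to_int("0o17")  == 15
assert sketch_to_int("0b101") == 5
assert sketch_to_int("4711")  == 4711
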
Example #11
def test(Input, Cmp=None, Base=10):
    print "%s%s --> " % (Input, " " * (10 - len(Input))),

    try:
        output = read_integer(StringIO(Input))
    except:
        print
        return

    if Base == 10:
        print output,
    else:
        print "%X" % output,
    if Cmp is None: print
    else:           print Cmp == output
Example #12
def test(Input, Cmp=None, Base=10):
    print "%s%s --> " % (Input, " " * (10 - len(Input))),

    try:
        output = read_integer(StringIO(Input))
    except:
        print
        return

    if Base == 10:
        print output,
    else:
        print "%X" % output,
    if Cmp is None: print
    else: print Cmp == output
Example #13
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """NamesOnlyF == True: Allow only definition of names, no numeric values 
                           may be assigned to it.

       'NamesOnlyF' indicates that data is not written to the global 
       'token_id_db'. Then only a list of names is returned.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    prefix = Setup.token_id_prefix
    prefix_plain = Setup.token_id_prefix_plain  # i.e. without name space included

    if NamesOnlyF:
        result = set()

    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("Missing opening '{' for after 'token' section identifier.",
                  fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(
            fh, TolerantF=True, OnMissingStr="Missing valid token identifier.")

        # -- check the name; if it starts with the token prefix, post a warning
        suspicious_prefix = None
        if len(prefix) != 0 and candidate.find(prefix) == 0:
            suspicious_prefix = prefix
        elif len(prefix_plain) != 0 and candidate.find(prefix_plain) == 0:
            suspicious_prefix = prefix_plain

        if suspicious_prefix is not None:
            error.warning("Token identifier '%s' starts with token prefix '%s'.\n" \
                      % (candidate, suspicious_prefix) \
                      + "Token prefix is mounted automatically. This token id appears in the source\n" \
                      + "code as '%s%s'." \
                      % (prefix, candidate), \
                      fh,
                      SuppressCode=NotificationDB.warning_token_id_prefix_appears_in_token_id_name)

        skip_whitespace(fh)

        if NamesOnlyF:
            result.add(prefix + candidate)
            if check(fh, ";") == False:
                error.log("Missing ';' after token identifier '%s'.\n" \
                          % candidate, fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error.log(
                    "Missing number after '=' for token identifier '%s'." %
                    candidate, fh)

        if check(fh, ";") == False:
            error.log("Missing ';' after token identifier '%s'." % candidate,
                      fh)

        if not NamesOnlyF:
            ti = TokenInfo(candidate,
                           numeric_value,
                           SourceReference=SourceRef.from_FileHandle(fh))
            blackboard.token_id_db[candidate] = ti

    if NamesOnlyF:
        return sorted(list(result))
    else:
        return  # Changes are applied to 'blackboard.token_id_db'
Example #14
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """NamesOnlyF == True: Allow only definition of names, no numeric values 
                           may be assigned to it.

       'NamesOnlyF' indicates that data is not written to the global 
       'token_id_db'. Then only a list of names is returned.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    prefix       = Setup.token_id_prefix
    prefix_plain = Setup.token_id_prefix_plain # i.e. without name space included

    if NamesOnlyF: 
        result = set()

    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("Missing opening '{' for after 'token' section identifier.", 
                  fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True, OnMissingStr="Missing valid token identifier.")

        # -- check the name; if it starts with the token prefix, post a warning
        suspicious_prefix = None
        if len(prefix) != 0 and candidate.find(prefix) == 0:       
            suspicious_prefix = prefix
        elif len(prefix_plain) != 0 and candidate.find(prefix_plain) == 0: 
            suspicious_prefix = prefix_plain

        if suspicious_prefix is not None:
            error.warning("Token identifier '%s' starts with token prefix '%s'.\n" \
                      % (candidate, suspicious_prefix) \
                      + "Token prefix is mounted automatically. This token id appears in the source\n" \
                      + "code as '%s%s'." \
                      % (prefix, candidate), \
                      fh, 
                      SuppressCode=NotificationDB.warning_token_id_prefix_appears_in_token_id_name)

        skip_whitespace(fh)

        if NamesOnlyF:
            result.add(prefix + candidate)
            if check(fh, ";") == False:
                error.log("Missing ';' after token identifier '%s'.\n" \
                          % candidate, fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error.log("Missing number after '=' for token identifier '%s'." % candidate, 
                          fh)

        if check(fh, ";") == False:
            error.log("Missing ';' after token identifier '%s'." % candidate, 
                      fh)

        if not NamesOnlyF:
            ti = TokenInfo(candidate, numeric_value, 
                           SourceReference=SourceRef.from_FileHandle(fh))
            blackboard.token_id_db[candidate] = ti

    if NamesOnlyF:
        return sorted(list(result))
    else:
        return # Changes are applied to 'blackboard.token_id_db'
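
A hypothetical use of the NamesOnlyF branch, assuming Setup.token_id_prefix is "TK_" and that the surrounding Quex modules are importable; the section text is invented for illustration.

from StringIO import StringIO   # Python 2, matching the code above

section = "{ IDENTIFIER; NUMBER; SEMICOLON; }"
# parse_token_id_definitions(StringIO(section), NamesOnlyF=True)
# --> ['TK_IDENTIFIER', 'TK_NUMBER', 'TK_SEMICOLON']
# In this mode blackboard.token_id_db is left untouched; without
# NamesOnlyF, each entry becomes a TokenInfo in blackboard.token_id_db.
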
Example #15
def do(fh):
    """Parses pattern definitions of the form:
   
          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words, the right hand side *must* be a character set.
          
    """
    indentation_setup = IndentationSetup(fh)

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    skip_whitespace(fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, ">"):
            indentation_setup.seal()
            indentation_setup.consistency_check(fh)
            return indentation_setup

        # A regular expression state machine
        pattern_str, pattern = regular_expression.parse(fh)

        skip_whitespace(fh)
        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.",
                      fh)

        verify_word_in_list(
            identifier, ["space", "grid", "bad", "newline", "suppressor"],
            "Unrecognized indentation specifier '%s'." % identifier, fh)

        trigger_set = None
        if identifier in ["space", "bad", "grid"]:
            if len(pattern.sm.states) != 2:
                error_msg("For indentation '%s' only patterns are addmissible which\n" % identifier + \
                          "can be matched by a single character, e.g. \" \" or [a-z].", fh)
            transition_map = pattern.sm.get_init_state().transitions().get_map()
            assert len(transition_map) == 1
            trigger_set = transition_map.values()[0]

        skip_whitespace(fh)
        if identifier == "space":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_space(pattern_str, trigger_set,
                                                value, fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set,
                                                    variable, fh)
                else:
                    indentation_setup.specify_space(pattern_str, trigger_set,
                                                    1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_grid(pattern_str, trigger_set, value,
                                               fh)
            else:
                # not a number received, is it an identifier?
                skip_whitespace(fh)
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set,
                                                   variable, fh)
                else:
                    error_msg(
                        "Missing integer or variable name after keyword 'grid'.",
                        fh)

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, pattern.sm, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, pattern.sm, fh)

        else:
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg(
                "Missing ';' after indentation '%s' specification." %
                identifier, fh)
Example #16
def do(fh):
    """Parses pattern definitions of the form:
   
          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words, the right hand side *must* be a character set.
          
    """
    indentation_setup = IndentationSetup(fh)

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    skip_whitespace(fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, ">"): 
            indentation_setup.seal()
            indentation_setup.consistency_check(fh)
            return indentation_setup
        
        # A regular expression state machine
        pattern_str, pattern = regular_expression.parse(fh)

        skip_whitespace(fh)
        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.", fh)

        verify_word_in_list(identifier, 
                            ["space", "grid", "bad", "newline", "suppressor"],
                            "Unrecognized indentation specifier '%s'." % identifier, fh)

        trigger_set = None
        if identifier in ["space", "bad", "grid"]:
            if len(pattern.sm.states) != 2:
                error_msg("For indentation '%s' only patterns are addmissible which\n" % identifier + \
                          "can be matched by a single character, e.g. \" \" or [a-z].", fh)
            transition_map = pattern.sm.get_init_state().transitions().get_map()
            assert len(transition_map) == 1
            trigger_set = transition_map.values()[0]

        skip_whitespace(fh)
        if identifier == "space":
            value = read_integer(fh)
            if value is not None: 
                indentation_setup.specify_space(pattern_str, trigger_set, value, fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set, variable, fh)
                else:
                    indentation_setup.specify_space(pattern_str, trigger_set, 1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None: 
                indentation_setup.specify_grid(pattern_str, trigger_set, value, fh)
            else:
                # not a number received, is it an identifier?
                skip_whitespace(fh)
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set, variable, fh)
                else:
                    error_msg("Missing integer or variable name after keyword 'grid'.", fh) 

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, pattern.sm, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, pattern.sm, fh)

        else:
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg("Missing ';' after indentation '%s' specification." % identifier, fh)
Example #17
def read_character_code(fh):
    # NOTE: This function is tested with the regression test for feature request 2251359.
    #       See directory $QUEX_PATH/TEST/2251359.
    pos = fh.tell()

    start = fh.read(1)
    if start == "":
        fh.seek(pos)
        return -1

    elif start == "'":
        # read a utf-8 char and get the token-id
        # Example: '+'
        if check(fh, "\\"):
            # snap_backslashed_character throws an exception if 'backslashed char' is nonsense.
            character_code = snap_backslashed_character.do(
                fh, ReducedSetOfBackslashedCharactersF=True)
        else:
            character_code = __read_one_utf8_code_from_stream(fh)

        if character_code is None:
            error_msg(
                "Missing utf8-character for definition of character code by character.",
                fh)

        elif fh.read(1) != '\'':
            error_msg(
                "Missing closing ' for definition of character code by character.",
                fh)

        return character_code

    if start == "U":
        if fh.read(1) != "C":
            fh.seek(pos)
            return -1
        # read Unicode Name
        # Example: UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE
        skip_whitespace(fh)
        ucs_name = __read_token_identifier(fh)
        if ucs_name == "":
            fh.seek(pos)
            return -1
        # Get the character set related to the given name. Note, the size of the set
        # is supposed to be one.
        character_code = ucs_property_db.get_character_set("Name", ucs_name)
        if type(character_code) in [str, unicode]:
            verify_word_in_list(
                ucs_name, ucs_property_db["Name"].code_point_db,
                "The string %s\ndoes not identify a known unicode character." %
                ucs_name, fh)
        elif type(character_code) not in [int, long]:
            error_msg(
                "%s relates to more than one character in unicode database." %
                ucs_name, fh)
        return character_code

    fh.seek(pos)
    character_code = read_integer(fh)
    if character_code is not None: return character_code

    # Try to interpret it as something else ...
    fh.seek(pos)
    return -1
Example #18
def parse_token_id_definitions(fh, NamesOnlyF=False):
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    token_prefix = Setup.token_id_prefix
    token_prefix_plain = Setup.token_id_prefix_plain  # i.e. without name space included

    if NamesOnlyF: db = {}
    else: db = blackboard.token_id_db

    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg(
            "missing opening '{' for after 'token' section identifier.\n", fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True)

        if candidate == "":
            error_msg("Missing valid token identifier." % candidate, fh)

        # -- check the name; if it starts with the token prefix, post a warning
        if candidate.find(token_prefix) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)
        elif candidate.find(token_prefix_plain) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)

        skip_whitespace(fh)

        if NamesOnlyF:
            db[token_prefix + candidate] = True
            if check(fh, ";") == False:
                error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                          "This is mandatory since Quex version 0.50.1.", fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error_msg(
                    "Missing number after '=' for token identifier '%s'." %
                    candidate, fh)

        if check(fh, ";") == False:
            error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                      "This is mandatory since Quex version 0.50.1.", fh)

        db[candidate] = TokenInfo(candidate,
                                  numeric_value,
                                  Filename=fh.name,
                                  LineN=get_current_line_info_number(fh))

    if NamesOnlyF:
        result = db.keys()
        result.sort()
        return result