Esempio n. 1
0
def __parse_base_mode_list(fh, new_mode):
    """Reads a ','-separated list of base mode names from 'fh'.

    ADAPTS: new_mode.base_modes -- reset to an empty list; every identifier
                                   that is read is appended to it.

    Reading stops at a '{' or '<', when no identifier can be read, or when
    an identifier is not followed by ','. A ',' that is not followed by
    another base mode is reported as a warning. An identifier that follows
    the last base mode without a separating ',' is reported as an error.
    """
    new_mode.base_modes = []
    comma_pending_f = False
    while True:
        # '{' or '<' terminates the list. Step one character back so that
        # the terminator is still available to the caller (presumably
        # 'check()' has consumed it -- TODO confirm).
        if check(fh, "{") or check(fh, "<"):
            fh.seek(-1, 1)
            break

        skip_whitespace(fh)
        name = read_identifier(fh)
        if name == "":
            break

        new_mode.base_modes.append(name)
        # Only a ',' allows another base mode to follow.
        comma_pending_f = check(fh, ",")
        if not comma_pending_f:
            break

    if comma_pending_f:
        error_msg("Trailing ',' after base mode '%s'." % new_mode.base_modes[-1], fh,
                  DontExitF=True, WarningF=True)
        return

    if len(new_mode.base_modes) == 0:
        return

    # This check is a 'service' -- for those who follow the old convention.
    # Look ahead for an identifier that is not separated by ','; the stream
    # position is restored afterwards.
    resume_position = fh.tell()
    skip_whitespace(fh)
    follower = read_identifier(fh)
    if follower != "":
        error_msg("Missing separating ',' between base modes '%s' and '%s'.\n" \
                  % (new_mode.base_modes[-1], follower) + \
                  "(The comma separator is mandatory since quex 0.53.1)", fh)
    fh.seek(resume_position)
Esempio n. 2
0
def __parse_base_mode_list(fh, new_mode):
    """Reads a ','-separated list of base mode names from 'fh'.

    ADAPTS: new_mode.derived_from_list -- reset to an empty list; every
                                          identifier that is read is
                                          appended to it.

    Reading stops at a '{' or '<', when no identifier can be read, or when
    an identifier is not followed by ','. A ',' that is not followed by
    another base mode triggers 'error.warning()'. An identifier that follows
    the last base mode without a separating ',' triggers 'error.log()'.
    """
    new_mode.derived_from_list = []
    comma_pending_f = False
    while True:
        # '{' or '<' terminates the list. Step one character back so that
        # the terminator is still available to the caller (presumably
        # 'check()' has consumed it -- TODO confirm).
        if check(fh, "{") or check(fh, "<"):
            fh.seek(-1, 1)
            break

        skip_whitespace(fh)
        name = read_identifier(fh)
        if name == "":
            break

        new_mode.derived_from_list.append(name)
        # Only a ',' allows another base mode to follow.
        comma_pending_f = check(fh, ",")
        if not comma_pending_f:
            break

    if comma_pending_f:
        error.warning(
            "Trailing ',' after base mode '%s'." %
            new_mode.derived_from_list[-1], fh)
        return

    if len(new_mode.derived_from_list) == 0:
        return

    # This check is a 'service' -- for those who follow the old convention.
    # Look ahead for an identifier that is not separated by ','; the stream
    # position is restored afterwards.
    resume_position = fh.tell()
    skip_whitespace(fh)
    follower = read_identifier(fh)
    if follower != "":
        error.log("Missing separating ',' between base modes '%s' and '%s'.\n" \
                  % (new_mode.derived_from_list[-1], follower) + \
                  "(The comma separator is mandatory since quex 0.53.1)", fh)
    fh.seek(resume_position)
Esempio n. 3
0
def __parse_keyword_list(new_mode, fh):
    """Parses 'keyword_list [flags] [prefix] { KEYWORD; KEYWORD; ... }'.

    For every keyword a pattern is built from the keyword text itself and a
    token-sending code fragment is generated for the (optionally case
    converted and prefixed) token identifier.

    ADAPTS:  new_mode, through 'new_mode.add_pattern_action_pair()'.
    RETURNS: True, in case of success.
    EXITS:   in case of syntax errors (presumably through 'check_or_die()'
             -- not visible in this function).
    """
    flag_db = {
        "u": "make correspondent token identifiers uppercase.",
        "l": "make correspondent token identifiers lowercase.",
        "N": "pass LexemeNull to token contructor.",
        "L": "pass Lexeme to token constructor.",
        "i": "implicit token identifier definition."
    }
    flags = optional_flags(fh, "keyword_list", "", flag_db, ["ul", "NL"])

    uppercase_f = "u" in flags
    lowercase_f = "l" in flags
    lexeme_null_f = "N" in flags
    lexeme_f = "L" in flags
    implicit_tid_f = "i" in flags

    skip_whitespace(fh)
    prefix = read_identifier(fh)
    skip_whitespace(fh)

    check_or_die(fh, "{", "Opening bracket required after 'keyword_list'.")
    while not check(fh, "}"):
        skip_whitespace(fh)
        position = fh.tell()
        keyword = read_identifier(fh)
        # The pattern is built from the keyword as it is written, i.e.
        # before any case conversion or prefixing of the token identifier.
        pattern = regular_expression.parse(StringIO("%s " % keyword))

        check_or_die(fh, ";",
                     "Semincolon required after keyword '%s'." % keyword)
        if not keyword:
            # Nothing but a ';' => nothing to enter.
            continue

        if uppercase_f:
            token_name = keyword.upper()
        elif lowercase_f:
            token_name = keyword.lower()
        else:
            token_name = keyword
        token_name = "%s%s" % (prefix, token_name)

        if implicit_tid_f:
            token_id_db_enter(fh, token_name)

        code = code_fragment.get_CodeUser_for_token_sending(
            fh, token_name, position,
            LexemeNullF=lexeme_null_f, LexemeF=lexeme_f)
        new_mode.add_pattern_action_pair(pattern, code, fh)

    return True
Esempio n. 4
0
def __parse_brief(new_mode, fh):
    """Parses 'brief [flags] [prefix] { PATTERN TOKEN_ID; ... }'.

    Every entry consists of a regular expression followed by a token
    identifier. A token-sending code fragment is generated for the prefixed
    identifier and entered together with the pattern.

    ADAPTS:  new_mode, through 'new_mode.add_pattern_action_pair()'.
    RETURNS: True, in case of success.
    EXITS:   in case of syntax errors (presumably through 'check_or_die()'
             and 'error.log()' -- not visible in this function).
    """
    flag_db = {
        "N": "pass LexemeNull to token contructor.",
        "L": "pass Lexeme to token constructor.",
        "i": "implicit token identifier definition."
    }
    flags = optional_flags(fh, "brief pattern action pair list", "",
                           flag_db, ["NL"])

    lexeme_null_f = "N" in flags
    lexeme_f = "L" in flags
    implicit_tid_f = "i" in flags

    skip_whitespace(fh)
    prefix = read_identifier(fh)
    skip_whitespace(fh)

    check_or_die(fh, "{", "Opening bracket required after 'brief'.")
    while not check(fh, "}"):
        skip_whitespace(fh)
        pattern = regular_expression.parse(fh)
        skip_whitespace(fh)

        # Position where the token identifier starts; it is handed to the
        # code fragment generator.
        position = fh.tell()
        token_name = read_identifier(fh)
        if not token_name:
            error.log("Missing identifier after regular expression.", fh)
        token_name = "%s%s" % (prefix, token_name)

        check_or_die(
            fh, ";", "Semincolon required after brief token identifier '%s'." %
            token_name)

        if implicit_tid_f:
            token_id_db_enter(fh, token_name)

        code = code_fragment.get_CodeUser_for_token_sending(
            fh, token_name, position,
            LexemeNullF=lexeme_null_f, LexemeF=lexeme_f)
        new_mode.add_pattern_action_pair(pattern, code, fh)

    return True
Esempio n. 5
0
def __parse_section(fh, descriptor, already_defined_list):
    """Parses one subsection of a 'token_type' section and stores the result
    in 'descriptor'.

    Handled subsections: 'name', 'file_name', 'standard', 'distinct',
    'union', 'inheritable', 'noid' and every key of
    'token_type_code_fragment_db'.

    RETURNS: False, if no subsection name but a '}' was found (the stream
                    position is reset to where this function started).
             True,  if a subsection has been parsed.
    """
    global token_type_code_fragment_db
    assert type(already_defined_list) == list

    SubsectionList = ["name", "file_name", "standard", "distinct", "union", "inheritable", "noid"] \
                      + token_type_code_fragment_db.keys()

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        fh.seek(position)
        if check(fh, "}"):
            # Leave the '}' in the stream for the caller.
            fh.seek(position)
            return False
        error_msg("Missing token_type section ('standard', 'distinct', or 'union').", fh)

    verify_word_in_list(word, SubsectionList,
                        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        (descriptor.class_name,
         descriptor.name_space,
         descriptor.class_name_safe) = read_namespaced_name(fh, "token_type")
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'name' specification.", fh)
        return True

    if word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_die(fh, ";")
        return True

    if word == "noid":
        descriptor.token_contains_token_id_f = False
        check_or_die(fh, ";")
        return True

    if word == "file_name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.", fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'file_name' specification.", fh)
        return True

    # Member sections: each one has its own parser with the same signature.
    member_parser_db = {
        "standard": parse_standard_members,
        "distinct": parse_distinct_members,
        "union":    parse_union_members,
    }
    if word in member_parser_db:
        member_parser_db[word](fh, word, descriptor, already_defined_list)
        if not check(fh, "}"):
            # Report the error at the position where the subsection began.
            fh.seek(position)
            error_msg("Missing closing '}' at end of token_type section '%s'." % word, fh)
        return True

    if word in token_type_code_fragment_db.keys():
        # The code fragment is stored as an attribute named like the subsection.
        descriptor.__dict__[word] = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
        return True

    assert False, "This code section section should not be reachable because 'word'\n" + \
                  "was checked to fit in one of the 'elif' cases."

    return True
Esempio n. 6
0
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """Parses the body '{ ... }' of a 'token' section.

    Each entry has the form 'NAME;' or, unless NamesOnlyF is set,
    'NAME = NUMBER;'. A name that already starts with the token prefix (with
    or without name space) triggers a non-exiting 'error_msg()' notice.

    fh         -- stream positioned in front of the opening '{'.
    NamesOnlyF -- True:  only collect names; 'blackboard.token_id_db' is not
                         touched and a '= NUMBER' part is not accepted.
                  False: enter a 'TokenInfo' object for each name into
                         'blackboard.token_id_db'.

    RETURNS: NamesOnlyF == True:  sorted list of 'token prefix + name'.
             NamesOnlyF == False: None.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    token_prefix       = Setup.token_id_prefix
    token_prefix_plain = Setup.token_id_prefix_plain # i.e. without name space included

    if NamesOnlyF: db = {}
    else:          db = blackboard.token_id_db

    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("missing opening '{' for after 'token' section identifier.\n", fh)

    while not check(fh, "}"):
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True)

        if candidate == "":
            # The message has no format specifier. Applying '% candidate' to
            # it raised a TypeError instead of reporting the problem.
            error_msg("Missing valid token identifier.", fh)

        # -- check the name, if it starts with the token prefix paste a warning.
        #    Report the prefix that has actually been found.
        found_prefix = None
        if candidate.startswith(token_prefix):
            found_prefix = token_prefix
        elif candidate.startswith(token_prefix_plain):
            found_prefix = token_prefix_plain

        if found_prefix is not None:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, found_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)

        skip_whitespace(fh)

        # Parse a possible numeric value after '=' (not in 'names only' mode,
        # where a '=' leads to the "Missing ';'" error below).
        numeric_value = None
        if not NamesOnlyF and check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error_msg("Missing number after '=' for token identifier '%s'." % candidate, fh)

        if not check(fh, ";"):
            error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                      "This is mandatory since Quex version 0.50.1.", fh)

        if NamesOnlyF:
            db[token_prefix + candidate] = True
        else:
            db[candidate] = TokenInfo(candidate, numeric_value, Filename=fh.name,
                                      LineN=get_current_line_info_number(fh))

    if NamesOnlyF:
        # 'sorted()' works for the list returned by Python 2's 'keys()' as
        # well as for a Python 3 key view.
        return sorted(db.keys())
    return None
Esempio n. 7
0
def parse(fh):
    """Parses a mode definition: the mode name, an optional option list that
    is introduced by ':', and all mode elements until '__parse_element()'
    reports the end of the mode.

    The mode is represented by a 'ModeDescription' object. According to the
    original notes, its constructor registers the mode in
    'blackboard.mode_description_db' (not visible in this function).
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(
        fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    new_mode = ModeDescription(mode_name, SourceRef.from_FileHandle(fh))

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    delimiter = fh.read(1)
    if delimiter != ":" and delimiter != "{":
        error.log("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if delimiter == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events, one element at a time
    continue_f = True
    while continue_f:
        continue_f = __parse_element(new_mode, fh)
Esempio n. 8
0
def parse(fh, mode_prep_prep_db):
    """Parses a mode definition and enters the resulting 'Mode_PrepPrep'
    object into 'mode_prep_prep_db' under the mode's name.

    A mode name that is already present in 'mode_prep_prep_db' is reported
    together with the location of the earlier definition.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(
        fh, OnMissingStr="Missing identifier at beginning of mode definition.")
    error.insight("Mode '%s'" % mode_name)

    new_mode = Mode_PrepPrep(mode_name, SourceRef.from_FileHandle(fh))
    name = new_mode.name
    if name in mode_prep_prep_db:
        # First message does not exit, so that the second one can point to
        # the earlier definition.
        error.log("Mode '%s' has been defined twice.\n" % name,
                  new_mode.sr,
                  DontExitF=True)
        error.log("Earlier definition here.",
                  mode_prep_prep_db[name].sr)

    mode_prep_prep_db[name] = new_mode

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    delimiter = fh.read(1)
    if delimiter != ":" and delimiter != "{":
        error.log("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if delimiter == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events, one element at a time
    continue_f = True
    while continue_f:
        continue_f = __parse_element(new_mode, fh)
Esempio n. 9
0
def parse(fh):
    """Parses a mode definition: the mode name, an optional option list that
    is introduced by ':', and all mode elements until '__parse_element()'
    reports the end of the mode.

    A mode that ends up without event handlers and without own matches gets
    its option 'inheritable' set to 'only'; a non-exiting message informs
    about the automatic adaption.

    According to the original notes, the 'ModeDescription' constructor
    registers the mode in the mode database (not visible in this function).
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(fh)
    if mode_name == "":
        error_msg("missing identifier at beginning of mode definition.", fh)

    new_mode = ModeDescription(mode_name, fh.name, get_current_line_info_number(fh))

    # (*) inherited modes / options
    skip_whitespace(fh)
    delimiter = fh.read(1)
    if delimiter != ":" and delimiter != "{":
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if delimiter == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events, one element at a time
    continue_f = True
    while continue_f:
        continue_f = __parse_element(new_mode, fh)

    # (*) check for modes w/o pattern definitions
    if new_mode.has_event_handler() or new_mode.has_own_matches():
        return
    if new_mode.options["inheritable"] == "only":
        return

    new_mode.options["inheritable"] = "only"
    error_msg("Mode without pattern and event handlers needs to be 'inheritable only'.\n" + \
              "<inheritable: only> has been added automatically.", fh,  DontExitF=True)
Esempio n. 10
0
def parse(fh):
    """Parses a mode definition: the mode name, an optional option list that
    is introduced by ':', and all mode elements until '__parse_element()'
    reports the end of the mode.

    The mode is represented by a 'ModeDescription' object. According to the
    original notes, its constructor registers the mode in
    'blackboard.mode_description_db' (not visible in this function).
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    new_mode = ModeDescription(mode_name, SourceRef.from_FileHandle(fh))

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    delimiter = fh.read(1)
    if delimiter != ":" and delimiter != "{":
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if delimiter == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events, one element at a time
    continue_f = True
    while continue_f:
        continue_f = __parse_element(new_mode, fh)
Esempio n. 11
0
def _parse_pattern(fh):
    """Parses one pattern definition 'NAME regular-expression' from 'fh'.

    A name that is already present in 'blackboard.shorthand_db', a missing
    regular expression, and a pattern with pre- or post-context are
    reported through 'error.log()'.

    RETURNS: (name, value) where 'value' is the 'PatternShorthand' object
             built from the pattern's state machine. Nothing is entered
             into 'blackboard.shorthand_db' by this function.
    """
    name = read_identifier(fh,
                           OnMissingStr="Missing identifier for pattern definition.")

    # 'in' replaces the deprecated 'dict.has_key()'.
    if name in blackboard.shorthand_db:
        error.log("Second definition of pattern '%s'.\n" % name + \
                  "Pattern names must be unique.", fh)

    skip_whitespace(fh)

    if check(fh, "}"):
        error.log("Missing regular expression for pattern definition '%s'." % \
                  name, fh)

    # No encoding transformation, here. Transformation happens after
    # expansion in a mode.
    pattern = regular_expression.parse(fh, AllowNothingIsFineF=True)

    if pattern.has_pre_or_post_context():
        error.log("Pattern definition with pre- and/or post-context.\n" + \
                  "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
    state_machine = pattern.extract_sm()

    value = PatternShorthand(name, state_machine, SourceRef.from_FileHandle(fh),
                             pattern.pattern_string())

    return name, value
Esempio n. 12
0
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Reads a pattern reference 'NAME }' from 'stream' and looks 'NAME' up
    in 'PatternDict'.

    RETURNS: StateMachineF == True:  reference.get_state_machine()
             StateMachineF == False: reference.get_character_set()

    RAISES:  RegularExpressionException, if the name or the closing '}' is
             missing.
    """
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)
    if pattern_name == "":
        raise RegularExpressionException(
            "Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    error.verify_word_in_list(
        pattern_name, PatternDict.keys(),
        "Specifier '%s' not found in any preceeding 'define { ... }' section."
        % pattern_name, stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__ == PatternShorthand

    # The replacement may be a number set ...
    if not StateMachineF:
        # (according to the original comment, a cloned version is delivered)
        character_set = reference.get_character_set()
        if character_set is None:
            error.log(
                "Replacement in character set expression must be a character set.\n"
                "Specifier '%s' relates to a pattern state machine." %
                pattern_name, stream)

        if character_set.is_empty():
            error.log(
                "Referenced character set '%s' is empty.\nAborted." %
                pattern_name, stream)

        return character_set

    # ... or a state machine
    # (according to the original comment, a cloned version is delivered).
    state_machine = reference.get_state_machine()
    assert isinstance(state_machine, DFA)

    # It is essential that state machines defined as patterns do not
    # have origins. Otherwise, the optimization of patterns that
    # contain pattern replacements might get confused and can
    # not find all optimizations.
    assert not state_machine.has_specific_acceptance_id()

    # NOTE: A check against pre- or post-conditioned patterns in
    #       replacements is not performed here.
    return state_machine
Esempio n. 13
0
def snap_set_term(stream, PatternDict):
    """Parses a set term from 'stream'. A set term is either

       -- an operation 'union', 'intersection', 'difference', or 'inverse'
          applied to a list of sets,
       -- the name of a set from 'special_character_set_db', or
       -- if no identifier is found, a set expression.

    RETURNS: The result of '__debug_exit(character_set, stream)'.
    RAISES:  RegularExpressionException, if 'union', 'intersection', or
             'difference' receive less than two sets.
    """
    global special_character_set_db

    __debug_entry("set_term", stream)

    operation_list = ["union", "intersection", "difference", "inverse"]
    character_set_list = special_character_set_db.keys()

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        # if an error occurs during set_list parsing, an exception is thrown about syntax error
        operand_list = snap_set_list(stream, word, PatternDict)
        operand_n = len(operand_list)
        # The first operand accumulates the result of the operation.
        result = operand_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            for other in operand_list[1:]:
                result.unite_with(other)
            return __debug_exit(
                result.get_complement(Setup.buffer_codec.source_set), stream)

        if operand_n < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        for other in operand_list[1:]:
            if word == "union":
                result.unite_with(other)
            elif word == "intersection":
                result.intersect_with(other)
            elif word == "difference":
                result.subtract(other)

    elif word in character_set_list:
        result = traditional_character_set.do_string(special_character_set_db[word])

    elif word == "":
        # No identifier => rewind and try a set expression.
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    else:
        error.verify_word_in_list(word, character_set_list + operation_list,
                                  "Unknown keyword '%s'." % word, stream)

    return __debug_exit(result, stream)
Esempio n. 14
0
def snap_set_term(stream, PatternDict):
    """Parses a set term from 'stream'. A set term is either

       -- an operation 'union', 'intersection', 'difference', or 'inverse'
          applied to a list of sets,
       -- the name of a set from 'special_character_set_db', or
       -- if no identifier is found, a set expression.

    RETURNS: The result of '__debug_exit(character_set, stream)'.
    RAISES:  RegularExpressionException, if 'union', 'intersection', or
             'difference' receive less than two sets.
    """
    global special_character_set_db

    __debug_entry("set_term", stream)

    operation_list = ["union", "intersection", "difference", "inverse"]
    character_set_list = special_character_set_db.keys()

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        # if an error occurs during set_list parsing, an exception is thrown about syntax error
        operand_list = snap_set_list(stream, word, PatternDict)
        operand_n = len(operand_list)
        # The first operand accumulates the result of the operation.
        result = operand_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            for other in operand_list[1:]:
                result.unite_with(other)
            return __debug_exit(result.get_complement(Setup.buffer_codec.source_set), stream)

        if operand_n < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        for other in operand_list[1:]:
            if word == "union":
                result.unite_with(other)
            elif word == "intersection":
                result.intersect_with(other)
            elif word == "difference":
                result.subtract(other)

    elif word in character_set_list:
        result = traditional_character_set.do_string(special_character_set_db[word])

    elif word == "":
        # No identifier => rewind and try a set expression.
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    else:
        verify_word_in_list(word, character_set_list + operation_list,
                            "Unknown keyword '%s'." % word, stream)

    return __debug_exit(result, stream)
Esempio n. 15
0
def parse_pattern_name_definitions(fh):
    r"""Parses a 'define' region '{ ... }' with pattern definitions of the form:

          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"

    That means: 'name' whitespace 'regular expression'.

    One name can only have one regular expression: a name that is already
    present in 'blackboard.shorthand_db' is reported through 'error.log()'.

    ADAPTS:  blackboard.shorthand_db -- receives a 'PatternShorthand' object
                                        for each pattern name.
    RETURNS: None, as soon as the closing '}' is found.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)

        if check(fh, "}"):
            return

        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(
            fh, OnMissingStr="Missing identifier for pattern definition.")

        # 'in' replaces the deprecated 'dict.has_key()'.
        if pattern_name in blackboard.shorthand_db:
            error.log("Second definition of pattern '%s'.\n" % pattern_name + \
                      "Pattern names must be unique.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error.log("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        pattern = regular_expression.parse(fh, AllowNothingIsFineF=True)

        if pattern.has_pre_or_post_context():
            error.log("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.",
                      fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                PatternShorthand(pattern_name, state_machine,
                                 SourceRef.from_FileHandle(fh), pattern.pattern_string())
Esempio n. 16
0
def _read_value_specifier(fh, Keyword, Default=None):
    """Reads the value that follows 'Keyword' from 'fh'.

    RETURNS: The integer, if 'read_integer()' delivers one; else
             the identifier, if 'read_identifier()' delivers one; else
             'Default', if it is not None.

    If none of the above applies, 'error.log()' is called with a message
    that mentions 'Keyword'.
    """
    skip_whitespace(fh)
    result = read_integer(fh)
    if result is None:
        # not a number received, is it an identifier?
        result = read_identifier(fh)
        if result == "":
            result = Default
            if result is None:
                error.log("Missing integer or variable name after keyword '%s'." % Keyword, fh)
    return result
Esempio n. 17
0
File: core.py Progetto: xxyzzzq/quex
def parse_pattern_name_definitions(fh):
    r"""Parses a 'define' region '{ ... }' with pattern definitions of the form:

          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"

    That means: 'name' whitespace 'regular expression'.

    One name can only have one regular expression: a name that is already
    present in 'blackboard.shorthand_db' is reported through 'error.log()'.

    ADAPTS:  blackboard.shorthand_db -- receives a 'PatternShorthand' object
                                        for each pattern name.
    RETURNS: None, as soon as the closing '}' is found.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)

        if check(fh, "}"):
            return

        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh, OnMissingStr="Missing identifier for pattern definition.")

        # 'in' replaces the deprecated 'dict.has_key()'.
        if pattern_name in blackboard.shorthand_db:
            error.log("Second definition of pattern '%s'.\n" % pattern_name + \
                      "Pattern names must be unique.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error.log("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        pattern = regular_expression.parse(fh, AllowNothingIsFineF=True)

        if pattern.has_pre_or_post_context():
            error.log("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.",
                      fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                PatternShorthand(pattern_name, state_machine,
                                 SourceRef.from_FileHandle(fh), pattern.pattern_string())
Esempio n. 18
0
def __parse_element(new_mode, fh):
    """Parses one element of a mode body from 'fh': a 'keyword_list'
    section, a 'brief' section, an event handler, or a regular expression
    followed by its action.

    RETURNS: False, if a closing '}' has been found.
             True, else. For 'keyword_list' and 'brief' the return value of
             the related parsing function is passed through.

    An 'EndOfStreamException' is caught; the stream is reset to the last
    stored 'position' and 'error.error_eof()' is called with a description
    of what was being parsed.
    """
    position = fh.tell()
    try:
        # 'description' and 'position' are updated along the way, so that the
        # end-of-stream handler below can refer to the current parsing step.
        description = "pattern or event handler"

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        # An identifier that is not exactly a section keyword, but for which
        # 'similarity.get()' does not return -1, only triggers a warning.
        # Parsing then continues below from the stored position.
        identifier = read_identifier(fh)
        if identifier == "keyword_list":
            return __parse_keyword_list(new_mode, fh)
        elif similarity.get(identifier, ["keyword_list", "key words"]) != -1:
            error.warning(
                "'%s' is similar to keyword 'keyword_list'.\n"
                "For clarity, use quotes." % identifier, fh)
        elif identifier == "brief":
            return __parse_brief(new_mode, fh)
        elif similarity.get(identifier,
                            ["brief", "briefing", "briefly"]) != -1:
            error.warning(
                "'%s' is similar to keyword 'brief'.\n"
                "For clarity, use quotes." % identifier, fh)

        # Rewind: the same text is now read as a whitespace delimited word.
        fh.seek(position)
        word = read_until_whitespace(fh)
        if word == "}":
            return False
            # -- check for 'on_entry', 'on_exit', ...
        elif __parse_event(new_mode, fh, word):
            return True

        # Neither section keyword, nor '}', nor event handler: rewind again
        # and parse the text as a regular expression.
        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(
            SourceRef.from_FileHandle(fh, new_mode.name))

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string(
        )

        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        # Report the end of stream at the beginning of the current step.
        fh.seek(position)
        error.error_eof(description, fh)

    return True
Esempio n. 19
0
def parse_pattern_name_definitions(fh):
    r"""Parses a 'define' region '{ ... }' with pattern definitions of the form:

          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"

    That means: 'name' whitespace 'regular expression'.

    ADAPTS:  blackboard.shorthand_db -- receives a
                                        'blackboard.PatternShorthand' object
                                        under each pattern name.
    RETURNS: None, as soon as the closing '}' is found.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)

        # The closing '}' ends the region.
        if check(fh, "}"):
            break

        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if pattern_name == "":
            error_msg("Missing identifier for pattern definition.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error_msg("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        regular_expression_str, pattern = regular_expression.parse(
            fh, AllowNothingIsFineF=True, AllowStateMachineTrafoF=False)

        if pattern.has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)

        shorthand = blackboard.PatternShorthand(pattern_name, pattern.sm,
                                                fh.name, get_current_line_info_number(fh),
                                                regular_expression_str)
        blackboard.shorthand_db[pattern_name] = shorthand
Esempio n. 20
0
def parse_pattern_name_definitions(fh):
    """Read the content of a 'define { ... }' region and register each
       pattern shorthand in 'blackboard.shorthand_db'.

       Every entry is a name followed by a regular expression, e.g.

          WHITESPACE  [ \\t\\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"

       That is: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' and must start at the beginning of a line.

       Several names may refer to the same regular expression, but a name
       maps to exactly one regular expression (a later entry with the same
       name overwrites the database entry).

       The function returns (None) when the closing '}' is found.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)

        # Closing bracket => region is complete.
        if check(fh, "}"):
            return

        # -- the shorthand's name
        skip_whitespace(fh)
        shorthand_name = read_identifier(fh)
        if shorthand_name == "":
            error_msg("Missing identifier for pattern definition.", fh)

        skip_whitespace(fh)

        # A name directly followed by '}' lacks its regular expression.
        if check(fh, "}"):
            error_msg("Missing regular expression for pattern definition '%s'."
                      % shorthand_name, fh)

        # -- the regular expression
        #    (No transformation into a particular codec happens here. State
        #     machines are transformed once, after they have been expanded
        #     as patterns inside a mode.)
        re_str, pattern = regular_expression.parse(fh,
                                                   AllowNothingIsFineF=True,
                                                   AllowStateMachineTrafoF=False)

        if pattern.has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n"
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)

        shorthand = blackboard.PatternShorthand(shorthand_name, pattern.sm,
                                                fh.name,
                                                get_current_line_info_number(fh),
                                                re_str)
        blackboard.shorthand_db[shorthand_name] = shorthand
Esempio n. 21
0
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Resolve a pattern replacement '{NAME}' whose opening '{' has already
       been consumed from 'stream'.

       stream        -- input stream positioned right behind the '{'.
       PatternDict   -- maps shorthand names to 'PatternShorthand' objects.
       StateMachineF -- True:  return the shorthand's state machine.
                        False: return the shorthand's character set.

       RAISES: RegularExpressionException if the identifier or the closing
               '}' is missing.
    """
    skip_whitespace(stream)
    name = read_identifier(stream)
    if name == "":
        raise RegularExpressionException("Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException(
            "Pattern replacement expression misses closing '}' after '%s'." % name)

    verify_word_in_list(name, PatternDict.keys(),
                        "Specifier '%s' not found in any preceeding 'define { ... }' section." % name,
                        stream)

    shorthand = PatternDict[name]
    assert shorthand.__class__.__name__ == "PatternShorthand"

    if not StateMachineF:
        # (*) Replacement inside a character set expression
        char_set = shorthand.get_character_set()
        if char_set is None:
            error_msg("Replacement in character set expression must be a character set.\n"
                      "Specifier '%s' relates to a pattern state machine." % name, stream)

        if char_set.is_empty():
            error_msg("Referenced character set '%s' is empty.\nAborted." % name, stream)

        return char_set

    # (*) Replacement by a state machine
    sm = shorthand.get_state_machine()
    assert isinstance(sm, StateMachine)

    # State machines defined as patterns must not carry origins. Otherwise,
    # the optimization of patterns containing replacements might get
    # confused and miss optimizations.
    assert sm.has_origins() == False

    # NOTE: A check rejecting pre-/post-conditioned patterns in replacements
    #       used to be here; it is disabled.
    return sm
Esempio n. 22
0
def read_option_start(fh):
    """Read the head '<name:' of a mode option.

       RETURNS: None, if the next non-whitespace character is not '<'.
                The stripped option name, otherwise.

       On return of a name, the stream stands behind the ':' and any
       whitespace following it.
    """
    skip_whitespace(fh)

    # (*) base modes
    opener = fh.read(1)
    if opener != "<":
        # The character read is NOT put back; the 'fh.seek(-1, 1)' that
        # would do so is disabled in this code base.
        return None

    skip_whitespace(fh)
    raw_name = read_identifier(fh, OnMissingStr="Missing identifer after start of mode option '<'")
    option_name = raw_name.strip()

    skip_whitespace(fh)
    separator = fh.read(1)
    if separator != ":":
        error.log("missing ':' after option name '%s'" % option_name, fh)
    skip_whitespace(fh)

    return option_name
Esempio n. 23
0
def _parse_definition_head(fh, IdentifierList):
    """Parse the head 'pattern => specifier' of a definition.

       fh             -- input stream.
       IdentifierList -- admissible specifiers following the '=>'.

       The pattern is either the keyword '\\else' (reported as None) or a
       regular expression. The outdated keyword '\\default' is reported as
       an error.

       RETURNS: tuple (pattern, specifier, source reference at the position
                       reached behind the specifier).
    """
    if check(fh, "\\default"):
        error.log("'\\default' has been replaced by keyword '\\else' since quex 0.64.9!", fh)
    elif check(fh, "\\else"):
        pattern = None
    else:
        pattern = regular_expression.parse(fh)

    # -- the separating arrow
    skip_whitespace(fh)
    check_or_die(fh, "=>", " after character set definition.")

    # -- the specifier
    skip_whitespace(fh)
    specifier = read_identifier(fh, OnMissingStr="Missing identifier following '=>'.")
    error.verify_word_in_list(specifier, IdentifierList,
                              "Unrecognized specifier '%s'." % specifier,
                              fh)
    skip_whitespace(fh)

    return pattern, specifier, SourceRef.from_FileHandle(fh)
Esempio n. 24
0
def read_option_start(fh):
    """Consume the opening '<name:' of a mode option and deliver 'name'.

       RETURNS: None, if no '<' follows the leading whitespace.
                The option's name (stripped), otherwise.
    """
    skip_whitespace(fh)

    # (*) base modes
    first_char = fh.read(1)
    if first_char != "<":
        # No seek-back: the character which has been read remains consumed
        # (the corresponding 'fh.seek(-1, 1)' is disabled).
        return None

    skip_whitespace(fh)
    name = read_identifier(
        fh, OnMissingStr="Missing identifer after start of mode option '<'")
    name = name.strip()

    skip_whitespace(fh)
    colon = fh.read(1)
    if colon != ":":
        error.log("missing ':' after option name '%s'" % name, fh)
    skip_whitespace(fh)

    return name
Esempio n. 25
0
def parse(fh):
    """Parse a mode description starting at the mode's name.

       A 'ModeDescription' is created for the mode. According to the note
       below, its constructor takes care of the registration in the mode
       database. If the name is followed by ':' an option list is parsed;
       then the mode's elements are parsed until '__parse_element()' reports
       that there is nothing more.

       A mode with neither event handlers nor own matches is marked as
       '<inheritable: only>' and a non-exiting message is issued.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    name = read_identifier(fh)
    if name == "":
        error_msg("missing identifier at beginning of mode definition.", fh)

    # NOTE: constructor does register this mode in the mode_db
    description = ModeDescription(name, fh.name,
                                  get_current_line_info_number(fh))

    # (*) inherited modes / options
    skip_whitespace(fh)
    delimiter = fh.read(1)
    if delimiter == ":":
        __parse_option_list(description, fh)
    elif delimiter != "{":
        error_msg("missing ':' or '{' after mode '%s'" % name, fh)

    # (*) pattern-action pairs and events
    while __parse_element(description, fh):
        pass

    # (*) modes w/o pattern definitions
    if description.has_event_handler() or description.has_own_matches():
        return
    if description.options["inheritable"] == "only":
        return

    description.options["inheritable"] = "only"
    error_msg("Mode without pattern and event handlers needs to be 'inheritable only'.\n"
              + "<inheritable: only> has been added automatically.",
              fh, DontExitF=True)
Esempio n. 26
0
def parse_section(fh):
    """Parse one top-level section of a quex input file.

    The section title is read from 'fh', verified against
    'blackboard.all_section_title_list', and the section's content is then
    handed to the parser responsible for it. Results are stored in
    'blackboard'; nothing is returned.

    An 'EndOfStreamException' raised while parsing the section's content
    is caught: the stream is set back to where the section began and
    'error.error_eof()' is called with the section title.

    NOTE(review): 'error.log()' is presumably terminating unless
    'DontExitF=True' is passed -- confirm against the 'error' module.
    """
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh, OnMissingStr="Missing section title")

    error.verify_word_in_list(word, blackboard.all_section_title_list,
                              "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'     => define a mode
        #     -- 'start = ...;'     => define the name of the initial mode
        #     -- 'header { ... }'   => define code that is to be pasted on top
        #                              of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'     => define code that is to be pasted in the class' body
        #                              of the engine (e.g. "public: int  my_member;")
        #     -- 'init { ... }'     => define code that is to be pasted in the class' constructors
        #                              of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }'   => define patterns shorthands such as IDENTIFIER for [a-z]+
        #     -- 'repeated_token_id = QUEX_TKN_ ...;' => enables token repetition, defines
        #                                                the token id to be repeated.
        #     -- 'token { ... }'    => define token ids
        #     -- 'token_type { ... }'  => define a customized token type
        #
        if word in blackboard.fragment_db:
            # Plain code fragment; stored as attribute of 'blackboard'
            # under the name given by 'fragment_db'.
            element_name = blackboard.fragment_db[word]
            fragment = code_fragment.parse(fh,
                                           word,
                                           AllowBriefTokenSenderF=False)
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error.log("Missing mode_name after 'start ='", fh)

            elif not blackboard.initial_mode.sr.is_void():
                # A non-void source reference tells that a start mode has
                # been specified before.
                error.log("start mode defined more than once!",
                          fh,
                          DontExitF=True)
                error.log("previously defined here",
                          blackboard.initial_mode.sr)

            blackboard.initial_mode = CodeUser(mode_name,
                                               SourceRef.from_FileHandle(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(
                fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                # Names are delivered with the token id prefix; the token
                # id database is keyed without it.
                error.verify_word_in_list(
                    token_name[len(Setup.token_id_prefix):],
                    blackboard.token_id_db.keys(),
                    "Token ID '%s' not yet defined." % token_name,
                    fh,
                    ExitF=False,
                    SuppressCode=NotificationDB.
                    warning_repeated_token_not_yet_defined)
            return

        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":
            if Setup.token_id_foreign_definition:
                error.log("Token id file '%s' has been specified.\n" \
                          % Setup.token_id_foreign_definition_file \
                          + "All token ids must be specified there. Section 'token'\n" \
                          + "is not allowed.", fh)

            parse_token_id_definitions(fh)
            return

        elif word == "token_type":

            if Setup.token_class_file != "":
                # FIX: a space was missing between the file name and 'has'.
                error.log("Section 'token_type' is intended to generate a token class.\n" \
                          + "However, the manually written token class file '%s' " \
                          % repr(Setup.token_class_file) \
                          + "has been specified on the command line.",
                          fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case: a token type definition exists already.
            if default_token_type_definition_triggered_by_mode_definition_f:
                error.log(
                    "Section 'token_type' must appear before first mode definition.",
                    fh)
            else:
                error.log("Section 'token_type' has been defined twice.",
                          fh,
                          DontExitF=True)
                error.log("Previously defined here.",
                          blackboard.token_type_definition.sr)
            return

        elif word == "mode":
            # When the first mode is parsed then a token_type definition must be
            # present. If not, the default token type definition is considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(word, fh)
Esempio n. 27
0
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """Parse the content '{ NAME [= number]; ... }' of a token id section.

       NamesOnlyF == False: Each token id is entered as 'TokenInfo' into
                            'blackboard.token_id_db'. An optional numeric
                            value may follow a '='. Nothing is returned.

       NamesOnlyF == True:  No numeric values may be assigned and nothing
                            is written to 'blackboard.token_id_db'. The
                            sorted list of names, each one preceded by the
                            token id prefix, is returned.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    prefix = Setup.token_id_prefix
    prefix_plain = Setup.token_id_prefix_plain  # i.e. without name space included

    name_set = set() if NamesOnlyF else None

    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("Missing opening '{' for after 'token' section identifier.",
                  fh)

    while not check(fh, "}"):
        skip_whitespace(fh)

        token_name = read_identifier(
            fh, TolerantF=True, OnMissingStr="Missing valid token identifier.")

        # -- a name that begins with the token prefix deserves a warning,
        #    since the prefix is added automatically.
        matched_prefix = None
        for some_prefix in (prefix, prefix_plain):
            if some_prefix and token_name.startswith(some_prefix):
                matched_prefix = some_prefix
                break

        if matched_prefix is not None:
            message = "Token identifier '%s' starts with token prefix '%s'.\n" \
                      % (token_name, matched_prefix) \
                      + "Token prefix is mounted automatically. This token id appears in the source\n" \
                      + "code as '%s%s'." \
                      % (prefix, token_name)
            error.warning(message, fh,
                          SuppressCode=NotificationDB.warning_token_id_prefix_appears_in_token_id_name)

        skip_whitespace(fh)

        if NamesOnlyF:
            # Only the name is of interest; it must be terminated by ';'.
            name_set.add(prefix + token_name)
            if not check(fh, ";"):
                error.log("Missing ';' after token identifier '%s'.\n" \
                          % token_name, fh)
            continue

        # -- optional numeric value behind '='
        number = None
        if check(fh, "="):
            skip_whitespace(fh)
            number = read_integer(fh)
            if number is None:
                error.log(
                    "Missing number after '=' for token identifier '%s'." %
                    token_name, fh)

        if not check(fh, ";"):
            error.log("Missing ';' after token identifier '%s'." % token_name,
                      fh)

        blackboard.token_id_db[token_name] = TokenInfo(
            token_name,
            number,
            SourceReference=SourceRef.from_FileHandle(fh))

    if NamesOnlyF:
        return sorted(name_set)
    return None  # Changes are applied to 'blackboard.token_id_db'
Esempio n. 28
0
def do(fh):
    r"""Parse the entries of an indentation setup up to the closing '>'.

       Entries have the form 'pattern => specifier [argument];', e.g.

          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       The specifier is one of 'space', 'grid', 'bad', 'newline', or
       'suppressor'. For 'space', 'grid', and 'bad' the pattern must be
       matchable by a single character, i.e. it must be a character set.

         space  -- optional integer or variable name; default is 1.
         grid   -- mandatory integer or variable name.

       RETURNS: The 'IndentationSetup' object after it has been sealed and
                passed its consistency check.
    """
    indentation_setup = IndentationSetup(fh)

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    skip_whitespace(fh)

    while True:
        skip_whitespace(fh)

        if check(fh, ">"):
            indentation_setup.seal()
            indentation_setup.consistency_check(fh)
            return indentation_setup

        # A regular expression state machine
        pattern_str, pattern = regular_expression.parse(fh)

        skip_whitespace(fh)
        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.", fh)

        verify_word_in_list(identifier,
                            ["space", "grid", "bad", "newline", "suppressor"],
                            "Unrecognized indentation specifier '%s'." % identifier, fh)

        trigger_set = None
        if identifier in ["space", "bad", "grid"]:
            # A pattern matched by a single character consists of exactly
            # two states connected by a single transition.
            if len(pattern.sm.states) != 2:
                error_msg("For indentation '%s' only patterns are addmissible which\n" % identifier + \
                          "can be matched by a single character, e.g. \" \" or [a-z].", fh)
            transition_map = pattern.sm.get_init_state().transitions().get_map()
            assert len(transition_map) == 1
            # FIX: 'values()[0]' fails on Python 3, where 'dict.values()'
            #      is a view that cannot be indexed. Taking the first
            #      element through an iterator works with lists and views.
            trigger_set = next(iter(transition_map.values()))

        skip_whitespace(fh)
        if identifier == "space":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_space(pattern_str, trigger_set, value, fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set, variable, fh)
                else:
                    # Neither number nor variable => space count defaults to 1.
                    indentation_setup.specify_space(pattern_str, trigger_set, 1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_grid(pattern_str, trigger_set, value, fh)
            else:
                # not a number received, is it an identifier?
                skip_whitespace(fh)
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set, variable, fh)
                else:
                    error_msg("Missing integer or variable name after keyword 'grid'.", fh)

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, pattern.sm, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, pattern.sm, fh)

        else:
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg("Missing ';' after indentation '%s' specification." % identifier, fh)
Esempio n. 29
0
def parse_section(fh):
    """Read a section title from 'fh' and let the parser responsible for
       that section process the section's content.

       Results are stored in 'blackboard'. Nothing is returned.

       If the end of the stream is hit while a section's content is parsed,
       the stream is set back to the beginning of the section and an error
       is reported.
    """
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    start_position = fh.tell()
    skip_whitespace(fh)
    title = read_identifier(fh)
    if title == "":
        error_msg("Missing section title.", fh)

    verify_word_in_list(title, blackboard.all_section_title_list,
                        "Unknown quex section '%s'" % title, fh)
    try:
        # (*) Section titles and what they define:
        #
        #     -- 'mode { ... }'     => a mode
        #     -- 'start = ...;'     => the name of the initial mode
        #     -- 'header { ... }'   => code to be pasted on top of the engine
        #                              (e.g. "#include<...>")
        #     -- 'body { ... }'     => code to be pasted in the class' body
        #                              (e.g. "public: int  my_member;")
        #     -- 'init { ... }'     => code to be pasted in the class' constructors
        #                              (e.g. "my_member = -1;")
        #     -- 'define { ... }'   => pattern shorthands such as IDENTIFIER for [a-z]+
        #     -- 'repeated_token_id = QUEX_TKN_ ...;' => enables token repetition,
        #                              defines the token id to be repeated.
        #     -- 'token { ... }'    => token ids
        #     -- 'token_type { ... }'  => a customized token type
        #
        # Each case returns on its own. So, a sequence of plain 'if's does it.
        if title in blackboard.fragment_db:
            attribute_name = blackboard.fragment_db[title]
            blackboard.__dict__[attribute_name] = \
                    code_fragment.parse(fh, title, AllowBriefTokenSenderF=False)
            return

        if title == "start":
            start_mode_name = parse_identifier_assignment(fh)
            if start_mode_name == "":
                error_msg("Missing mode_name after 'start ='", fh)
            elif blackboard.initial_mode.get_pure_code() != "":
                error_msg("start mode defined more than once!", fh, DontExitF=True)
                error_msg("previously defined here",
                          blackboard.initial_mode.filename,
                          blackboard.initial_mode.line_n)

            blackboard.initial_mode = UserCodeFragment(start_mode_name, fh.name,
                                                       get_current_line_info_number(fh))
            return

        if title == "repeated_token":
            blackboard.token_repetition_token_id_list = \
                    parse_token_id_definitions(fh, NamesOnlyF=True)
            prefix_length = len(Setup.token_id_prefix)
            for full_name in blackboard.token_repetition_token_id_list:
                # The database is keyed by names without the token id prefix.
                verify_word_in_list(full_name[prefix_length:],
                                    blackboard.token_id_db.keys(),
                                    "Token ID '%s' not yet defined." % full_name,
                                    fh, ExitF=False)
            return

        if title == "define":
            parse_pattern_name_definitions(fh)
            return

        if title == "token":
            parse_token_id_definitions(fh)
            return

        if title == "token_type":
            if Setup.token_class_file != "":
                error_msg("Token type definition inadmissible while specifying on the command line\n" + \
                          "the file %s to contain a manually written token class." % repr(Setup.token_class_file),
                          fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case: there is a token type definition already.
            if default_token_type_definition_triggered_by_mode_definition_f:
                error_msg("Section 'token_type' must appear before first mode definition.", fh)
            else:
                previous = blackboard.token_type_definition
                error_msg("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error_msg("Previously defined here.",
                          previous.file_name_of_token_type_definition,
                          previous.line_n_of_token_type_definition)
            return

        if title == "mode":
            # The first mode requires a token type definition. If there is
            # none, the default token type definition is put in place.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        # Any other title should have been caught by 'verify_word_in_list()'.
        assert False

    except EndOfStreamException:
        fh.seek(start_position)
        error_msg("End of file reached while parsing '%s' section" % title, fh)
Esempio n. 30
0
def parse_variable_definition(fh, GroupF=False, already_defined_list=None):
    """PURPOSE: Parsing of a variable definition consisting of 'type' and 'name.
                Members can be mentioned together in a group, which means that
                they can appear simultaneously. Possible expresions are

                (1) single variables:

                              name0 : type;
                              name1 : type[32];
                              name2 : type*;

                (2) combined variables

                              {
                                  sub_name0 : type0;
                                  sub_name1 : type[64];
                                  sub_name2 : type1*;
                              }

       ARGUMENTS: 

        'GroupF'               allows to have 'nested variable groups' in curly brackets

        'already_defined_list' informs about variable names that have been already
                               chosen. It is only used for groups. If it is
                               omitted (None) a fresh empty list is used for
                               each call.

       RETURNS:
                 None          if no variable definition starts at the current
                               position. The stream is set back to where it
                               was upon entry.
                 [type, name]  when a single variable definition was found.
                                 [0] = CodeUser containing the type.
                                 [1] = name of the variable.
                 [sub_db]      if it was a combined variable definition, where
                               'sub_db' is what 'parse_variable_definition_list()'
                               delivered for the group's content.
    """
    # FIX: The default used to be the mutable '[]', i.e. one single list
    #      object shared by all calls that omitted the argument.
    if already_defined_list is None:
        already_defined_list = []

    position = fh.tell()

    skip_whitespace(fh)
    name_str = read_identifier(fh)
    if name_str == "":
        # No name => either a group '{ ... }' (if allowed), or no definition.
        if not GroupF or not check(fh, "{"):
            fh.seek(position)
            return None
        sub_db = parse_variable_definition_list(fh, "Concurrent union variables", already_defined_list)
        if not check(fh, "}"):
            fh.seek(position)
            error_msg("Missing closing '}' after concurrent variable definition.", fh)
        return [ sub_db ]

    name_str = name_str.strip()
    if not check(fh, ":"):
        error_msg("Missing ':' after identifier '%s'." % name_str, fh)

    # The ':' must be followed by at least one whitespace character.
    if not fh.read(1).isspace():
        error_msg("Missing whitespace after ':' after identifier '%s'.\n" % name_str \
                + "The notation has to be: variable-name ':' type ';'.", fh)

    # Everything up to the terminating ';' is considered the type.
    type_str, i = read_until_letter(fh, ";", Verbose=True)
    if i == -1:
        error_msg("missing ';'", fh)
    type_str = type_str.strip()

    return [ CodeUser(type_str, SourceRef.from_FileHandle(fh)), name_str ]
Esempio n. 31
0
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """Parse the content '{ NAME [= number]; ... }' of a token id section.

       NamesOnlyF == False: Each token id is entered as 'TokenInfo' into
                            'blackboard.token_id_db'. An optional numeric
                            value may follow a '='. Returns None.

       NamesOnlyF == True:  'blackboard.token_id_db' remains untouched and
                            no numeric values are accepted. The sorted list
                            of names, each one preceded by the token id
                            prefix, is returned.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    token_prefix = Setup.token_id_prefix
    token_prefix_plain = Setup.token_id_prefix_plain  # i.e. without name space included

    if NamesOnlyF: db = {}
    else: db = blackboard.token_id_db

    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg(
            "missing opening '{' for after 'token' section identifier.\n", fh)

    while not check(fh, "}"):
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True)

        if candidate == "":
            # FIX: The message was '%'-formatted without containing any
            #      format specifier, which raises a TypeError.
            error_msg("Missing valid token identifier.", fh)

        # -- check the name, if it starts with the token prefix paste a warning
        # FIX: (1) An empty prefix no longer matches every name.
        #      (2) The message names the prefix that actually matched; the
        #          plain prefix case used to report the full prefix.
        matched_prefix = None
        for prefix in (token_prefix, token_prefix_plain):
            if prefix and candidate.startswith(prefix):
                matched_prefix = prefix
                break

        if matched_prefix is not None:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, matched_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)

        skip_whitespace(fh)

        if NamesOnlyF:
            db[token_prefix + candidate] = True
            if not check(fh, ";"):
                error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                          "This is mandatory since Quex version 0.50.1.", fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error_msg(
                    "Missing number after '=' for token identifier '%s'." %
                    candidate, fh)

        if not check(fh, ";"):
            error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                      "This is mandatory since Quex version 0.50.1.", fh)

        db[candidate] = TokenInfo(candidate,
                                  numeric_value,
                                  Filename=fh.name,
                                  LineN=get_current_line_info_number(fh))

    if NamesOnlyF:
        # FIX: 'db.keys()' cannot be sorted in place on Python 3;
        #      'sorted()' delivers a sorted list on Python 2 and 3.
        return sorted(db)
Esempio n. 32
0
def parse_section(fh):
    """Read one top-level section from 'fh' and hand it to its parser.

    The section title is read and verified against
    'blackboard.all_section_title_list'. What follows the title is
    dispatched by the title itself:

        -- key of 'blackboard.fragment_db' => code fragment; it is stored in
                                              'blackboard' under the name that
                                              'fragment_db' maps the title to.
        -- 'start'          => name of the initial mode
        -- 'repeated_token' => names of the token ids to be repeated
        -- 'define'         => pattern name definitions
        -- 'token'          => token id definitions
        -- 'token_type'     => customized token type
        -- 'mode'           => mode definition

    If a mode shows up while no token type definition exists,
    'parse_default_token_definition()' is called and the global flag
    'default_token_type_definition_triggered_by_mode_definition_f' is set.

    When an 'EndOfStreamException' occurs inside the section body, the stream
    is set back to where the section started before the error is reported.
    """
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: Running into the end of file is legitimate while looking for the
    #       next section title. Therefore, the title is read outside of the
    #       end-of-stream handler below.
    section_begin = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        error_msg("Missing section title.", fh)

    verify_word_in_list(word, blackboard.all_section_title_list,
                        "Unknown quex section '%s'" % word, fh)

    try:
        # -- code fragments: 'header { ... }', 'body { ... }', 'init { ... }', ...
        if word in blackboard.fragment_db:
            target_name = blackboard.fragment_db[word]
            blackboard.__dict__[target_name] = code_fragment.parse(
                fh, word, AllowBriefTokenSenderF=False)
            return

        # -- 'start = ...;'
        if word == "start":
            start_mode_name = parse_identifier_assignment(fh)
            if start_mode_name == "":
                error_msg("Missing mode_name after 'start ='", fh)
            elif blackboard.initial_mode.get_pure_code() != "":
                previous = blackboard.initial_mode
                error_msg("start mode defined more than once!", fh,
                          DontExitF=True)
                error_msg("previously defined here",
                          previous.filename, previous.line_n)

            blackboard.initial_mode = UserCodeFragment(
                start_mode_name, fh.name, get_current_line_info_number(fh))
            return

        # -- 'repeated_token { ... }'
        if word == "repeated_token":
            repeated_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            blackboard.token_repetition_token_id_list = repeated_id_list
            for token_name in repeated_id_list:
                # The names carry the token id prefix; the database does not.
                verify_word_in_list(
                    token_name[len(Setup.token_id_prefix):],
                    blackboard.token_id_db.keys(),
                    "Token ID '%s' not yet defined." % token_name,
                    fh,
                    ExitF=False)
            return

        # -- 'define { ... }'
        if word == "define":
            parse_pattern_name_definitions(fh)
            return

        # -- 'token { ... }'
        if word == "token":
            parse_token_id_definitions(fh)
            return

        # -- 'token_type { ... }'
        if word == "token_type":
            if Setup.token_class_file != "":
                error_msg("Token type definition inadmissible while specifying on the command line\n" + \
                          "the file %s to contain a manually written token class." % repr(Setup.token_class_file),
                          fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case: a token type definition is already present.
            if default_token_type_definition_triggered_by_mode_definition_f:
                error_msg(
                    "Section 'token_type' must appear before first mode definition.",
                    fh)
                return

            error_msg("Section 'token_type' has been defined twice.", fh,
                      DontExitF=True)
            earlier = blackboard.token_type_definition
            error_msg("Previously defined here.",
                      earlier.file_name_of_token_type_definition,
                      earlier.line_n_of_token_type_definition)
            return

        # -- 'mode { ... }'
        if word == "mode":
            # The first mode requires a token type definition to be present.
            # If there is none, the default token type definition is taken.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        # Any other title should have been caught by 'verify_word_in_list'.
        assert False

    except EndOfStreamException:
        fh.seek(section_begin)
        error_msg("End of file reached while parsing '%s' section" % word, fh)
Esempio n. 33
0
File: core.py Progetto: xxyzzzq/quex
def parse_section(fh):
    """Parse one top-level section of a quex input file from 'fh'.

    The section title is read from 'fh' and verified against
    'blackboard.all_section_title_list'. Depending on the title, the section
    body is passed to the related parser and the result is stored in
    'blackboard' (see the list inside the function body).

    SIDE EFFECTS: Depending on the section, one of the following is written:
                  an attribute of 'blackboard' named by 'blackboard.fragment_db',
                  'blackboard.initial_mode',
                  'blackboard.token_repetition_token_id_list',
                  'blackboard.token_type_definition', or the global flag
                  'default_token_type_definition_triggered_by_mode_definition_f'.

    RETURNS: None.

    An 'EndOfStreamException' raised while the section body is parsed causes
    the stream to be set back to the beginning of the section; then
    'error.error_eof(word, fh)' is called.

    NOTE(review): 'error.log()' is presumably fatal unless 'DontExitF=True'
                  is passed -- confirm against the 'error' module.
    """
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh, OnMissingStr="Missing section title")

    error.verify_word_in_list(word, blackboard.all_section_title_list,
                              "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'     => define a mode
        #     -- 'start = ...;'     => define the name of the initial mode
        #     -- 'header { ... }'   => define code that is to be pasted on top
        #                              of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'     => define code that is to be pasted in the class' body
        #                              of the engine (e.g. "public: int  my_member;")
        #     -- 'init { ... }'     => define code that is to be pasted in the class' constructors
        #                              of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }'   => define patterns shorthands such as IDENTIFIER for [a-z]+
        #     -- 'repeated_token { ... }' => enables token repetition, defines
        #                                    the token ids to be repeated.
        #     -- 'token { ... }'    => define token ids
        #     -- 'token_type { ... }'  => define a customized token type
        #
        if word in blackboard.fragment_db:
            element_name = blackboard.fragment_db[word]
            fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error.log("Missing mode_name after 'start ='", fh)

            elif not blackboard.initial_mode.sr.is_void():
                # A non-void source reference tells that 'start' has been
                # specified before.
                error.log("start mode defined more than once!", fh, DontExitF=True)
                error.log("previously defined here", blackboard.initial_mode.sr)

            blackboard.initial_mode = CodeUser(mode_name, SourceRef.from_FileHandle(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                # The returned names carry the token id prefix; it is cut off
                # before the lookup in the token id database.
                error.verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                          blackboard.token_id_db.keys(),
                                          "Token ID '%s' not yet defined." % token_name, fh,
                                          ExitF=False,
                                          SuppressCode=NotificationDB.warning_repeated_token_not_yet_defined)
            return

        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":
            if Setup.token_id_foreign_definition:
                error.log("Token id file '%s' has been specified.\n" \
                          % Setup.token_id_foreign_definition_file \
                          + "All token ids must be specified there. Section 'token'\n" \
                          + "is not allowed.", fh)

            parse_token_id_definitions(fh)
            return

        elif word == "token_type":

            if Setup.token_class_file != "":
                # 'repr()' already quotes the file name. A blank must separate
                # it from the remainder of the sentence.
                error.log("Section 'token_type' is intended to generate a token class.\n"
                          "However, the manually written token class file %s "
                          "has been specified on the command line."
                          % repr(Setup.token_class_file),
                          fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case: a token type definition is already present.
            if default_token_type_definition_triggered_by_mode_definition_f:
                error.log("Section 'token_type' must appear before first mode definition.",
                          fh)
            else:
                error.log("Section 'token_type' has been defined twice.",
                          fh, DontExitF=True)
                error.log("Previously defined here.",
                          blackboard.token_type_definition.sr)
            return

        elif word == "mode":
            # When the first mode is parsed then a token_type definition must be
            # present. If not, the default token type definition is considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(word, fh)
Esempio n. 34
0
File: core.py Progetto: xxyzzzq/quex
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """Parse a list of token identifiers enclosed in '{' ... '}'.

       Each entry is terminated by ';'. Unless 'NamesOnlyF' is set, an entry
       may assign a numeric value by means of '= number'.

       NamesOnlyF == False: Each identifier is entered as 'TokenInfo' into
                            'blackboard.token_id_db'. Nothing is returned.

       NamesOnlyF == True:  Only names are admissible, no numeric values.
                            'blackboard.token_id_db' is left untouched. The
                            sorted list of names, each with the token id
                            prefix in front, is returned.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    id_prefix       = Setup.token_id_prefix
    id_prefix_plain = Setup.token_id_prefix_plain # i.e. without name space included

    collected_names = set()

    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("Missing opening '{' for after 'token' section identifier.", 
                  fh)

    while not check(fh, "}"):
        skip_whitespace(fh)

        token_name = read_identifier(fh, TolerantF=True, OnMissingStr="Missing valid token identifier.")

        # -- warn, if the name starts with the token prefix. The full prefix
        #    is considered before the plain prefix.
        suspicious_prefix = None
        for known_prefix in (id_prefix, id_prefix_plain):
            if len(known_prefix) != 0 and token_name.startswith(known_prefix):
                suspicious_prefix = known_prefix
                break

        if suspicious_prefix is not None:
            message = "Token identifier '%s' starts with token prefix '%s'.\n" \
                      % (token_name, suspicious_prefix) \
                      + "Token prefix is mounted automatically. This token id appears in the source\n" \
                      + "code as '%s%s'." \
                      % (id_prefix, token_name)
            error.warning(message, fh,
                          SuppressCode=NotificationDB.warning_token_id_prefix_appears_in_token_id_name)

        skip_whitespace(fh)

        if NamesOnlyF:
            # Only the name is of interest; a value assignment is not parsed.
            collected_names.add(id_prefix + token_name)
            if not check(fh, ";"):
                error.log("Missing ';' after token identifier '%s'.\n" \
                          % token_name, fh)
            continue

        # An optional numeric value may follow after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error.log("Missing number after '=' for token identifier '%s'." % token_name, 
                          fh)

        if not check(fh, ";"):
            error.log("Missing ';' after token identifier '%s'." % token_name, 
                      fh)

        # This point is only reached if 'NamesOnlyF' is not set.
        blackboard.token_id_db[token_name] = TokenInfo(
            token_name, numeric_value,
            SourceReference=SourceRef.from_FileHandle(fh))

    if NamesOnlyF:
        return sorted(collected_names)
    return # Changes are applied to 'blackboard.token_id_db'
Esempio n. 35
0
def do(fh):
    r"""Parse an indentation specification up to the closing '>'.

       Each entry has the form

          PATTERN => SPECIFIER [ARGUMENT];

       for example

          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       SPECIFIER is one of 'space', 'grid', 'bad', 'newline', 'suppressor'.

       For 'space', 'grid', and 'bad' the left hand side *must* be a pattern
       that is matched by a single character, i.e. a character set.

          'space'  accepts an integer or a variable name; if neither is
                   given, the value 1 is used.
          'grid'   requires an integer or a variable name.

       RETURNS: The 'IndentationSetup' object, after it has been sealed and
                its consistency has been checked.
    """
    indentation_setup = IndentationSetup(fh)

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    skip_whitespace(fh)

    while True:
        skip_whitespace(fh)

        if check(fh, ">"):
            indentation_setup.seal()
            indentation_setup.consistency_check(fh)
            return indentation_setup

        # A regular expression state machine
        pattern_str, pattern = regular_expression.parse(fh)

        skip_whitespace(fh)
        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.",
                      fh)

        verify_word_in_list(
            identifier, ["space", "grid", "bad", "newline", "suppressor"],
            "Unrecognized indentation specifier '%s'." % identifier, fh)

        # 'space', 'bad', and 'grid' work on a character set: the single
        # trigger set of the single transition of the initial state.
        trigger_set = None
        if identifier in ("space", "bad", "grid"):
            if len(pattern.sm.states) != 2:
                error_msg("For indentation '%s' only patterns are addmissible which\n" % identifier + \
                          "can be matched by a single character, e.g. \" \" or [a-z].", fh)
            transition_map = pattern.sm.get_init_state().transitions().get_map()
            assert len(transition_map) == 1
            # 'list(...)': in Python 3 'values()' is a view without indexing.
            trigger_set = list(transition_map.values())[0]

        skip_whitespace(fh)
        if identifier == "space":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_space(pattern_str, trigger_set,
                                                value, fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set,
                                                    variable, fh)
                else:
                    # Neither number nor variable: a space counts as 1.
                    indentation_setup.specify_space(pattern_str, trigger_set,
                                                    1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None:
                indentation_setup.specify_grid(pattern_str, trigger_set, value,
                                               fh)
            else:
                # not a number received, is it an identifier?
                skip_whitespace(fh)
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set,
                                                   variable, fh)
                else:
                    error_msg(
                        "Missing integer or variable name after keyword 'grid'.",
                        fh)

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, pattern.sm, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, pattern.sm, fh)

        else:
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg(
                "Missing ';' after indentation '%s' specification." %
                identifier, fh)
Esempio n. 36
0
def __parse_section(fh, descriptor, already_defined_list):
    """Parse one subsection of a 'token_type' section and store the result
       in 'descriptor'.

       Admissible subsections are 'name', 'file_name', 'standard', 'distinct',
       'union', 'inheritable', 'noid', and any key of the global
       'token_type_code_fragment_db'.

       ARGUMENTS:

        'fh'                   stream to read from.

        'descriptor'           object that receives what has been parsed:
                               'class_name', 'name_space', 'class_name_safe'
                               for 'name'; 'open_for_derivation_f' for
                               'inheritable'; 'token_contains_token_id_f' for
                               'noid'; 'set_file_name()' for 'file_name'; an
                               attribute named like the subsection for a code
                               fragment.

        'already_defined_list' list that is passed on to the parsers of
                               'standard', 'distinct', and 'union' members.

       RETURNS: True   if a subsection has been parsed.
                False  if no identifier was found, but a closing '}'. Then
                       the stream position is the same as upon entry.
    """
    assert isinstance(already_defined_list, list)

    # 'list(...)': in Python 3 'keys()' is a view that cannot be appended to
    # a list by '+'.
    SubsectionList = ["name", "file_name", "standard", "distinct", "union", "inheritable", "noid"] \
                      + list(token_type_code_fragment_db.keys())

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        fh.seek(position)
        if check(fh, "}"):
            # End of the 'token_type' section: leave the '}' to the caller.
            fh.seek(position)
            return False
        error_msg(
            "Missing token_type section ('standard', 'distinct', or 'union').",
            fh)

    verify_word_in_list(
        word, SubsectionList,
        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        descriptor.class_name, descriptor.name_space, descriptor.class_name_safe = read_namespaced_name(
            fh, "token_type")
        if not check(fh, ";"):
            error_msg(
                "Missing terminating ';' in token_type 'name' specification.",
                fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_die(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False
        check_or_die(fh, ";")

    elif word == "file_name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.",
                      fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg(
                "Missing terminating ';' in token_type 'file_name' specification.",
                fh)

    elif word in ("standard", "distinct", "union"):
        if word == "standard":
            parse_standard_members(fh, word, descriptor, already_defined_list)
        elif word == "distinct":
            parse_distinct_members(fh, word, descriptor, already_defined_list)
        else:
            parse_union_members(fh, word, descriptor, already_defined_list)

        if not check(fh, "}"):
            # Report the error at the beginning of the subsection.
            fh.seek(position)
            error_msg(
                "Missing closing '}' at end of token_type section '%s'." %
                word, fh)

    elif word in token_type_code_fragment_db:
        fragment = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)
        descriptor.__dict__[word] = fragment

    else:
        assert False, "This code section section should not be reachable because 'word'\n" + \
                      "was checked to fit in one of the 'elif' cases."

    return True
Esempio n. 37
0
def parse_variable_definition(fh, GroupF=False, already_defined_list=None):
    """PURPOSE: Parsing of a variable definition consisting of 'type' and 'name'.
                Members can be mentioned together in a group, which means that
                they can appear simultaneously. Possible expressions are

                (1) single variables:

                              name0 : type;
                              name1 : type[32];
                              name2 : type*;

                (2) combined variables

                              {
                                  sub_name0 : type0;
                                  sub_name1 : type[64];
                                  sub_name2 : type1*;
                              }

       ARGUMENTS: 

        'GroupF'               allows to have 'nested variable groups' in curly brackets

        'already_defined_list' informs about variable names that have been already
                               chosen. It is only used for groups. If omitted,
                               a new empty list is used for this call.

       RETURNS:
                 None        if no variable name was found and no group was 
                             opened (or groups are not allowed). The stream
                             position is then the same as upon entry.
                 [type, name]  when a single variable definition was found. 
                                 [0] = CodeUser containing the type. 
                                 [1] = name of the variable.
                 [sub_db]    a list with one element, if it was a combined
                             variable definition. 'sub_db' is what 
                             'parse_variable_definition_list()' returned;
                             presumably a dictionary that maps
                             (variable name) ---> (type) -- confirm there.
    
    """
    # A default of '[]' would be one single list shared by all calls. The list
    # is handed to 'parse_variable_definition_list()', so entries made there
    # would show up in later, unrelated calls.
    if already_defined_list is None:
        already_defined_list = []

    position = fh.tell()

    skip_whitespace(fh)
    name_str = read_identifier(fh)
    if name_str == "":
        if not GroupF or not check(fh, "{"):
            fh.seek(position)
            return None
        sub_db = parse_variable_definition_list(fh,
                                                "Concurrent union variables",
                                                already_defined_list)
        if not check(fh, "}"):
            # Report the error at the beginning of the group.
            fh.seek(position)
            error_msg(
                "Missing closing '}' after concurrent variable definition.",
                fh)
        return [sub_db]

    name_str = name_str.strip()
    if not check(fh, ":"):
        error_msg("Missing ':' after identifier '%s'." % name_str, fh)

    # The character right after ':' is consumed; it must be whitespace.
    if not fh.read(1).isspace():
        error_msg("Missing whitespace after ':' after identifier '%s'.\n" % name_str \
                + "The notation has to be: variable-name ':' type ';'.", fh)

    # Everything up to the terminating ';' is the type; 'i == -1' tells
    # that no ';' was found.
    type_str, i = read_until_letter(fh, ";", Verbose=True)
    if i == -1: error_msg("missing ';'", fh)
    type_str = type_str.strip()

    return [CodeUser(type_str, SourceRef.from_FileHandle(fh)), name_str]