Example #1
def parse(fh):
    descriptor = TokenTypeDescriptorCore()

    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type definition", fh)

    already_defined_list = []
    position             = fh.tell()
    sr_begin             = SourceRef.from_FileHandle(fh)
    result               = True
    while result == True:
        try: 
            # x = fh.tell(); fh.seek(x)
            result = parse_section(fh, descriptor, already_defined_list)
        except EndOfStreamException:
            fh.seek(position)
            error_eof("token_type", fh)

        
    if not check(fh, "}"):
        fh.seek(position)
        error_msg("Missing closing '}' at end of token_type definition.", fh);

    result = TokenTypeDescriptor(descriptor, sr_begin)
    if     len(result.get_member_db()) == 0       \
       and result.class_name == "Token"           \
       and result.token_id_type.sr.is_void()      \
       and result.column_number_type.sr.is_void() \
       and result.line_number_type.sr.is_void():
        error_msg("Section 'token_type' does not define any members, does not\n" + \
                  "modify any standard member types, nor does it define a class\n" + \
                  "different from 'Token'.", fh)

    result.consistency_check()
    return result
Example #2
def parse(fh):
    descriptor = TokenTypeDescriptorCore()

    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type definition", fh)

    already_defined_list = []
    position = fh.tell()
    sr_begin = SourceRef.from_FileHandle(fh)
    result = True
    while result == True:
        try:
            # x = fh.tell(); fh.seek(x)
            result = parse_section(fh, descriptor, already_defined_list)
        except EndOfStreamException:
            fh.seek(position)
            error_eof("token_type", fh)

    if not check(fh, "}"):
        fh.seek(position)
        error_msg("Missing closing '}' at end of token_type definition.", fh)

    result = TokenTypeDescriptor(descriptor, sr_begin)
    if     len(result.get_member_db()) == 0       \
       and result.class_name == "Token"           \
       and result.token_id_type.sr.is_void()      \
       and result.column_number_type.sr.is_void() \
       and result.line_number_type.sr.is_void():
        error_msg("Section 'token_type' does not define any members, does not\n" + \
                  "modify any standard member types, nor does it define a class\n" + \
                  "different from 'Token'.", fh)

    result.consistency_check()
    return result
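
Every snippet on this page leans on the same small set of Quex stream helpers: check(), skip_whitespace(), read_identifier() and read_integer(). Their implementations are not shown here, so the following is a minimal reconstruction inferred from the call sites above: the names match the originals, the bodies are assumptions, and keyword arguments such as TolerantF and OnMissingStr are omitted. It is handy for exercising the grammar loops against an io.StringIO.

import io

# NOTE: Reconstructed from usage on this page; NOT Quex's actual code.

def check(fh, word):
    # Consume 'word' if it comes next in the stream; otherwise restore
    # the read position and report failure.
    pos = fh.tell()
    if fh.read(len(word)) == word:
        return True
    fh.seek(pos)
    return False

def skip_whitespace(fh):
    # Advance the stream past any whitespace.
    while True:
        pos = fh.tell()
        c = fh.read(1)
        if c == "" or not c.isspace():
            fh.seek(pos)
            return

def read_identifier(fh):
    # Read a C-like identifier; return "" if none is present.
    result = ""
    while True:
        pos = fh.tell()
        c = fh.read(1)
        if c and (c.isalnum() or c == "_"):
            result += c
        else:
            fh.seek(pos)
            return result

def read_integer(fh):
    # Read a decimal integer; return None if none is present.
    digits = ""
    while True:
        pos = fh.tell()
        c = fh.read(1)
        if c and c.isdigit():
            digits += c
        else:
            fh.seek(pos)
            return int(digits) if digits else None

# Quick self-check of the assumed semantics:
fh = io.StringIO("  foo = 42;")
skip_whitespace(fh)
assert read_identifier(fh) == "foo"
skip_whitespace(fh)
assert check(fh, "=")
skip_whitespace(fh)
assert read_integer(fh) == 42
assert check(fh, ";")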
Example #3
def parse_token_id_definitions(fh, NamesOnlyF=False):
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    token_prefix       = Setup.token_id_prefix
    token_prefix_plain = Setup.token_id_prefix_plain # i.e. without name space included

    if NamesOnlyF: db = {}
    else:          db = blackboard.token_id_db

    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("missing opening '{' for after 'token' section identifier.\n", fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True)

        if candidate == "":
            error_msg("Missing valid token identifier." % candidate, fh)

        # -- check the name; if it starts with the token prefix, issue a warning
        if candidate.find(token_prefix) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)
        elif candidate.find(token_prefix_plain) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)

        skip_whitespace(fh)

        if NamesOnlyF:
            db[token_prefix + candidate] = True
            if check(fh, ";") == False:
                error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                          "This is mandatory since Quex version 0.50.1.", fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error_msg("Missing number after '=' for token identifier '%s'." % candidate, fh)

        if check(fh, ";") == False:
            error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                      "This is mandatory since Quex version 0.50.1.", fh)

        db[candidate] = TokenInfo(candidate, numeric_value, Filename=fh.name, LineN=get_current_line_info_number(fh))

    if NamesOnlyF:
        result = db.keys()
        result.sort()
        return result
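
For orientation: the surface grammar Example #3 accepts is roughly '{' ( IDENTIFIER ('=' INTEGER)? ';' )* '}'. Below is a toy re-implementation of only that shape, using the helper stubs sketched after Example #2; the prefix warnings, TokenInfo records and blackboard bookkeeping of the real function are deliberately left out.

import io
# Assumes the check/skip_whitespace/read_identifier/read_integer stubs
# from the reconstruction after Example #2 are in scope.

def parse_token_ids_toy(text):
    # Toy version of Example #3's surface grammar; returns {name: value}.
    fh = io.StringIO(text)
    skip_whitespace(fh)
    assert check(fh, "{"), "missing opening '{'"
    db = {}
    while not check(fh, "}"):
        skip_whitespace(fh)
        name = read_identifier(fh)
        assert name, "missing token identifier"
        skip_whitespace(fh)
        value = None
        if check(fh, "="):
            skip_whitespace(fh)
            value = read_integer(fh)
        skip_whitespace(fh)
        assert check(fh, ";"), "missing ';'"
        skip_whitespace(fh)
        db[name] = value
    return db

print(parse_token_ids_toy("{ NUMBER = 66; IDENTIFIER; }"))
# -> {'NUMBER': 66, 'IDENTIFIER': None}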
Example #4
def __parse_base_mode_list(fh, new_mode):
    new_mode.base_modes = []
    trailing_comma_f    = False
    while 1 + 1 == 2:
        if   check(fh, "{"): fh.seek(-1, 1); break
        elif check(fh, "<"): fh.seek(-1, 1); break

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "": break

        new_mode.base_modes.append(identifier)
        trailing_comma_f = False
        if not check(fh, ","): break
        trailing_comma_f = True


    if trailing_comma_f:
        error_msg("Trailing ',' after base mode '%s'." % new_mode.base_modes[-1], fh, 
                  DontExitF=True, WarningF=True)
        
    elif len(new_mode.base_modes) != 0:
        # This check is a 'service' -- for those who follow the old convention
        pos = fh.tell()
        skip_whitespace(fh)
        dummy_identifier = read_identifier(fh)
        if dummy_identifier != "":
            error_msg("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.base_modes[-1], dummy_identifier) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
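
A detail worth noting in Example #4: check() consumes the '{' or '<' that terminates the base-mode list, so fh.seek(-1, 1) pushes that character back for the caller. The toy below mirrors the loop under the same helper-stub assumptions as above; io.StringIO does not allow nonzero relative seeks, hence fh.seek(fh.tell() - 1) stands in for the original's fh.seek(-1, 1).

import io
# Assumes the helper stubs from the reconstruction after Example #2.

def parse_base_modes_toy(text):
    # IDENTIFIER (',' IDENTIFIER)*, terminated by '{' or '<', which is
    # pushed back so the caller can parse the mode body.
    fh = io.StringIO(text)
    modes = []
    while True:
        if check(fh, "{") or check(fh, "<"):
            fh.seek(fh.tell() - 1)   # push the terminator back
            break
        skip_whitespace(fh)
        name = read_identifier(fh)
        if name == "":
            break
        modes.append(name)
        if not check(fh, ","):
            break
    return modes, fh.read()

print(parse_base_modes_toy("BASE,DERIVED,{"))
# -> (['BASE', 'DERIVED'], '{')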
Example #5
    def _base_parse(self, fh, IndentationSetupF=False):
        """Parses pattern definitions of the form:
       
              [ \t]                                       => grid 4;
              [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

        In other words the right hand side *must* be a character set.

        ADAPTS: result to contain parsing information.
        """

        # NOTE: Catching of EOF happens in caller: parse_section(...)
        #
        while 1 + 1 == 2:
            skip_whitespace(fh)
            if check(fh, ">"): 
                break
            
            # A regular expression state machine
            pattern, identifier, sr = _parse_definition_head(fh, self.identifier_list)
            if pattern is None and IndentationSetupF:
                error.log("Keyword '\\else' cannot be used in indentation setup.", fh)

            # '_parse_definition_head()' ensures that only identifiers mentioned in 
            # 'result' are accepted. 
            if self.requires_count():
                count = _read_value_specifier(fh, identifier, 1)
                self.specify(identifier, pattern, count, sr)
            else:
                self.specify(identifier, pattern, sr)

            if not check(fh, ";"):
                error.log("Missing ';' after '%s' specification." % identifier, fh)

        return self.finalize()
Example #6
def __parse_base_mode_list(fh, new_mode):
    new_mode.derived_from_list = []
    trailing_comma_f = False
    while 1 + 1 == 2:
        if check(fh, "{"):
            fh.seek(-1, 1)
            break
        elif check(fh, "<"):
            fh.seek(-1, 1)
            break

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "": break

        new_mode.derived_from_list.append(identifier)
        trailing_comma_f = False
        if not check(fh, ","): break
        trailing_comma_f = True

    if trailing_comma_f:
        error.warning(
            "Trailing ',' after base mode '%s'." %
            new_mode.derived_from_list[-1], fh)

    elif len(new_mode.derived_from_list) != 0:
        # This check is a 'service' -- for those who follow the old convention
        pos = fh.tell()
        skip_whitespace(fh)
        dummy_identifier = read_identifier(fh)
        if dummy_identifier != "":
            error.log("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.derived_from_list[-1], dummy_identifier) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
Example #7
def __parse(fh, result, IndentationSetupF=False):
    """Parses pattern definitions of the form:
   
          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words the right hand side *must* be a character set.
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    while 1 + 1 == 2:
        skip_whitespace(fh)
        if check(fh, ">"):
            break

        # A regular expression state machine
        pattern, identifier, sr = __parse_definition_head(fh, result)
        if pattern is None and IndentationSetupF:
            error.log("Keyword '\\else' cannot be used in indentation setup.",
                      fh)

        # '__parse_definition_head()' ensures that only identifiers mentioned in
        # 'result' are accepted.
        if not IndentationSetupF:
            value = read_value_specifier(fh, identifier, 1)
            result.specify(identifier, pattern, value, sr)
        else:
            result.specify(identifier, pattern, sr)

        if not check(fh, ";"):
            error.log("Missing ';' after '%s' specification." % identifier, fh)

    return result
Example #8
def parse(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"

          \function SOMETHING(sm = X, set = Y, number = N):
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, "}"): 
            return
        
        # Get the name of the pattern
        skip_whitespace(fh)
        if check(fh, "\\function"): name, value = _parse_function(fh)
        else:                       name, value = _parse_pattern(fh)

        blackboard.shorthand_db[name] = value
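
A toy reading of the define-region layout parsed in Examples #8 and #13 through #16: name, whitespace, regular expression, end of line. The sketch keeps each right-hand side as raw text; the real parser compiles it into a state machine and records a PatternShorthand instead. Helper stubs from the reconstruction after Example #2 are assumed in scope.

import io

def parse_define_toy(text):
    # '{' ( NAME rest-of-line )* '}'  ->  {name: raw pattern text}
    fh = io.StringIO(text)
    skip_whitespace(fh)
    assert check(fh, "{"), "define region must start with '{'"
    shorthand_db = {}
    while True:
        skip_whitespace(fh)
        if check(fh, "}"):
            return shorthand_db
        name = read_identifier(fh)
        assert name, "missing identifier for pattern definition"
        shorthand_db[name] = fh.readline().strip()

print(parse_define_toy('{\n  WHITESPACE  [ \\t\\n]\n  OP_PLUS  "+"\n}'))
# -> {'WHITESPACE': '[ \\t\\n]', 'OP_PLUS': '"+"'}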
Example #9
def __parse(fh, result, IndentationSetupF=False):
    """Parses pattern definitions of the form:
   
          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words the right hand side *must* be a character set.
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    while 1 + 1 == 2:
        skip_whitespace(fh)
        if check(fh, ">"): 
            break
        
        # A regular expression state machine
        pattern, identifier, sr = __parse_definition_head(fh, result)
        if pattern is None and IndentationSetupF:
            error.log("Keyword '\\else' cannot be used in indentation setup.", fh)

        # '__parse_definition_head()' ensures that only identifiers mentioned in 
        # 'result' are accepted. 
        if not IndentationSetupF:
            value = read_value_specifier(fh, identifier, 1)
            result.specify(identifier, pattern, value, sr)
        else:
            result.specify(identifier, pattern, sr)

        if not check(fh, ";"):
            error.log("Missing ';' after '%s' specification." % identifier, fh)

    return result
Example #10
def snap_curly_bracketed_expression(stream,
                                    PatternDict,
                                    Name,
                                    TriggerChar,
                                    MinN=1,
                                    MaxN=1):
    """Snaps a list of RE's in '{' and '}'. The separator between the patterns 
    is whitespace. 'MinN' and 'MaxN' determine the number of expected patterns.
    Set 'MaxN=INTEGER_MAX' for an arbitrary number of patterns.

    RETURNS: result = list of patterns, if MinN <= len(result) <= MaxN 
             else, the function sys.exit()-s.
    """
    assert MinN <= MaxN
    assert MinN > 0

    skip_whitespace(stream)

    # Read over the trigger character
    if not check(stream, "{"):
        error.log("Missing opening '{' after %s %s." % (Name, TriggerChar),
                  stream)

    result = []
    while 1 + 1 == 2:
        pattern = snap_expression(stream, PatternDict)
        if pattern is not None:
            result.append(pattern)

        if check(stream, "}"):
            break
        elif check_whitespace(stream):
            continue
        elif check(stream, "/") or check(stream, "$"):
            error.log(
                "Pre- or post contexts are not allowed in %s \\%s{...} expressions."
                % (Name, TriggerChar), stream)
        else:
            error.log(
                "Missing closing '}' %s in \\%s{...}." % (Name, TriggerChar),
                stream)

    if MinN != MaxN:
        if len(result) < MinN:
            error.log("At minimum %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)
        if len(result) > MaxN:
            error.log("At maximum %i pattern%s required between '{' and '}'" \
                      % (MaxN, "" if MaxN == 1 else "s"), stream)
    else:
        if len(result) != MinN:
            error.log("Exactly %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)

    def ensure_dfa(sm):
        if not sm.is_DFA_compliant(): return nfa_to_dfa.do(sm)
        else: return sm

    return [ensure_dfa(sm) for sm in result]
Example #11
def do(UTF8_String_or_Stream, PatternDict, 
       AllowNothingIsNecessaryF = False, SpecialTerminator=None):
    global SPECIAL_TERMINATOR 
    assert type(AllowNothingIsNecessaryF) == bool
    assert type(PatternDict) == dict

    # SPECIAL_TERMINATOR --> if string is not only to be terminated by ' '
    SPECIAL_TERMINATOR = SpecialTerminator

    def __ensure_whitespace_follows(InitialPos, stream):
        tmp = stream.read(1)
        if tmp == "" or tmp.isspace() or tmp == SPECIAL_TERMINATOR:
            stream.seek(-1, 1)
            return

        end_position = stream.tell() - 1
        stream.seek(InitialPos)
        pattern_str = stream.read(end_position - InitialPos)
        error_msg("Pattern definition '%s' not followed by whitespace.\n" % pattern_str + \
                  "Found subsequent character '%s'." % tmp, 
                  stream)

    if type(UTF8_String_or_Stream) == str: stream = StringIO(UTF8_String_or_Stream)
    else:                                  stream = UTF8_String_or_Stream    

    if PatternDict is None: PatternDict = {}

    initial_position = stream.tell()

    # -- check for the begin of line condition (BOL)
    if check(stream, '^'): begin_of_line_f = True
    else:                  begin_of_line_f = False
    
    # -- MAIN: transform the pattern into a state machine
    pre, core, post = snap_conditional_expression(stream, PatternDict)

    if core is None: 
        stream.seek(initial_position)
        return None

    # -- check for end of line condition (EOL) 
    # -- check for terminating whitespace
    end_of_line_f = False
    if check(stream, '$'): end_of_line_f = True

    __ensure_whitespace_follows(initial_position, stream)
    
    pattern = Pattern(CoreSM        = core, 
                      BeginOfLineF  = begin_of_line_f,
                      PreContextSM  = pre,
                      EndOfLineF    = end_of_line_f,
                      PostContextSM = post,
                      Sr            = SourceRef.from_FileHandle(stream),
                      PatternString = read_pattern_string(stream, initial_position),
                      AllowNothingIsNecessaryF = AllowNothingIsNecessaryF)
    
    return pattern
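
The __ensure_whitespace_follows() closure in Examples #11, #12 and #18 shows a nice error-reporting trick: peek one character, push it back if the pattern ended cleanly, and otherwise re-read the entire pattern from its start position just to quote it in the message. A standalone rendering (assumptions: raising SyntaxError stands in for error_msg, and absolute seeks replace the original's seek(-1, 1) so it runs on io.StringIO):

import io

def ensure_whitespace_follows(initial_pos, stream):
    tmp = stream.read(1)
    if tmp == "" or tmp.isspace():
        stream.seek(stream.tell() - len(tmp))   # push the peeked char back
        return
    end_position = stream.tell() - 1
    stream.seek(initial_pos)                    # rewind to quote the pattern
    pattern_str = stream.read(end_position - initial_pos)
    raise SyntaxError("Pattern definition '%s' not followed by whitespace.\n"
                      "Found subsequent character '%s'." % (pattern_str, tmp))

s = io.StringIO("[a-z]+)")
s.read(6)                    # pretend the parser consumed '[a-z]+'
try:
    ensure_whitespace_follows(0, s)
except SyntaxError as e:
    print(e)
# Pattern definition '[a-z]+' not followed by whitespace.
# Found subsequent character ')'.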
Example #12
def do(UTF8_String_or_Stream,
       PatternDict,
       AllowNothingIsNecessaryF=False,
       AllowStateMachineTrafoF=True):
    assert type(AllowNothingIsNecessaryF) == bool
    assert type(PatternDict) == dict

    def __ensure_whitespace_follows(InitialPos, stream):
        tmp = stream.read(1)
        if tmp == "" or tmp.isspace():
            stream.seek(-1, 1)
            return

        end_position = stream.tell() - 1
        stream.seek(InitialPos)
        pattern_str = stream.read(end_position - InitialPos)
        error_msg("Pattern definition '%s' not followed by whitespace.\n" % pattern_str + \
                  "Found subsequent character '%s'." % tmp,
                  stream)

    if type(UTF8_String_or_Stream) == str:
        stream = StringIO(UTF8_String_or_Stream)
    else:
        stream = UTF8_String_or_Stream

    if PatternDict is None: PatternDict = {}

    initial_position = stream.tell()

    # -- check for the begin of line condition (BOL)
    if check(stream, '^'): begin_of_line_f = True
    else: begin_of_line_f = False

    # -- MAIN: transform the pattern into a state machine
    pre, core, post = snap_conditional_expression(stream, PatternDict)

    if core is None:
        stream.seek(initial_position)
        return None

    # -- check for end of line condition (EOL)
    # -- check for terminating whitespace
    end_of_line_f = False
    if check(stream, '$'): end_of_line_f = True

    __ensure_whitespace_follows(initial_position, stream)

    pattern = construct.do(core_sm=core,
                           begin_of_line_f=begin_of_line_f,
                           pre_context=pre,
                           end_of_line_f=end_of_line_f,
                           post_context=post,
                           fh=stream,
                           AllowNothingIsNecessaryF=AllowNothingIsNecessaryF,
                           AllowStateMachineTrafoF=AllowStateMachineTrafoF)

    return pattern
Example #13
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, "}"):
            return

        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(
            fh, OnMissingStr="Missing identifier for pattern definition.")

        if blackboard.shorthand_db.has_key(pattern_name):
            error.log("Second definition of pattern '%s'.\n" % pattern_name + \
                      "Pattern names must be unique.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error.log("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        pattern = regular_expression.parse(fh, AllowNothingIsFineF=True)

        if pattern.has_pre_or_post_context():
            error.log("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.",
                      fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                PatternShorthand(pattern_name, state_machine,
                                 SourceRef.from_FileHandle(fh), pattern.pattern_string())
Example #14
File: core.py Project: xxyzzzq/quex
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, "}"): 
            return
        
        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh, OnMissingStr="Missing identifier for pattern definition.")

        if blackboard.shorthand_db.has_key(pattern_name):
            error.log("Second definition of pattern '%s'.\n" % pattern_name + \
                      "Pattern names must be unique.", fh)

        skip_whitespace(fh)

        if check(fh, "}"): 
            error.log("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        pattern = regular_expression.parse(fh, AllowNothingIsFineF = True) 

        if pattern.has_pre_or_post_context():
            error.log("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", 
                      fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                PatternShorthand(pattern_name, state_machine, 
                                 SourceRef.from_FileHandle(fh), pattern.pattern_string())
Example #15
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, "}"): 
            return
        
        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if pattern_name == "":
            error_msg("Missing identifier for pattern definition.", fh)

        skip_whitespace(fh)

        if check(fh, "}"): 
            error_msg("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        regular_expression_str, pattern = \
                regular_expression.parse(fh, AllowNothingIsFineF = True, 
                                         AllowStateMachineTrafoF = False) 

        if pattern.has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                blackboard.PatternShorthand(pattern_name, state_machine, 
                                            fh.name, get_current_line_info_number(fh),
                                            regular_expression_str)
Example #16
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, "}"):
            return

        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if pattern_name == "":
            error_msg("Missing identifier for pattern definition.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error_msg("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        regular_expression_str, pattern = \
                regular_expression.parse(fh, AllowNothingIsFineF = True,
                                         AllowStateMachineTrafoF = False)

        if pattern.has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                blackboard.PatternShorthand(pattern_name, state_machine,
                                            fh.name, get_current_line_info_number(fh),
                                            regular_expression_str)
Example #17
def snap_curly_bracketed_expression(stream, PatternDict, Name, TriggerChar, MinN=1, MaxN=1):
    """Snaps a list of RE's in '{' and '}'. The separator between the patterns is 
       whitespace. 'MinN' and 'MaxN' determine the number of expected patterns.
       Set 'MaxN=sys.maxint' for an arbitrary number of patterns.



       RETURNS: result = list of patterns. 

                it holds: len(result) >= MinN  
                          len(result) <= MaxN

                if not, the function sys.exit()-s.
       
    """
    assert MinN <= MaxN
    assert MinN > 0

    skip_whitespace(stream)

    # Read over the trigger character 
    if not check(stream, "{"):
        error_msg("Missing opening '{' after %s %s." % (Name, TriggerChar), stream)

    result = []
    while 1 + 1 == 2:
        pattern = snap_expression(stream, PatternDict) 
        if pattern is not None:
            result.append(pattern)

        if check(stream, "}"):
            break
        elif check_whitespace(stream):
            continue
        elif check(stream, "/") or check(stream, "$"):
            error_msg("Pre- or post contexts are not allowed in %s \\%s{...} expressions." % (Name, TriggerChar), stream)
        else:
            error_msg("Missing closing '}' %s in \\%s{...}." % (Name, TriggerChar), stream)

    if MinN != MaxN:
        if len(result) < MinN:
            error_msg("At minimum %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)
        if len(result) > MaxN:
            error_msg("At maximum %i pattern%s required between '{' and '}'" \
                      % (MaxN, "" if MaxN == 1 else "s"), stream)
    else:
        if len(result) != MinN:
            error_msg("Exactly %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)

    return result
Example #18
def do(UTF8_String_or_Stream, PatternDict, 
       AllowNothingIsNecessaryF     = False,
       AllowStateMachineTrafoF      = True): 
    assert type(AllowNothingIsNecessaryF) == bool
    assert type(PatternDict) == dict

    def __ensure_whitespace_follows(InitialPos, stream):
        tmp = stream.read(1)
        if tmp == "" or tmp.isspace(): stream.seek(-1, 1); return

        end_position = stream.tell() - 1
        stream.seek(InitialPos)
        pattern_str = stream.read(end_position - InitialPos)
        error_msg("Pattern definition '%s' not followed by whitespace.\n" % pattern_str + \
                  "Found subsequent character '%s'." % tmp, 
                  stream)

    if type(UTF8_String_or_Stream) == str: stream = StringIO(UTF8_String_or_Stream)
    else:                                  stream = UTF8_String_or_Stream    

    if PatternDict is None: PatternDict = {}

    initial_position = stream.tell()

    # -- check for the begin of line condition (BOL)
    if check(stream, '^'): begin_of_line_f = True
    else:                  begin_of_line_f = False
    
    # -- MAIN: transform the pattern into a state machine
    pre, core, post = snap_conditional_expression(stream, PatternDict)

    if core is None: 
        stream.seek(initial_position)
        return None

    # -- check for end of line condition (EOL) 
    # -- check for terminating whitespace
    end_of_line_f = False
    if check(stream, '$'): end_of_line_f = True

    __ensure_whitespace_follows(initial_position, stream)
    
    pattern = construct.do(core_sm         = core, 
                           begin_of_line_f = begin_of_line_f, 
                           pre_context     = pre,
                           end_of_line_f   = end_of_line_f,
                           post_context    = post, 
                           fh              = stream,
                           AllowNothingIsNecessaryF   = AllowNothingIsNecessaryF,
                           AllowStateMachineTrafoF    = AllowStateMachineTrafoF)
    
    return pattern
Example #19
def get_expression_in_brackets(stream, PatternDict, Name, TriggerChar):
    # Read over the trigger character 
    stream.read(1)
    if not check(stream, "{"):
        error_msg("Missing opening '{' after %s %s." % (Name, TriggerChar), stream)
    pattern = snap_expression(stream, PatternDict) 
    if check(stream, "}"):
        return pattern
    elif check(stream, "/") or check(stream, "$"):
        error_msg("Pre- or post contexts are not allowed in %s \\%s{...} expressions." % (Name, TriggerChar), stream)
    else:
        error_msg("Missing closing '}' %s in \\%s{...}." % (Name, TriggerChar), stream)
    return pattern
Example #20
def snap_property_set(stream):
    position = stream.tell()
    x = stream.read(2)
    if x == "\\P":
        stream.seek(position)
        return property.do(stream)
    elif x == "\\N":
        stream.seek(position)
        return property.do_shortcut(stream, "N", "na")  # UCS Property: Name
    elif x == "\\G":
        stream.seek(position)
        return property.do_shortcut(stream, "G",
                                    "gc")  # UCS Property: General_Category
    elif x == "\\E":
        skip_whitespace(stream)
        if check(stream, "{") == False:
            error.log("Missing '{' after '\\E'.", stream)
        encoding_name = __snap_until(stream, "}").strip()
        result = codec_db.get_supported_unicode_character_set(encoding_name)
        if result is None:
            error.log("Error occured at this place.", stream)
        return result
    else:
        stream.seek(position)
        return None
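
Example #20 uses a peek-and-rewind dispatch: read two characters, then seek back so whichever specialized parser is chosen re-reads its own trigger from an untouched stream. A minimal sketch of the idiom (the handler names are placeholders, not Quex API):

import io

def dispatch_toy(stream):
    position = stream.tell()
    x = stream.read(2)
    stream.seek(position)        # every branch sees the untouched input
    handlers = {"\\P": "property",
                "\\N": "name-shortcut",
                "\\G": "category-shortcut"}
    return handlers.get(x)       # None means 'not a property set'

print(dispatch_toy(io.StringIO("\\P{Alpha}")))   # -> property
print(dispatch_toy(io.StringIO("[a-z]")))        # -> None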
Example #21
def parse_standard_members(fh, section_name, descriptor, already_defined_list):
    if not check(fh, "{"):
        error_msg(
            "Missing opening '{' at begin of token_type section '%s'." %
            section_name, fh)

    position = fh.tell()

    while 1 + 1 == 2:
        try:
            result = parse_variable_definition(fh)
        except EndOfStreamException:
            fh.seek(position)
            error_eof("standard", fh)

        if result is None: return
        type_code_fragment, name = result[0], result[1]

        __validate_definition(type_code_fragment,
                              name,
                              already_defined_list,
                              StandardMembersF=True)

        if name == "id": descriptor.token_id_type = type_code_fragment
        elif name == "column_number":
            descriptor.column_number_type = type_code_fragment
        elif name == "line_number":
            descriptor.line_number_type = type_code_fragment
        else:
            assert False  # This should have been caught by the variable parser function

        already_defined_list.append([name, type_code_fragment])
Example #22
def parse_standard_members(fh, section_name, descriptor, already_defined_list):
    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type section '%s'." % section_name, fh);

    position = fh.tell()

    while 1 + 1 == 2:
        try: 
            result = parse_variable_definition(fh) 
        except EndOfStreamException:
            fh.seek(position)
            error_eof("standard", fh)

        if result is None: return
        type_code_fragment, name = result[0], result[1]

        __validate_definition(type_code_fragment, name,
                              already_defined_list, StandardMembersF=True)

        if   name == "id":            descriptor.token_id_type      = type_code_fragment
        elif name == "column_number": descriptor.column_number_type = type_code_fragment
        elif name == "line_number":   descriptor.line_number_type   = type_code_fragment
        else:
            assert False # This should have been caught by the variable parser function

        already_defined_list.append([name, type_code_fragment])
Example #23
def snap_expression(stream, PatternDict):
    """expression:  term
                    term | expression
    """              
    __debug_entry("expression", stream)    
    # -- term
    result = snap_term(stream, PatternDict) 
    if result is None: 
        return __debug_exit(None, stream)

    # -- optional '|'
    if not check(stream, '|'): 
        return __debug_exit(result, stream)
    
    position_1 = stream.tell()
    __debug_print("'|' (in expression)")

    # -- expression
    result_2 = snap_expression(stream, PatternDict) 
    __debug_print("expression(in expression):",  result_2)
    if result_2 is None:
        stream.seek(position_1) 
        return __debug_exit(result, stream)

    result = parallelize.do([result, result_2], CloneF=True)   # CloneF = False (should be!)
    return __debug_exit(beautifier.do(result), stream)
Example #24
def _parse_pattern(fh):
    name = read_identifier(fh, 
                           OnMissingStr="Missing identifier for pattern definition.")

    if blackboard.shorthand_db.has_key(name):
        error.log("Second definition of pattern '%s'.\n" % name + \
                  "Pattern names must be unique.", fh)

    skip_whitespace(fh)

    if check(fh, "}"): 
        error.log("Missing regular expression for pattern definition '%s'." % \
                  name, fh)

    # No encoding transformation, here. Transformation happens after 
    # expansion in a mode.
    pattern = regular_expression.parse(fh, AllowNothingIsFineF = True) 

    if pattern.has_pre_or_post_context():
        error.log("Pattern definition with pre- and/or post-context.\n" + \
                  "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
    state_machine = pattern.extract_sm()

    value = PatternShorthand(name, state_machine, SourceRef.from_FileHandle(fh), 
                             pattern.pattern_string())

    return name, value
Example #25
def snap_expression(stream, PatternDict):
    """expression:  term
                    term | expression
    """
    __debug_entry("expression", stream)
    # -- term
    result = snap_term(stream, PatternDict)
    if result is None:
        return __debug_exit(None, stream)

    # -- optional '|'
    if not check(stream, '|'):
        return __debug_exit(result, stream)

    position_1 = stream.tell()
    __debug_print("'|' (in expression)")

    # -- expression
    result_2 = snap_expression(stream, PatternDict)
    __debug_print("expression(in expression):", result_2)
    if result_2 is None:
        stream.seek(position_1)
        return __debug_exit(result, stream)

    result = parallelize.do([result, result_2],
                            CloneF=True)  # CloneF = False (should be!)
    return __debug_exit(beautifier.do(result), stream)
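
The grammar behind Examples #23 and #25 is the right-recursive 'expression: term ('|' expression)?', with a backtrack when '|' is not followed by another expression (position_1 is taken after the '|', so a dangling '|' stays consumed, exactly as in the original). A stripped-down version over plain strings, using the check() stub from the reconstruction after Example #2:

import io

def snap_term_toy(stream):
    # Toy 'term': a maximal run of letters, or None.
    result = ""
    while True:
        pos = stream.tell()
        c = stream.read(1)
        if c.isalpha():
            result += c
        else:
            stream.seek(pos)
            return result or None

def snap_expression_toy(stream):
    result = snap_term_toy(stream)
    if result is None:
        return None
    if not check(stream, "|"):
        return result
    position_1 = stream.tell()
    result_2 = snap_expression_toy(stream)
    if result_2 is None:
        stream.seek(position_1)          # dangling '|': keep what we have
        return result
    return "(%s|%s)" % (result, result_2)

print(snap_expression_toy(io.StringIO("ab|cd|ef ")))
# -> (ab|(cd|ef))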
Example #26
def parse_distinct_members(fh, section_name, descriptor, already_defined_list):
    if not check(fh, "{"):
        error.log("Missing opening '{' at begin of token_type section '%s'." % section_name, fh);

    result = parse_variable_definition_list(fh, "distinct", already_defined_list)
    if result == {}: 
        error.log("Missing variable definition in token_type 'distinct' section.", fh)
    descriptor.distinct_db = result
Example #27
def get_expression_in_brackets(stream, PatternDict, Name, TriggerChar):
    # Read over the trigger character
    stream.read(1)
    if not check(stream, "{"):
        error_msg("Missing opening '{' after %s %s." % (Name, TriggerChar),
                  stream)
    pattern = snap_expression(stream, PatternDict)
    if check(stream, "}"):
        return pattern
    elif check(stream, "/") or check(stream, "$"):
        error_msg(
            "Pre- or post contexts are not allowed in %s \\%s{...} expressions."
            % (Name, TriggerChar), stream)
    else:
        error_msg("Missing closing '}' %s in \\%s{...}." % (Name, TriggerChar),
                  stream)
    return pattern
Example #28
def snap_command(stream, PatternDict):
    global CommandDB

    for command_str, snap_function in CommandDB:
        if check(stream, command_str):
            return snap_function(stream, PatternDict)

    return None
Example #29
def parse_distinct_members(fh, section_name, descriptor, already_defined_list):
    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type section '%s'." % section_name, fh);

    result = parse_variable_definition_list(fh, "distinct", already_defined_list)
    if result == {}: 
        error_msg("Missing variable definition in token_type 'distinct' section.", fh)
    descriptor.distinct_db = result
Example #30
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Snaps a predefined pattern from the input string and returns the resulting
       state machine.
    """
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)
    if pattern_name == "":
        raise RegularExpressionException(
            "Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    error.verify_word_in_list(
        pattern_name, PatternDict.keys(),
        "Specifier '%s' not found in any preceeding 'define { ... }' section."
        % pattern_name, stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__ == PatternShorthand

    # The replacement may be a state machine or a number set
    if StateMachineF:
        # Get a cloned version of state machine
        state_machine = reference.get_state_machine()
        assert isinstance(state_machine, DFA)

        # It is essential that state machines defined as patterns do not
        # have origins. Otherwise, the optimization of patterns that
        # contain pattern replacements might get confused and can
        # not find all optimizations.
        assert not state_machine.has_specific_acceptance_id()

        # A state machine, that contains pre- or post- conditions cannot be part
        # of a replacement. The addition of new post-contexts would mess up the pattern.
        ## if state_machine.has_pre_or_post_context():
        ##    error.log("Pre- or post-conditioned pattern was used in replacement.\n" + \
        ##              "Quex's regular expression grammar does not allow this.", stream)

        return state_machine

    else:
        # Get a cloned version of character set
        character_set = reference.get_character_set()
        if character_set is None:
            error.log(
                "Replacement in character set expression must be a character set.\n"
                "Specifier '%s' relates to a pattern state machine." %
                pattern_name, stream)

        if character_set.is_empty():
            error.log(
                "Referenced character set '%s' is empty.\nAborted." %
                pattern_name, stream)

        return character_set
Example #31
def parse_union_members(fh, section_name, descriptor, already_defined_list):
    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type section '%s'." % section_name, fh);

    result = parse_variable_definition_list(fh, "union", already_defined_list, 
                                                         GroupF=True)
    if result == {}: 
        error_msg("Missing variable definition in token_type 'union' section.", fh)
    descriptor.union_db = result
Example #32
def parse_union_members(fh, section_name, descriptor, already_defined_list):
    if not check(fh, "{"):
        error.log("Missing opening '{' at begin of token_type section '%s'." % section_name, fh);

    result = parse_variable_definition_list(fh, "union", already_defined_list, 
                                                         GroupF=True)
    if result == {}: 
        error.log("Missing variable definition in token_type 'union' section.", fh)
    descriptor.union_db = result
Example #33
def snap_conditional_expression(stream, PatternDict):
    """conditional expression: expression
                               expression / expression                 = post conditioned expression
                               expression / expression /               = pre conditioned expression
                               expression / expression / expression    = pre and post conditioned expression
       TODO: <- ($8592) for pre-conditions
             -> ($8594) for post-conditions


        RETURNS: pre_context, core_pattern, post_context
    """
    __debug_entry("conditional expression", stream)

    # -- expression
    pattern_0 = snap_expression(stream, PatternDict)
    if pattern_0 is None:
        return None, None, None

    # -- '/'
    if not check(stream, '/'):
        # (1) expression without pre and post condition
        #     pattern_0 is already beautified by 'snap_expression()'
        return None, pattern_0, None

    # -- expression
    pattern_1 = snap_expression(stream, PatternDict)
    if pattern_1 is None:
        return None, pattern_0, None

    # -- '/'
    if not check(stream, '/'):
        # (2) expression with only a post condition
        #     NOTE: setup_post_context() marks state origins!
        return None, pattern_0, pattern_1

    # -- expression
    pattern_2 = snap_expression(stream, PatternDict)
    if pattern_2 is None:
        # (3) expression with only a pre condition
        #     NOTE: setup_pre_context() marks the state origins!
        return pattern_0, pattern_1, None
    else:
        # (4) expression with post and pre-condition
        return pattern_0, pattern_1, pattern_2
Example #34
def snap_conditional_expression(stream, PatternDict):
    """conditional expression: expression
                               expression / expression                 = post conditioned expression
                               expression / expression /               = pre conditioned expression
                               expression / expression / expression    = pre and post conditioned expression
       TODO: <- ($8592) for pre-conditions
             -> ($8594) for post-conditions


        RETURNS: pre_context, core_pattern, post_context
    """                     
    __debug_entry("conditional expression", stream)    

    # -- expression
    pattern_0 = snap_expression(stream, PatternDict) 
    if pattern_0 is None: 
        return None, None, None
    
    # -- '/'
    if not check(stream, '/'): 
        # (1) expression without pre and post condition
        #     pattern_0 is already beautified by 'snap_expression()'
        return None, pattern_0, None
        
    # -- expression
    pattern_1 = snap_expression(stream, PatternDict) 
    if pattern_1 is None: 
        return None, pattern_0, None
    
    # -- '/'
    if not check(stream, '/'): 
        # (2) expression with only a post condition
        #     NOTE: setup_post_context() marks state origins!
        return None, pattern_0, pattern_1

    # -- expression
    pattern_2 = snap_expression(stream, PatternDict) 
    if pattern_2 is None: 
        # (3) expression with only a pre condition
        #     NOTE: setup_pre_context() marks the state origins!
        return pattern_0, pattern_1, None
    else:
        # (4) expression with post and pre-condition
        return pattern_0, pattern_1, pattern_2
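
The four return shapes of snap_conditional_expression() (Examples #33 and #34) are easiest to see side by side. A toy driver, reusing check() and snap_expression_toy() from the sketches above:

import io

def snap_conditional_toy(stream):
    # RETURNS: (pre_context, core_pattern, post_context)
    p0 = snap_expression_toy(stream)
    if p0 is None:              return None, None, None
    if not check(stream, "/"):  return None, p0, None    # plain expression
    p1 = snap_expression_toy(stream)
    if p1 is None:              return None, p0, None
    if not check(stream, "/"):  return None, p0, p1      # post-context only
    p2 = snap_expression_toy(stream)
    if p2 is None:              return p0, p1, None      # pre-context only
    return p0, p1, p2                                    # pre and post

for text in ("a ", "a/b ", "a/b/ ", "a/b/c "):
    print(text.strip(), "->", snap_conditional_toy(io.StringIO(text)))
# a     -> (None, 'a', None)
# a/b   -> (None, 'a', 'b')
# a/b/  -> ('a', 'b', None)
# a/b/c -> ('a', 'b', 'c')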
Example #35
def _parse_definition_head(fh, IdentifierList):

    if check(fh, "\\default"): 
        error.log("'\\default' has been replaced by keyword '\\else' since quex 0.64.9!", fh)
    elif check(fh, "\\else"): 
        pattern = None
    else:                      
        pattern = regular_expression.parse(fh)

    skip_whitespace(fh)
    check_or_die(fh, "=>", " after character set definition.")

    skip_whitespace(fh)
    identifier = read_identifier(fh, OnMissingStr="Missing identifier following '=>'.")
    error.verify_word_in_list(identifier, IdentifierList,
                              "Unrecognized specifier '%s'." % identifier, fh)
    skip_whitespace(fh)

    return pattern, identifier, SourceRef.from_FileHandle(fh)
Example #36
def read_character_code(fh):
    # NOTE: This function is tested with the regression test for feature request 2251359.
    #       See directory $QUEX_PATH/TEST/2251359.
    pos = fh.tell()
    
    start = fh.read(1)
    if start == "":  
        fh.seek(pos); return -1

    elif start == "'": 
        # read a utf-8 char and get the token-id
        # Example: '+'
        if check(fh, "\\"):
            # snap_backslashed_character throws an exception if 'backslashed char' is nonsense.
            character_code = snap_backslashed_character.do(fh, ReducedSetOfBackslashedCharactersF=True)
        else:
            character_code = __read_one_utf8_code_from_stream(fh)

        if character_code is None:
            error.log("Missing utf8-character for definition of character code by character.", 
                      fh)

        elif fh.read(1) != '\'':
            error.log("Missing closing ' for definition of character code by character.", 
                      fh)

        return character_code

    if start == "U":
        if fh.read(1) != "C": fh.seek(pos); return -1
        # read Unicode Name 
        # Example: UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE
        skip_whitespace(fh)
        ucs_name = __read_token_identifier(fh)
        if ucs_name == "": fh.seek(pos); return -1
        # Get the character set related to the given name. Note, the size of the set
        # is supposed to be one.
        character_code = ucs_property_db.get_character_set("Name", ucs_name)
        if type(character_code) in [str, unicode]:
            error.verify_word_in_list(ucs_name, ucs_property_db["Name"].code_point_db,
                                      "The string %s\ndoes not identify a known unicode character." % ucs_name, 
                                      fh)
        elif type(character_code) not in [int, long]:
            error.log("%s relates to more than one character in unicode database." % ucs_name, 
                      fh) 
        return character_code

    fh.seek(pos)
    character_code = read_integer(fh)
    if character_code is not None: return character_code

    # Try to interpret it as something else ...
    fh.seek(pos)
    return -1               
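
Example #36 accepts three spellings for a character code: a quoted character like '+', a Unicode name introduced by UC, and a plain integer; on any failure it rewinds and returns -1 so the caller can try something else. A toy covering two of the three forms (read_integer() stub from the reconstruction after Example #2 assumed in scope; backslash escapes and the UC form are omitted):

import io

def read_character_code_toy(fh):
    # 'x' -> code point of x;  4711 -> 4711;  anything else -> -1, rewound.
    pos = fh.tell()
    start = fh.read(1)
    if start == "'":
        ch = fh.read(1)
        if ch != "" and fh.read(1) == "'":
            return ord(ch)
        fh.seek(pos)
        return -1
    fh.seek(pos)
    code = read_integer(fh)
    if code is not None:
        return code
    fh.seek(pos)             # leave the stream untouched on failure
    return -1

print(read_character_code_toy(io.StringIO("'+'")))    # -> 43
print(read_character_code_toy(io.StringIO("4711")))   # -> 4711
print(read_character_code_toy(io.StringIO("UC X")))   # -> -1 (UC not handled here)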
Example #37
def __parse_section(fh, descriptor, already_defined_list):
    global token_type_code_fragment_db
    assert type(already_defined_list) == list

    SubsectionList = ["name", "file_name", "standard", "distinct", "union", "inheritable", "noid"] \
                      + token_type_code_fragment_db.keys()

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        fh.seek(position)
        if check(fh, "}"): 
            fh.seek(position) 
            return False
        error_msg("Missing token_type section ('standard', 'distinct', or 'union').", fh)

    verify_word_in_list(word, SubsectionList, 
                        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        descriptor.class_name, descriptor.name_space, descriptor.class_name_safe = read_namespaced_name(fh, "token_type")
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'name' specification.", fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_die(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False
        check_or_die(fh, ";")

    elif word == "file_name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.", fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'file_name' specification.", fh)

    elif word in ["standard", "distinct", "union"]:
        if   word == "standard": parse_standard_members(fh, word, descriptor, already_defined_list)
        elif word == "distinct": parse_distinct_members(fh, word, descriptor, already_defined_list)
        elif word == "union":    parse_union_members(fh, word, descriptor, already_defined_list)

        if not check(fh, "}"):
            fh.seek(position)
            error_msg("Missing closing '}' at end of token_type section '%s'." % word, fh);

    elif word in token_type_code_fragment_db.keys():
        fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)        
        descriptor.__dict__[word] = fragment

    else: 
        assert False, "This code section section should not be reachable because 'word'\n" + \
                      "was checked to fit in one of the 'elif' cases."

    return True
Example #38
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Snaps a predefined pattern from the input string and returns the resulting
       state machine.
    """ 
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)  
    if pattern_name == "":
        raise RegularExpressionException("Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    verify_word_in_list(pattern_name, PatternDict.keys(),
                        "Specifier '%s' not found in any preceeding 'define { ... }' section." % pattern_name, 
                        stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__.__name__ == "PatternShorthand" 

    # The replacement may be a state machine or a number set
    if StateMachineF:
        # Get a cloned version of state machine
        state_machine = reference.get_state_machine()
        assert isinstance(state_machine, StateMachine)

        # It is essential that state machines defined as patterns do not 
        # have origins. Otherwise, the optimization of patterns that
        # contain pattern replacements might get confused and can
        # not find all optimizations.
        assert state_machine.has_origins() == False
            
        # A state machine, that contains pre- or post- conditions cannot be part
        # of a replacement. The addition of new post-contexts would mess up the pattern.
        ## if state_machine.has_pre_or_post_context():
        ##    error_msg("Pre- or post-conditioned pattern was used in replacement.\n" + \
        ##              "Quex's regular expression grammar does not allow this.", stream)
            
        return state_machine

    else:
        # Get a cloned version of character set
        character_set = reference.get_character_set()
        if character_set is None:
            error_msg("Replacement in character set expression must be a character set.\n"
                      "Specifier '%s' relates to a pattern state machine." % pattern_name, stream)

        if character_set.is_empty():
            error_msg("Referenced character set '%s' is empty.\nAborted." % pattern_name, stream)

        return character_set
Example #39
def __parse_brief(new_mode, fh):
    """ADAPTS: new_mode.pattern_action_list where new pattern action pairs 
                                            are entered.
    RETURNS: True, in case of success.
    EXITS:   in case of syntax errors.
    """
    flags = optional_flags(
        fh, "brief pattern action pair list", "", {
            "N": "pass LexemeNull to token contructor.",
            "L": "pass Lexeme to token constructor.",
            "i": "implicit token identifier definition."
        }, ["NL"])

    skip_whitespace(fh)
    prefix = read_identifier(fh)
    skip_whitespace(fh)

    lexeme_null_f = "N" in flags
    lexeme_f = "L" in flags
    implicit_tid_f = "i" in flags

    check_or_die(fh, "{", "Opening bracket required after 'brief'.")
    while not check(fh, "}"):
        skip_whitespace(fh)

        pattern = regular_expression.parse(fh)
        skip_whitespace(fh)

        position = fh.tell()
        identifier = read_identifier(fh)
        if not identifier:
            error.log("Missing identifier after regular expression.", fh)

        identifier = "%s%s" % (prefix, identifier)

        check_or_die(
            fh, ";", "Semincolon required after brief token identifier '%s'." %
            identifier)

        if implicit_tid_f: token_id_db_enter(fh, identifier)

        code = code_fragment.get_CodeUser_for_token_sending(
            fh,
            identifier,
            position,
            LexemeNullF=lexeme_null_f,
            LexemeF=lexeme_f)
        new_mode.add_pattern_action_pair(pattern, code, fh)

    return True
Example #40
def __parse_keyword_list(new_mode, fh):
    """ADAPTS: new_mode.pattern_action_list where new pattern action pairs 
                                            are entered.
    RETURNS: True, in case of success.
    EXITS:   in case of syntax errors.
    """
    flags = optional_flags(
        fh, "keyword_list", "", {
            "u": "make correspondent token identifiers uppercase.",
            "l": "make correspondent token identifiers lowercase.",
            "N": "pass LexemeNull to token contructor.",
            "L": "pass Lexeme to token constructor.",
            "i": "implicit token identifier definition."
        }, ["ul", "NL"])

    lexeme_null_f = "N" in flags
    lexeme_f = "L" in flags
    implicit_tid_f = "i" in flags
    lowercase_f = "l" in flags
    uppercase_f = "u" in flags

    skip_whitespace(fh)
    prefix = read_identifier(fh)
    skip_whitespace(fh)

    check_or_die(fh, "{", "Opening bracket required after 'keyword_list'.")
    while not check(fh, "}"):
        skip_whitespace(fh)
        position = fh.tell()
        identifier = read_identifier(fh)
        pattern = regular_expression.parse(StringIO("%s " % identifier))

        check_or_die(fh, ";",
                     "Semincolon required after keyword '%s'." % identifier)
        if not identifier: continue
        if uppercase_f: identifier = identifier.upper()
        elif lowercase_f: identifier = identifier.lower()

        identifier = "%s%s" % (prefix, identifier)

        if implicit_tid_f: token_id_db_enter(fh, identifier)

        code = code_fragment.get_CodeUser_for_token_sending(
            fh,
            identifier,
            position,
            LexemeNullF=lexeme_null_f,
            LexemeF=lexeme_f)
        new_mode.add_pattern_action_pair(pattern, code, fh)
    return True
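In isolation, the identifier handling above amounts to optional case mapping followed by prefixing. A minimal reproduction of just that step:
def keyword_token_ids(keywords, prefix, uppercase=False, lowercase=False):
    """Map keywords to token identifiers the way __parse_keyword_list does."""
    result = []
    for kw in keywords:
        name = kw.upper() if uppercase else (kw.lower() if lowercase else kw)
        result.append(prefix + name)
    return result

print(keyword_token_ids(["if", "else", "while"], "TKN_", uppercase=True))
# ['TKN_IF', 'TKN_ELSE', 'TKN_WHILE']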
Example #41
0
def parse(fh):
    descriptor = TokenTypeDescriptorCore()

    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type definition", fh)

    already_defined_list = []
    position = fh.tell()
    begin_line_n = get_current_line_info_number(fh)
    result   = True
    while result == True:
        try: 
            # x = fh.tell(); fh.seek(x)
            result = parse_section(fh, descriptor, already_defined_list)
        except EndOfStreamException:
            fh.seek(position)
            error_msg("End of file reached while parsing token_type section.", fh)

        
    if not check(fh, "}"):
        fh.seek(position)
        error_msg("Missing closing '}' at end of token_type definition.", fh);

    result = TokenTypeDescriptor(descriptor, fh.name, begin_line_n)
    if     len(result.get_member_db()) == 0 \
       and result.class_name == "Token" \
       and result.token_id_type.__class__.__name__      == "CodeFragment" \
       and result.column_number_type.__class__.__name__ == "CodeFragment" \
       and result.line_number_type.__class__.__name__   == "CodeFragment":
        error_msg("Section 'token_type' does not define any members, does not\n" + \
                  "modify any standard member types, nor does it define a class\n" + \
                  "different from 'Token'.", fh)

    result.consistency_check()
    return result
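The try/except block shows the recovery idiom used throughout these examples: remember the section start with tell(), and on a premature end-of-stream seek() back so the error points at the opening of the construct rather than at EOF. Stripped to its essentials (all names below are hypothetical):
import io

class EndOfStreamException(Exception):
    pass

def parse_member(fh):
    line = fh.readline()
    if line == "":
        raise EndOfStreamException()
    return line.strip() or None     # a blank line ends the section

def parse_block(fh):
    position = fh.tell()
    try:
        while parse_member(fh) is not None:
            pass
    except EndOfStreamException:
        fh.seek(position)           # report where the block began, not at EOF
        raise SyntaxError("End of file reached while parsing section "
                          "starting at offset %d." % fh.tell())

parse_block(io.StringIO("a;\nb;\n\nrest"))   # fine: blank line ends the block
# parse_block(io.StringIO("a;\nb;\n"))       # raises SyntaxError at offset 0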
Example #42
0
def parse(fh):
    descriptor = TokenTypeDescriptorCore()

    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type definition", fh)

    already_defined_list = []
    position = fh.tell()
    begin_line_n = get_current_line_info_number(fh)
    result = True
    while result == True:
        try:
            # x = fh.tell(); fh.seek(x)
            result = parse_section(fh, descriptor, already_defined_list)
        except EndOfStreamException:
            fh.seek(position)
            error_msg("End of file reached while parsing token_type section.",
                      fh)

    if not check(fh, "}"):
        fh.seek(position)
        error_msg("Missing closing '}' at end of token_type definition.", fh)

    result = TokenTypeDescriptor(descriptor, fh.name, begin_line_n)
    if     len(result.get_member_db()) == 0 \
       and result.class_name == "Token" \
       and result.token_id_type.__class__.__name__      == "CodeFragment" \
       and result.column_number_type.__class__.__name__ == "CodeFragment" \
       and result.line_number_type.__class__.__name__   == "CodeFragment":
        error_msg("Section 'token_type' does not define any members, does not\n" + \
                  "modify any standard member types, nor does it define a class\n" + \
                  "different from 'Token'.", fh)

    result.consistency_check()
    return result
Example #43
0
def __create_mode_transition_and_token_sender(fh, Command):
    assert Command in ["GOTO", "GOSUB", "GOUP"]

    position = fh.tell()
    LanguageDB = Setup.language_db
    target_mode = ""
    token_sender = ""
    if check(fh, "("):
        skip_whitespace(fh)
        if Command != "GOUP":
            target_mode = __read_token_identifier(fh)
            skip_whitespace(fh)

        if check(fh, ")"):
            token_sender = ""

        elif Command == "GOUP" or check(fh, ","):
            skip_whitespace(fh)
            token_name = __read_token_identifier(fh)
            skip_whitespace(fh)

            if check(fh, ","):
                error_msg(
                    "Missing opening '(' after token name specification.\n"
                    "Note, that since version 0.50.1 the syntax for token senders\n"
                    "inside brief mode transitions is like:\n\n"
                    "     => GOTO(MYMODE, QUEX_TKN_MINE(Argument0, Argument1, ...));\n",
                    fh)

            token_sender = __create_token_sender_by_token_name(fh, token_name)

            if check(fh, ")") == False:
                error_msg("Missing closing ')' or ',' after '%s'." % Command,
                          fh)

        else:
            fh.seek(position)
            error_msg("Missing closing ')' or ',' after '%s'." % Command, fh)

    if check(fh, ";") == False:
        error_msg("Missing ')' or ';' after '%s'." % Command, fh)

    if Command in ["GOTO", "GOSUB"] and target_mode == "":
        error_msg(
            "Command %s requires at least one argument: The target mode." %
            Command, fh)

    # Code for mode change
    if Command == "GOTO": txt = LanguageDB.MODE_GOTO(target_mode)
    elif Command == "GOSUB": txt = LanguageDB.MODE_GOSUB(target_mode)
    else: txt = LanguageDB.MODE_GOUP()

    # Code for token sending
    txt += token_sender

    return txt
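The dispatch at the end is a plain mapping from command to generated code. A reduced sketch with a stand-in backend; LanguageDB's real MODE_* methods live in quex's language database, so the class below is purely hypothetical:
class FakeLanguageDB(object):
    def MODE_GOTO(self, mode):  return "self.enter_mode(%s);\n" % mode
    def MODE_GOSUB(self, mode): return "self.push_mode(%s);\n" % mode
    def MODE_GOUP(self):        return "self.pop_mode();\n"

def emit(command, target_mode, token_sender, db=FakeLanguageDB()):
    if   command == "GOTO":  txt = db.MODE_GOTO(target_mode)
    elif command == "GOSUB": txt = db.MODE_GOSUB(target_mode)
    else:                    txt = db.MODE_GOUP()
    return txt + token_sender       # mode change first, then token sending

print(emit("GOTO", "STRING_MODE", "self.send(TKN_STRING_BEGIN);\n"))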
Example #44
0
def snap_bracketed_expression(stream, PatternDict):
    position = stream.tell()
    result = snap_expression(stream, PatternDict)
    if not check(stream, ")"):
        stream.seek(position)
        remainder_txt = stream.readline().replace("\n", "").replace("\r", "")
        raise RegularExpressionException("Missing closing ')' after expression; found '%s'.\n" % remainder_txt \
                                         + "Note, that patterns end with the first non-quoted whitespace.\n" \
                                         + "Also, closing brackets in quotes do not close a syntax block.")

    if result is None:
        length = stream.tell() - position
        stream.seek(position)
        raise RegularExpressionException("expression in brackets has invalid syntax '%s'" % \
                                         stream.read(length))
    return result
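On a missing ')' the function rewinds and quotes the remainder of the line, which makes the diagnostic largely self-explanatory. The same trick in isolation:
import io

def report_unconsumed(stream, position):
    """Rewind to 'position' and quote the rest of the line in the message."""
    stream.seek(position)
    remainder = stream.readline().rstrip("\r\n")
    return "Missing closing ')'; found '%s'." % remainder

s = io.StringIO("a|b c")
s.read(3)                        # pretend 'a|b' was parsed successfully
print(report_unconsumed(s, 3))   # Missing closing ')'; found ' c'.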
Example #46
0
def __create_mode_transition_and_token_sender(fh, Op):
    assert Op in ["GOTO", "GOSUB", "GOUP"]

    position     = fh.tell()
    target_mode  = ""
    token_sender = ""
    if check(fh, "("):
        skip_whitespace(fh)
        if Op != "GOUP":
            target_mode = __read_token_identifier(fh)
            skip_whitespace(fh)

        if check(fh, ")"):
            token_sender = ""

        elif Op == "GOUP" or check(fh, ","):
            skip_whitespace(fh)
            token_name = __read_token_identifier(fh)
            skip_whitespace(fh)

            if check(fh, ","):
                error.log("Missing opening '(' after token name specification.\n" 
                          "Note, that since version 0.50.1 the syntax for token senders\n"
                          "inside brief mode transitions is like:\n\n"
                          "     => GOTO(MYMODE, QUEX_TKN_MINE(Argument0, Argument1, ...));\n", 
                          fh)

            token_sender = __create_token_sender_by_token_name(fh, token_name) 

            if check(fh, ")") == False:
                error.log("Missing closing ')' or ',' after '%s'." % Op, 
                          fh)

        else:
            fh.seek(position)
            error.log("Missing closing ')' or ',' after '%s'." % Op, fh)

    if check(fh, ";") == False:
        error.log("Missing ')' or ';' after '%s'." % Op, fh)

    if Op in ["GOTO", "GOSUB"] and target_mode == "": 
        error.log("Op %s requires at least one argument: The target mode." % Op, 
                  fh)

    # Code for mode change
    if   Op == "GOTO":  txt = Lng.MODE_GOTO(target_mode)
    elif Op == "GOSUB": txt = Lng.MODE_GOSUB(target_mode)
    else:                    txt = Lng.MODE_GOUP()

    # Code for token sending
    txt += token_sender

    return txt
Example #47
0
def __parse_function_argument_list(fh, ReferenceName):
    argument_list = []
    position = fh.tell()
    try:
        # Read argument list
        if check(fh, "(") == False:
            return []

        text = ""
        while True:
            tmp = fh.read(1)
            if tmp == ")":
                break
            elif tmp in ["(", "[", "{"]:
                closing_bracket = {"(": ")", "[": "]", "{": "}"}[tmp]
                text += tmp + read_until_closing_bracket(
                    fh, tmp, closing_bracket) + closing_bracket
            elif tmp == "\"":
                text += tmp + read_until_closing_bracket(
                    fh, "", "\"", IgnoreRegions=[]) + "\""
            elif tmp == "'":
                text += tmp + read_until_closing_bracket(
                    fh, "", "'", IgnoreRegions=[]) + "'"
            elif tmp == ",":
                argument_list.append(text)
                text = ""
            elif tmp == "":
                fh.seek(position)
                error_msg(
                    "End of file reached while parsing argument list for %s." %
                    ReferenceName, fh)
            else:
                text += tmp

        if text != "": argument_list.append(text)

        argument_list = map(lambda arg: arg.strip(), argument_list)
        argument_list = filter(lambda arg: arg != "", argument_list)
        return argument_list

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing token shortcut.", fh)
Example #48
0
def snap_property_set(stream):
    position = stream.tell()
    x = stream.read(2)
    if   x == "\\P": 
        stream.seek(position)
        return property.do(stream)
    elif x == "\\N": 
        stream.seek(position)
        return property.do_shortcut(stream, "N", "na") # UCS Property: Name
    elif x == "\\G": 
        stream.seek(position)
        return property.do_shortcut(stream, "G", "gc") # UCS Property: General_Category
    elif x == "\\E": 
        skip_whitespace(stream)
        if check(stream, "{") == False:
            error_msg("Missing '{' after '\\E'.", stream)
        encoding_name = __snap_until(stream, "}").strip()
        return codec_db.get_supported_unicode_character_set(encoding_name, FH=stream)
    else:
        stream.seek(position)
        return None
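Every branch above that delegates first seeks back to the saved position, so the called parser sees the complete '\P', '\N' or '\G' prefix. The peek-then-rewind idiom in isolation:
import io

def peek(stream, n):
    """Read 'n' characters for inspection, then restore the position."""
    pos = stream.tell()
    text = stream.read(n)
    stream.seek(pos)
    return text

s = io.StringIO(r"\P{Alphabetic}")
if peek(s, 2) == "\\P":
    print("dispatch to the Unicode property parser")
assert s.tell() == 0    # stream untouched for the delegate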
Example #49
0
def __parse_function_argument_list(fh, ReferenceName):
    argument_list = []
    position = fh.tell()
    try:
        # Read argument list
        if check(fh, "(") == False:
            return []

        text = ""
        while True:
            tmp = fh.read(1)
            if   tmp == ")": 
                break
            elif tmp in ["(", "[", "{"]:
                closing_bracket = {"(": ")", "[": "]", "{": "}"}[tmp]
                text += tmp + read_until_closing_bracket(fh, tmp, closing_bracket) + closing_bracket
            elif tmp == "\"":
                text += tmp + read_until_closing_bracket(fh, "", "\"", IgnoreRegions = []) + "\"" 
            elif tmp == "'":
                text += tmp + read_until_closing_bracket(fh, "", "'", IgnoreRegions = []) + "'" 
            elif tmp == ",":
                argument_list.append(text)
                text = ""
            elif tmp == "":
                fh.seek(position)
                error.error_eof("argument list for %s" % ReferenceName, fh)
            else:
                text += tmp

        if text != "": argument_list.append(text)

        argument_list = map(lambda arg:    arg.strip(), argument_list)
        argument_list = filter(lambda arg: arg != "",   argument_list)
        return argument_list

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("token", fh)
Example #50
0
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """NamesOnlyF == True: Allow only definition of names, no numeric values 
                           may be assigned to it.

       'NamesOnlyF' indicates that data is not written to the global 
       'token_id_db'. Then only a list of names is returned.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    prefix = Setup.token_id_prefix
    prefix_plain = Setup.token_id_prefix_plain  # i.e. without name space included

    if NamesOnlyF:
        result = set()

    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("Missing opening '{' for after 'token' section identifier.",
                  fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(
            fh, TolerantF=True, OnMissingStr="Missing valid token identifier.")

        # -- check the name; if it starts with the token prefix, issue a warning
        suspicious_prefix = None
        if len(prefix) != 0 and candidate.find(prefix) == 0:
            suspicious_prefix = prefix
        elif len(prefix_plain) != 0 and candidate.find(prefix_plain) == 0:
            suspicious_prefix = prefix_plain

        if suspicious_prefix is not None:
            error.warning("Token identifier '%s' starts with token prefix '%s'.\n" \
                      % (candidate, suspicious_prefix) \
                      + "Token prefix is mounted automatically. This token id appears in the source\n" \
                      + "code as '%s%s'." \
                      % (prefix, candidate), \
                      fh,
                      SuppressCode=NotificationDB.warning_token_id_prefix_appears_in_token_id_name)

        skip_whitespace(fh)

        if NamesOnlyF:
            result.add(prefix + candidate)
            if check(fh, ";") == False:
                error.log("Missing ';' after token identifier '%s'.\n" \
                          % candidate, fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error.log(
                    "Missing number after '=' for token identifier '%s'." %
                    candidate, fh)

        if check(fh, ";") == False:
            error.log("Missing ';' after token identifier '%s'." % candidate,
                      fh)

        if not NamesOnlyF:
            ti = TokenInfo(candidate,
                           numeric_value,
                           SourceReference=SourceRef.from_FileHandle(fh))
            blackboard.token_id_db[candidate] = ti

    if NamesOnlyF:
        return sorted(list(result))
    else:
        return  # Changes are applied to 'blackboard.token_id_db'
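For a concrete picture of what this accepts: the body between '{' and '}' is a list of identifiers, each optionally assigned a number with '=' and each terminated by ';'. A toy reader for just that shape (standard library only, not quex's parser):
import re

def toy_token_ids(text):
    """Parse '{ NAME; NAME = 100; ... }' into a name -> value dict."""
    db = {}
    for part in text.strip().strip("{}").split(";"):
        part = part.strip()
        if not part: continue
        m = re.match(r"(\w+)(?:\s*=\s*(\d+))?$", part)
        if m is None: raise SyntaxError("bad entry: %r" % part)
        db[m.group(1)] = int(m.group(2)) if m.group(2) else None
    return db

print(toy_token_ids("{ IDENTIFIER; NUMBER = 100; SEMICOLON; }"))
# {'IDENTIFIER': None, 'NUMBER': 100, 'SEMICOLON': None}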
Example #51
0
def do(fh):
    """Parses pattern definitions of the form:
   
          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words, the right-hand side *must* be a character set.
          
    """
    indentation_setup = IndentationSetup(fh)

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    skip_whitespace(fh)

    while True:
        skip_whitespace(fh)

        if check(fh, ">"): 
            indentation_setup.seal()
            indentation_setup.consistency_check(fh)
            return indentation_setup
        
        # A regular expression state machine
        pattern_str, pattern = regular_expression.parse(fh)

        skip_whitespace(fh)
        if not check(fh, "=>"):
            error_msg("Missing '=>' after character set definition.", fh)

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "":
            error_msg("Missing identifier for indentation element definition.", fh)

        verify_word_in_list(identifier, 
                            ["space", "grid", "bad", "newline", "suppressor"],
                            "Unrecognized indentation specifier '%s'." % identifier, fh)

        trigger_set = None
        if identifier in ["space", "bad", "grid"]:
            if len(pattern.sm.states) != 2:
                error_msg("For indentation '%s' only patterns are addmissible which\n" % identifier + \
                          "can be matched by a single character, e.g. \" \" or [a-z].", fh)
            transition_map = pattern.sm.get_init_state().transitions().get_map()
            assert len(transition_map) == 1
            trigger_set = transition_map.values()[0]

        skip_whitespace(fh)
        if identifier == "space":
            value = read_integer(fh)
            if value is not None: 
                indentation_setup.specify_space(pattern_str, trigger_set, value, fh)
            else:
                # not a number received, is it an identifier?
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_space(pattern_str, trigger_set, variable, fh)
                else:
                    indentation_setup.specify_space(pattern_str, trigger_set, 1, fh)

        elif identifier == "grid":
            value = read_integer(fh)
            if value is not None: 
                indentation_setup.specify_grid(pattern_str, trigger_set, value, fh)
            else:
                # not a number received, is it an identifier?
                skip_whitespace(fh)
                variable = read_identifier(fh)
                if variable != "":
                    indentation_setup.specify_grid(pattern_str, trigger_set, variable, fh)
                else:
                    error_msg("Missing integer or variable name after keyword 'grid'.", fh) 

        elif identifier == "bad":
            indentation_setup.specify_bad(pattern_str, trigger_set, fh)

        elif identifier == "newline":
            indentation_setup.specify_newline(pattern_str, pattern.sm, fh)

        elif identifier == "suppressor":
            indentation_setup.specify_suppressor(pattern_str, pattern.sm, fh)

        else:
            assert False, "Unreachable code reached."

        if not check(fh, ";"):
            error_msg("Missing ';' after indentation '%s' specification." % identifier, fh)
Example #52
0
def parse_variable_definition(fh, GroupF=False, already_defined_list=[]):
    """PURPOSE: Parsing of a variable definition consisting of 'type' and 'name.
                Members can be mentioned together in a group, which means that
                they can appear simultaneously. Possible expresions are

                (1) single variables:

                              name0 : type;
                              name1 : type[32];
                              name2 : type*;

                (2) combined variables

                              {
                                  sub_name0 : type0;
                                  sub_name1 : type[64];
                                  sub_name2 : type1*;
                              }

       ARGUMENTS: 

        'GroupF'               allows to have 'nested variable groups' in curly brackets

        'already_defined_list' informs about variable names that have been already
                               chosen. It is only used for groups.

       RETURNS:
                 None        on failure to parse a variable definition.
                 array       when a single variable definition was found. 
                                array[0] = UserCodeFragment containing the type. 
                                array[1] = name of the variable.
                 dictionary  if it was a combined variable definition. The dictionary
                               maps: (variable name) ---> (UserCodeFragment with type)
    
    """
    position = fh.tell()

    skip_whitespace(fh)
    name_str = read_identifier(fh)
    if name_str == "":
        if not GroupF or not check(fh, "{"):
            fh.seek(position)
            return None
        sub_db = parse_variable_definition_list(fh, "Concurrent union variables", already_defined_list)
        if not check(fh, "}"): 
            fh.seek(position)
            error_msg("Missing closing '}' after concurrent variable definition.", fh)
        return [ sub_db ]

    else:
        name_str = name_str.strip()
        if not check(fh, ":"): error_msg("Missing ':' after identifier '%s'." % name_str, fh)
        
        if fh.read(1).isspace() == False:
            error_msg("Missing whitespace after ':' after identifier '%s'.\n" % name_str \
                    + "The notation has to be: variable-name ':' type ';'.", fh)

        type_str, i = read_until_letter(fh, ";", Verbose=True)
        if i == -1: error_msg("missing ';'", fh)
        type_str = type_str.strip()

        return [ CodeUser(type_str, SourceRef.from_FileHandle(fh)), name_str ]
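The single-variable branch reduces to the grammar name ':' type ';' with mandatory whitespace after the colon. A regex rendition of just that shape, for illustration only:
import re

# One '\s' after ':' enforces the mandatory whitespace checked above.
VARIABLE_DEF = re.compile(r"\s*(\w+)\s*:\s(.+?)\s*;", re.S)

def toy_variable_definition(text):
    m = VARIABLE_DEF.match(text)
    if m is None: raise SyntaxError("not a variable definition: %r" % text)
    return m.group(2).strip(), m.group(1)    # (type, name), like the original

print(toy_variable_definition("  token_number :  size_t ;"))
# ('size_t', 'token_number')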
Example #53
0
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """NamesOnlyF == True: Allow only definition of names, no numeric values 
                           may be assigned to it.

       'NamesOnlyF' indicates that data is not written to the global 
       'token_id_db'. Then only a list of names is returned.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    prefix       = Setup.token_id_prefix
    prefix_plain = Setup.token_id_prefix_plain # i.e. without name space included

    if NamesOnlyF: 
        result = set()

    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("Missing opening '{' for after 'token' section identifier.", 
                  fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True, OnMissingStr="Missing valid token identifier.")

        # -- check the name; if it starts with the token prefix, issue a warning
        suspicious_prefix = None
        if len(prefix) != 0 and candidate.find(prefix) == 0:       
            suspicious_prefix = prefix
        elif len(prefix_plain) != 0 and candidate.find(prefix_plain) == 0: 
            suspicious_prefix = prefix_plain

        if suspicious_prefix is not None:
            error.warning("Token identifier '%s' starts with token prefix '%s'.\n" \
                      % (candidate, suspicious_prefix) \
                      + "Token prefix is mounted automatically. This token id appears in the source\n" \
                      + "code as '%s%s'." \
                      % (prefix, candidate), \
                      fh, 
                      SuppressCode=NotificationDB.warning_token_id_prefix_appears_in_token_id_name)

        skip_whitespace(fh)

        if NamesOnlyF:
            result.add(prefix + candidate)
            if check(fh, ";") == False:
                error.log("Missing ';' after token identifier '%s'.\n" \
                          % candidate, fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error.log("Missing number after '=' for token identifier '%s'." % candidate, 
                          fh)

        if check(fh, ";") == False:
            error.log("Missing ';' after token identifier '%s'." % candidate, 
                      fh)

        if not NamesOnlyF:
            ti = TokenInfo(candidate, numeric_value, 
                           SourceReference=SourceRef.from_FileHandle(fh))
            blackboard.token_id_db[candidate] = ti

    if NamesOnlyF:
        return sorted(list(result))
    else:
        return # Changes are applied to 'blackboard.token_id_db'
Example #54
0
def snap_case_folded_pattern(sh, PatternDict, NumberSetF=False):
    """Parse a case fold expression of the form \C(..){ R } or \C{ R }.
       Assume that '\C' has been snapped already from the stream.

       See function ucs_case_fold_parser.get_fold_set() for details
       about case folding.
    """
    def __add_intermediate_states(sm, character_list, start_state_idx, target_state_idx):
        next_idx = start_state_idx
        for letter in character_list[:-1]:
            next_idx = sm.add_transition(next_idx, letter)
        sm.add_transition(next_idx, character_list[-1], target_state_idx)

    def __add_case_fold(sm, Flags, trigger_set, start_state_idx, target_state_idx):
        for interval in trigger_set.get_intervals(PromiseToTreatWellF=True):
            for i in range(interval.begin, interval.end):
                fold = ucs_case_fold.get_fold_set(i, Flags)
                for x in fold:
                    if type(x) == list:
                        __add_intermediate_states(sm, x, start_state_idx, target_state_idx)
                    else:
                        trigger_set.add_interval(Interval(x, x+1))


    pos = sh.tell()
    skip_whitespace(sh)
    # -- parse the optional options in '(' ')' brackets
    if not check(sh, "("):
        # By default 'single' and 'multi' character case folds are active
        if NumberSetF: flag_txt = "s"
        else:          flag_txt = "sm"
    else:
        flag_txt = read_until_character(sh, ")")

        if flag_txt == "":
            sh.seek(pos)
            error_msg("Missing closing ')' in case fold expression.", sh)

        flag_txt = flag_txt.replace(" ", "").replace("\t", "").replace("\n", "")

        for letter in flag_txt:
            if letter not in "smt":
                sh.seek(pos)
                error_msg("Letter '%s' not permitted as case fold option.\n" % letter + \
                          "Options are:  's' for simple case fold.\n" + \
                          "              'm' for multi character sequence case fold.\n" + \
                          "              't' for special turkish case fold rules.", sh)

            if NumberSetF and letter == "m":
                sh.seek(pos)
                error_msg("Option 'm' not permitted as case fold option in set expression.\n" + \
                          "Set expressions cannot absorb multi character sequences.", sh)

        skip_whitespace(sh)


    result = snap_curly_bracketed_expression(sh, PatternDict, "case fold operator", "C")[0]
    if NumberSetF:
        trigger_set = result.get_number_set()
        if trigger_set is None:
            error_msg("Expression in case fold does not result in character set.\n" + 
                      "The content in '\\C{content}' may start with '[' or '[:'.", sh)

        # -- perform the case fold for Sets!
        for interval in trigger_set.get_intervals(PromiseToTreatWellF=True):
            for i in range(interval.begin, interval.end):
                fold = ucs_case_fold.get_fold_set(i, flag_txt)
                for x in fold:
                    assert type(x) != list
                    trigger_set.add_interval(Interval(x, x+1))

        result = trigger_set

    else:
        # -- perform the case fold for State Machines!
        for state_idx, state in result.states.items():
            for target_state_idx, trigger_set in state.target_map.get_map().items():
                __add_case_fold(result, flag_txt, trigger_set, state_idx, target_state_idx)

    return result
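For character sets (NumberSetF) the fold is a plain closure over code points. A rough approximation with Python's built-in case mapping, where quex instead consults the UCS CaseFolding table via ucs_case_fold.get_fold_set():
def simple_case_fold_closure(code_points):
    """Close a set of code points under one-step upper/lower case mapping."""
    result = set(code_points)
    for cp in code_points:
        for variant in (chr(cp).lower(), chr(cp).upper()):
            if len(variant) == 1:          # skip multi-character folds
                result.add(ord(variant))
    return result

folded = simple_case_fold_closure({ord("a"), ord("B")})
print(sorted(chr(cp) for cp in folded))    # ['A', 'B', 'a', 'b']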
Example #55
0
def parse_variable_definition(fh, GroupF=False, already_defined_list=[]):
    """PURPOSE: Parsing of a variable definition consisting of 'type' and 'name.
                Members can be mentioned together in a group, which means that
                they can appear simultaneously. Possible expresions are

                (1) single variables:

                              name0 : type;
                              name1 : type[32];
                              name2 : type*;

                (2) combined variables

                              {
                                  sub_name0 : type0;
                                  sub_name1 : type[64];
                                  sub_name2 : type1*;
                              }

       ARGUMENTS: 

        'GroupF'               allows to have 'nested variable groups' in curly brackets

        'already_defined_list' informs about variable names that have been already
                               chosen. It is only used for groups.

       RETURNS:
                 None        on failure to parse a variable definition.
                 array       when a single variable definition was found. 
                                array[0] = UserCodeFragment containing the type. 
                                array[1] = name of the variable.
                 dictionary  if it was a combined variable definition. The dictionary
                               maps: (variable name) ---> (UserCodeFragment with type)
    
    """
    position = fh.tell()

    skip_whitespace(fh)
    name_str = read_identifier(fh)
    if name_str == "":
        if not GroupF or not check(fh, "{"):
            fh.seek(position)
            return None
        sub_db = parse_variable_definition_list(fh,
                                                "Concurrent union variables",
                                                already_defined_list)
        if not check(fh, "}"):
            fh.seek(position)
            error_msg(
                "Missing closing '}' after concurrent variable definition.",
                fh)
        return [sub_db]

    else:
        name_str = name_str.strip()
        if not check(fh, ":"):
            error_msg("Missing ':' after identifier '%s'." % name_str, fh)

        if fh.read(1).isspace() == False:
            error_msg("Missing whitespace after ':' after identifier '%s'.\n" % name_str \
                    + "The notation has to be: variable-name ':' type ';'.", fh)

        type_str, i = read_until_letter(fh, ";", Verbose=True)
        if i == -1: error_msg("missing ';'", fh)
        type_str = type_str.strip()

        return [CodeUser(type_str, SourceRef.from_FileHandle(fh)), name_str]
Example #56
0
def read_character_code(fh):
    # NOTE: This function is tested with the regression test for feature request 2251359.
    #       See directory $QUEX_PATH/TEST/2251359.
    pos = fh.tell()

    start = fh.read(1)
    if start == "":
        fh.seek(pos)
        return -1

    elif start == "'":
        # read a utf-8 character and get the token-id
        # Example: '+'
        if check(fh, "\\"):
            # snap_backslashed_character throws an exception if 'backslashed char' is nonsense.
            character_code = snap_backslashed_character.do(
                fh, ReducedSetOfBackslashedCharactersF=True)
        else:
            character_code = __read_one_utf8_code_from_stream(fh)

        if character_code is None:
            error_msg(
                "Missing utf8-character for definition of character code by character.",
                fh)

        elif fh.read(1) != '\'':
            error_msg(
                "Missing closing ' for definition of character code by character.",
                fh)

        return character_code

    if start == "U":
        if fh.read(1) != "C":
            fh.seek(pos)
            return -1
        # read Unicode Name
        # Example: UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE
        skip_whitespace(fh)
        ucs_name = __read_token_identifier(fh)
        if ucs_name == "":
            fh.seek(pos)
            return -1
        # Get the character set related to the given name. Note, the size of the set
        # is supposed to be one.
        character_code = ucs_property_db.get_character_set("Name", ucs_name)
        if type(character_code) in [str, unicode]:
            verify_word_in_list(
                ucs_name, ucs_property_db["Name"].code_point_db,
                "The string %s\ndoes not identify a known unicode character." %
                ucs_name, fh)
        elif type(character_code) not in [int, long]:
            error_msg(
                "%s relates to more than one character in unicode database." %
                ucs_name, fh)
        return character_code

    fh.seek(pos)
    character_code = read_integer(fh)
    if character_code is not None: return character_code

    # Try to interpret it as something else ...
    fh.seek(pos)
    return -1
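The function accepts three notations: a quoted character, a 'UC' Unicode name, and a plain integer. A standard-library sketch of the same dispatch, with unicodedata standing in for quex's ucs_property_db (an approximation, not the original behavior):
import unicodedata

def toy_character_code(text):
    text = text.strip()
    if len(text) == 3 and text[0] == text[2] == "'":
        return ord(text[1])                       # e.g. '+'
    if text.startswith("UC "):
        name = text[3:].strip().replace("_", " ")
        return ord(unicodedata.lookup(name))      # e.g. UC LATIN_SMALL_LETTER_A
    try:
        return int(text, 0)                       # 0x41, 0o101, 65, ...
    except ValueError:
        return -1                                 # mirror the failure value

print(toy_character_code("'+'"))                      # 43
print(toy_character_code("UC LATIN_SMALL_LETTER_A"))  # 97
print(toy_character_code("0x41"))                     # 65
print(toy_character_code("garbage"))                  # -1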