Example #1
0
def parse(fh):
    """This function parses a mode description and enters it into the 
       'blackboard.mode_description_db'. Once all modes are parsed
       they can be translated into 'real' modes and are located in
       'blackboard.mode_db'. 
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(
        fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    # NOTE: constructor does register this mode in the mode_db
    new_mode = ModeDescription(mode_name, SourceRef.from_FileHandle(fh))

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    dummy = fh.read(1)
    if dummy not in [":", "{"]:
        error.log("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if dummy == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events
    while __parse_element(new_mode, fh):
        pass
Example #2
0
def __parse_base_mode_list(fh, new_mode):
    new_mode.base_modes = []
    trailing_comma_f    = False
    while 1 + 1 == 2:
        if   check(fh, "{"): fh.seek(-1, 1); break
        elif check(fh, "<"): fh.seek(-1, 1); break

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "": break

        new_mode.base_modes.append(identifier)
        trailing_comma_f = False
        if not check(fh, ","): break
        trailing_comma_f = True


    if trailing_comma_f:
        error_msg("Trailing ',' after base mode '%s'." % new_mode.base_modes[-1], fh, 
                  DontExitF=True, WarningF=True)
        
    elif len(new_mode.base_modes) != 0:
        # This check is a 'service' -- for those who follow the old convention
        pos = fh.tell()
        skip_whitespace(fh)
        dummy_identifier = read_identifier(fh)
        if dummy_identifier != "":
            error_msg("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.base_modes[-1], dummy_identifier) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
Example #3
0
def __parse_base_mode_list(fh, new_mode):
    new_mode.derived_from_list = []
    trailing_comma_f = False
    while 1 + 1 == 2:
        if check(fh, "{"):
            fh.seek(-1, 1)
            break
        elif check(fh, "<"):
            fh.seek(-1, 1)
            break

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if identifier == "": break

        new_mode.derived_from_list.append(identifier)
        trailing_comma_f = False
        if not check(fh, ","): break
        trailing_comma_f = True

    if trailing_comma_f:
        error.warning(
            "Trailing ',' after base mode '%s'." %
            new_mode.derived_from_list[-1], fh)

    elif len(new_mode.derived_from_list) != 0:
        # This check is a 'service' -- for those who follow the old convention
        pos = fh.tell()
        skip_whitespace(fh)
        dummy_identifier = read_identifier(fh)
        if dummy_identifier != "":
            error.log("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.derived_from_list[-1], dummy_identifier) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
Example #4
0
def parse(fh, CodeFragmentName, 
          ErrorOnFailureF=True, AllowBriefTokenSenderF=True, ContinueF=True):
    """RETURNS: An object of class CodeUser containing
                line number, filename, and the code fragment.

                None in case of failure.
    """
    assert type(ErrorOnFailureF)        == bool
    assert type(AllowBriefTokenSenderF) == bool

    skip_whitespace(fh)

    word = fh.read(2)
    if len(word) >= 1 and word[0] == "{":
        if len(word) > 1: fh.seek(-1, 1) # unput the second character
        return __parse_normal(fh, CodeFragmentName)

    elif AllowBriefTokenSenderF and word == "=>":
        return __parse_brief_token_sender(fh, ContinueF)

    elif not ErrorOnFailureF:
        fh.seek(-2,1)
        return None
    else:
        error.log("Missing code fragment after %s definition." % CodeFragmentName, 
                  fh)
Example #5
0
def __parse_section(fh, descriptor, already_defined_list):
    global token_type_code_fragment_db
    assert type(already_defined_list) == list

    SubsectionList = ["name", "file_name", "standard", "distinct", "union", "inheritable", "noid"] \
                      + token_type_code_fragment_db.keys()

    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        fh.seek(position)
        if check(fh, "}"): 
            fh.seek(position) 
            return False
        error_msg("Missing token_type section ('standard', 'distinct', or 'union').", fh)

    verify_word_in_list(word, SubsectionList, 
                        "Subsection '%s' not allowed in token_type section." % word, fh)

    if word == "name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'name' specification.", fh)
        descriptor.class_name, descriptor.name_space, descriptor.class_name_safe = read_namespaced_name(fh, "token_type")
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'name' specification.", fh)

    elif word == "inheritable":
        descriptor.open_for_derivation_f = True
        check_or_die(fh, ";")

    elif word == "noid":
        descriptor.token_contains_token_id_f = False;
        check_or_die(fh, ";")

    elif word == "file_name":
        if not check(fh, "="):
            error_msg("Missing '=' in token_type 'file_name' specification.", fh)
        descriptor.set_file_name(read_until_letter(fh, ";"))
        if not check(fh, ";"):
            error_msg("Missing terminating ';' in token_type 'file_name' specification.", fh)

    elif word in ["standard", "distinct", "union"]:
        if   word == "standard": parse_standard_members(fh, word, descriptor, already_defined_list)
        elif word == "distinct": parse_distinct_members(fh, word, descriptor, already_defined_list)
        elif word == "union":    parse_union_members(fh, word, descriptor, already_defined_list)

        if not check(fh, "}"):
            fh.seek(position)
            error_msg("Missing closing '}' at end of token_type section '%s'." % word, fh);

    elif word in token_type_code_fragment_db.keys():
        fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)        
        descriptor.__dict__[word] = fragment

    else: 
        assert False, "This code section section should not be reachable because 'word'\n" + \
                      "was checked to fit in one of the 'elif' cases."

    return True
Example #6
0
def parse(fh):
    """This function parses a mode description and enters it into the 
       'blackboard.mode_description_db'. Once all modes are parsed
       they can be translated into 'real' modes and are located in
       'blackboard.mode_db'. 
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    # NOTE: constructor does register this mode in the mode_db
    new_mode  = ModeDescription(mode_name, SourceRef.from_FileHandle(fh))

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    dummy = fh.read(1)
    if dummy not in [":", "{"]:
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if dummy == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events
    while __parse_element(new_mode, fh): 
        pass
Example #7
0
def __parse_property_expression(stream, PropertyLetter, EqualConditionPossibleF=True):
    """Parses an expression of the form '\? { X [ = Y] }' where
       ? = PropertyLetter. If the '=' operator is present then
       two fields are returned first = left hand side, second = 
       right hand side. Othewise an element is returned.
    """
    assert len(PropertyLetter) == 1
    assert type(PropertyLetter) == str
    assert type(EqualConditionPossibleF) == bool

    # verify '\?'
    x = stream.read(2)
    if x != "\\" + PropertyLetter: 
        raise RegularExpressionException("Unicode property letter '\\%s' expected, received '%s'." % x)
    
    skip_whitespace(stream)

    x = stream.read(1)
    if x != "{": 
        raise RegularExpressionException("Unicode property '\\%s' not followed by '{'." % PropertyLetter)

    content = __snap_until(stream, "}")
    
    fields = content.split("=")

    if len(fields) == 0:
        raise RegularExpressionException("Unicode property expression '\\%s{}' cannot have no content.")

    if len(fields) > 2:
        raise RegularExpressionException("Unicode property expression '\\%s' can have at maximum one '='.")

    if not EqualConditionPossibleF and len(fields) == 2:
        raise RegularExpressionException("Unicode property expression '\\%s' does not allow '=' conditions")

    return map(lambda x: x.strip(), fields)
Example #8
0
def _parse_pattern(fh):
    name = read_identifier(fh, 
                           OnMissingStr="Missing identifier for pattern definition.")

    if blackboard.shorthand_db.has_key(name):
        error.log("Second definition of pattern '%s'.\n" % name + \
                  "Pattern names must be unique.", fh)

    skip_whitespace(fh)

    if check(fh, "}"): 
        error.log("Missing regular expression for pattern definition '%s'." % \
                  name, fh)

    # No encoding transformation, here. Transformation happens after 
    # expansion in a mode.
    pattern = regular_expression.parse(fh, AllowNothingIsFineF = True) 

    if pattern.has_pre_or_post_context():
        error.log("Pattern definition with pre- and/or post-context.\n" + \
                  "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
    state_machine = pattern.extract_sm()

    value = PatternShorthand(name, state_machine, SourceRef.from_FileHandle(fh), 
                             pattern.pattern_string())

    return name, value
Example #9
0
def __parse(fh, result, IndentationSetupF=False):
    """Parses pattern definitions of the form:
   
          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words the right hand side *must* be a character set.
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    while 1 + 1 == 2:
        skip_whitespace(fh)
        if check(fh, ">"): 
            break
        
        # A regular expression state machine
        pattern, identifier, sr = __parse_definition_head(fh, result)
        if pattern is None and IndentationSetupF:
            error.log("Keyword '\\else' cannot be used in indentation setup.", fh)

        # '__parse_definition_head()' ensures that only identifiers mentioned in 
        # 'result' are accepted. 
        if not IndentationSetupF:
            value = read_value_specifier(fh, identifier, 1)
            result.specify(identifier, pattern, value, sr)
        else:
            result.specify(identifier, pattern, sr)

        if not check(fh, ";"):
            error.log("Missing ';' after '%s' specification." % identifier, fh)

    return result
Example #10
0
File: mode.py Project: xxyzzzq/quex
def __parse_element(new_mode, fh):
    """Returns: False, if a closing '}' has been found.
                True, else.
    """
    position = fh.tell()
    try:
        description = "pattern or event handler" 

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        word = read_until_whitespace(fh)
        if word == "}": return False

        # -- check for 'on_entry', 'on_exit', ...
        if __parse_event(new_mode, fh, word): return True

        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern     = regular_expression.parse(fh)
        pattern.set_source_reference(SourceRef.from_FileHandle(fh, new_mode.name))

        position    = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string()

        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(description, fh)

    return True
Example #11
0
def __parse_element(new_mode, fh):
    """Returns: False, if a closing '}' has been found.
                True, else.
    """
    position = fh.tell()
    try:
        description = "pattern or event handler" 

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        word = read_until_whitespace(fh)
        if word == "}": return False

        # -- check for 'on_entry', 'on_exit', ...
        if __parse_event(new_mode, fh, word): return True

        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern     = regular_expression.parse(fh)
        pattern.set_source_reference(SourceRef.from_FileHandle(fh, new_mode.name))

        position    = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string()

        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        fh.seek(position)
        error_eof(description, fh)

    return True
Example #12
0
def __parse(fh, result, IndentationSetupF=False):
    """Parses pattern definitions of the form:
   
          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words the right hand side *must* be a character set.
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    while 1 + 1 == 2:
        skip_whitespace(fh)
        if check(fh, ">"):
            break

        # A regular expression state machine
        pattern, identifier, sr = __parse_definition_head(fh, result)
        if pattern is None and IndentationSetupF:
            error.log("Keyword '\\else' cannot be used in indentation setup.",
                      fh)

        # '__parse_definition_head()' ensures that only identifiers mentioned in
        # 'result' are accepted.
        if not IndentationSetupF:
            value = read_value_specifier(fh, identifier, 1)
            result.specify(identifier, pattern, value, sr)
        else:
            result.specify(identifier, pattern, sr)

        if not check(fh, ";"):
            error.log("Missing ';' after '%s' specification." % identifier, fh)

    return result
Example #13
0
def snap_set_expression(stream, PatternDict):
    assert     stream.__class__.__name__ == "StringIO" \
            or stream.__class__.__name__ == "file"

    __debug_entry("set_expression", stream)

    result = snap_property_set(stream)
    if result is not None: return result

    x = stream.read(2)
    if   x == "\\C":
        return snap_case_folded_pattern(stream, PatternDict, NumberSetF=True)

    elif x == "[:":
        result = snap_set_term(stream, PatternDict)
        skip_whitespace(stream)
        x = stream.read(2)
        if x != ":]":
            raise RegularExpressionException("Missing closing ':]' for character set expression.\n" + \
                                             "found: '%s'" % x)
    elif x[0] == "[":
        stream.seek(-1, 1)
        result = traditional_character_set.do(stream)   

    elif x[0] == "{":
        stream.seek(-1, 1)
        result = snap_replacement(stream, PatternDict, StateMachineF=False)   

    else:
        result = None

    return __debug_exit(result, stream)
Example #14
0
def parse(fh,
          CodeFragmentName,
          ErrorOnFailureF=True,
          AllowBriefTokenSenderF=True,
          ContinueF=True):
    """RETURNS: An object of class CodeUser containing
                line number, filename, and the code fragment.

                None in case of failure.
    """
    assert type(ErrorOnFailureF) == bool
    assert type(AllowBriefTokenSenderF) == bool

    skip_whitespace(fh)

    word = fh.read(2)
    if len(word) >= 1 and word[0] == "{":
        if len(word) > 1: fh.seek(-1, 1)  # unput the second character
        return __parse_normal(fh, CodeFragmentName)

    elif AllowBriefTokenSenderF and word == "=>":
        return __parse_brief_token_sender(fh, ContinueF)

    elif not ErrorOnFailureF:
        fh.seek(-2, 1)
        return None
    else:
        error_msg(
            "Missing code fragment after %s definition." % CodeFragmentName,
            fh)
Example #15
0
def parse(fh, mode_prep_prep_db):
    """This function parses a mode description and enters it into the 
    'blackboard.mode_prep_prep_db'. Modes are represented by Mode_PrepPrep
    objects.
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(
        fh, OnMissingStr="Missing identifier at beginning of mode definition.")
    error.insight("Mode '%s'" % mode_name)

    # NOTE: constructor does register this mode in the mode_db
    new_mode = Mode_PrepPrep(mode_name, SourceRef.from_FileHandle(fh))
    if new_mode.name in mode_prep_prep_db:
        error.log("Mode '%s' has been defined twice.\n" % new_mode.name,
                  new_mode.sr,
                  DontExitF=True)
        error.log("Earlier definition here.",
                  mode_prep_prep_db[new_mode.name].sr)

    mode_prep_prep_db[new_mode.name] = new_mode

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    dummy = fh.read(1)
    if dummy not in [":", "{"]:
        error.log("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if dummy == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events
    while __parse_element(new_mode, fh):
        pass
Example #16
0
    def _base_parse(self, fh, IndentationSetupF=False):
        """Parses pattern definitions of the form:
       
              [ \t]                                       => grid 4;
              [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

        In other words the right hand side *must* be a character set.

        ADAPTS: result to contain parsing information.
        """

        # NOTE: Catching of EOF happens in caller: parse_section(...)
        #
        while 1 + 1 == 2:
            skip_whitespace(fh)
            if check(fh, ">"): 
                break
            
            # A regular expression state machine
            pattern, identifier, sr = _parse_definition_head(fh, self.identifier_list)
            if pattern is None and IndentationSetupF:
                error.log("Keyword '\\else' cannot be used in indentation setup.", fh)

            # '_parse_definition_head()' ensures that only identifiers mentioned in 
            # 'result' are accepted. 
            if self.requires_count():
                count = _read_value_specifier(fh, identifier, 1)
                self.specify(identifier, pattern, count, sr)
            else:
                self.specify(identifier, pattern, sr)

            if not check(fh, ";"):
                error.log("Missing ';' after '%s' specification." % identifier, fh)

        return self.finalize()
Example #17
0
def parse(fh):
    """This function parses a mode description and enters it into the 
       'mode_description_db'. Once all modes are parsed
       they can be translated into 'real' modes and are located in
       'blackboard.mode_db'. 
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(fh)
    if mode_name == "":
        error_msg("missing identifier at beginning of mode definition.", fh)

    # NOTE: constructor does register this mode in the mode_db
    new_mode  = ModeDescription(mode_name, fh.name, get_current_line_info_number(fh))

    # (*) inherited modes / options
    skip_whitespace(fh)
    dummy = fh.read(1)
    if dummy not in [":", "{"]:
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if dummy == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events
    while __parse_element(new_mode, fh): 
        pass

    # (*) check for modes w/o pattern definitions
    if not new_mode.has_event_handler() and not new_mode.has_own_matches():
        if new_mode.options["inheritable"] != "only":
            new_mode.options["inheritable"] = "only"
            error_msg("Mode without pattern and event handlers needs to be 'inheritable only'.\n" + \
                      "<inheritable: only> has been added automatically.", fh,  DontExitF=True)
Example #18
0
def snap_property_set(stream):
    position = stream.tell()
    x = stream.read(2)
    if x == "\\P":
        stream.seek(position)
        return property.do(stream)
    elif x == "\\N":
        stream.seek(position)
        return property.do_shortcut(stream, "N", "na")  # UCS Property: Name
    elif x == "\\G":
        stream.seek(position)
        return property.do_shortcut(stream, "G",
                                    "gc")  # UCS Property: General_Category
    elif x == "\\E":
        skip_whitespace(stream)
        if check(stream, "{") == False:
            error.log("Missing '{' after '\\E'.", stream)
        encoding_name = __snap_until(stream, "}").strip()
        result = codec_db.get_supported_unicode_character_set(encoding_name)
        if result is None:
            error.log("Error occured at this place.", stream)
        return result
    else:
        stream.seek(position)
        return None
Example #19
0
def snap_curly_bracketed_expression(stream,
                                    PatternDict,
                                    Name,
                                    TriggerChar,
                                    MinN=1,
                                    MaxN=1):
    """Snaps a list of RE's in '{' and '}'. The separator between the patterns 
    is whitespace. 'MinN' and 'MaxN' determine the number of expected patterns.
    Set 'MaxN=INTEGER_MAX' for an arbitrary number of patterns.

    RETURNS: result = list of patterns, if MinN <= len(result) <= MaxN 
             else, the function sys.exit()-s.
    """
    assert MinN <= MaxN
    assert MinN > 0

    skip_whitespace(stream)

    # Read over the trigger character
    if not check(stream, "{"):
        error.log("Missing opening '{' after %s %s." % (Name, TriggerChar),
                  stream)

    result = []
    while 1 + 1 == 2:
        pattern = snap_expression(stream, PatternDict)
        if pattern is not None:
            result.append(pattern)

        if check(stream, "}"):
            break
        elif check_whitespace(stream):
            continue
        elif check(stream, "/") or check(stream, "$"):
            error.log(
                "Pre- or post contexts are not allowed in %s \\%s{...} expressions."
                % (Name, TriggerChar), stream)
        else:
            error.log(
                "Missing closing '}' %s in \\%s{...}." % (Name, TriggerChar),
                stream)

    if MinN != MaxN:
        if len(result) < MinN:
            error.log("At minimum %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)
        if len(result) > MaxN:
            error.log("At maximum %i pattern%s required between '{' and '}'" \
                      % (MaxN, "" if MaxN == 1 else "s"), stream)
    else:
        if len(result) != MinN:
            error.log("Exactly %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)

    def ensure_dfa(sm):
        if not sm.is_DFA_compliant(): return nfa_to_dfa.do(sm)
        else: return sm

    return [ensure_dfa(sm) for sm in result]
Example #20
0
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Snaps a predefined pattern from the input string and returns the resulting
       state machine.
    """
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)
    if pattern_name == "":
        raise RegularExpressionException(
            "Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    error.verify_word_in_list(
        pattern_name, PatternDict.keys(),
        "Specifier '%s' not found in any preceeding 'define { ... }' section."
        % pattern_name, stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__ == PatternShorthand

    # The replacement may be a state machine or a number set
    if StateMachineF:
        # Get a cloned version of state machine
        state_machine = reference.get_state_machine()
        assert isinstance(state_machine, DFA)

        # It is essential that state machines defined as patterns do not
        # have origins. Otherwise, the optimization of patterns that
        # contain pattern replacements might get confused and can
        # not find all optimizations.
        assert not state_machine.has_specific_acceptance_id()

        # A state machine, that contains pre- or post- conditions cannot be part
        # of a replacement. The addition of new post-contexts would mess up the pattern.
        ## if state_machine.has_pre_or_post_context():
        ##    error.log("Pre- or post-conditioned pattern was used in replacement.\n" + \
        ##              "Quex's regular expression grammar does not allow this.", stream)

        return state_machine

    else:
        # Get a cloned version of character set
        character_set = reference.get_character_set()
        if character_set is None:
            error.log(
                "Replacement in character set expression must be a character set.\n"
                "Specifier '%s' relates to a pattern state machine." %
                pattern_name, stream)

        if character_set.is_empty():
            error.log(
                "Referenced character set '%s' is empty.\nAborted." %
                pattern_name, stream)

        return character_set
Example #21
0
def _parse_function(fh):
    signature_str = read_until_character(fh, ":")
    signature     = Signature.from_string(signature_str)
    skip_whitespace(fh)
    # The function body remains a string until it is parsed at expansion time.
    function_body = read_until_character("\n").strip()
    name          = signature.function_name
    value         = FunctionCall(signature, function_body,
                                 Sr=SourceRef.from_FileHandle(fh))
    return name, value
Example #22
0
def snap_set_term(stream, PatternDict):
    global special_character_set_db

    __debug_entry("set_term", stream)

    operation_list = ["union", "intersection", "difference", "inverse"]
    character_set_list = special_character_set_db.keys()

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list:
        set_list = snap_set_list(stream, word, PatternDict)
        # if an error occurs during set_list parsing, an exception is thrown about syntax error

        L = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            if L > 1:
                for character_set in set_list[1:]:
                    result.unite_with(character_set)
            return __debug_exit(
                result.get_complement(Setup.buffer_codec.source_set), stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")

        if word == "union":
            for set in set_list[1:]:
                result.unite_with(set)
        elif word == "intersection":
            for set in set_list[1:]:
                result.intersect_with(set)
        elif word == "difference":
            for set in set_list[1:]:
                result.subtract(set)

    elif word in character_set_list:
        reg_expr = special_character_set_db[word]
        result = traditional_character_set.do_string(reg_expr)

    elif word != "":
        error.verify_word_in_list(word, character_set_list + operation_list,
                                  "Unknown keyword '%s'." % word, stream)
    else:
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
Example #23
0
def _read_value_specifier(fh, Keyword, Default=None):
    skip_whitespace(fh)
    value = read_integer(fh)
    if value is not None:     return value

    # not a number received, is it an identifier?
    variable = read_identifier(fh)
    if   variable != "":      return variable
    elif Default is not None: return Default

    error.log("Missing integer or variable name after keyword '%s'." % Keyword, fh) 
Example #24
0
def read_character_code(fh):
    # NOTE: This function is tested with the regeression test for feature request 2251359.
    #       See directory $QUEX_PATH/TEST/2251359.
    pos = fh.tell()
    
    start = fh.read(1)
    if start == "":  
        fh.seek(pos); return -1

    elif start == "'": 
        # read an utf-8 char an get the token-id
        # Example: '+'
        if check(fh, "\\"):
            # snap_backslashed_character throws an exception if 'backslashed char' is nonsense.
            character_code = snap_backslashed_character.do(fh, ReducedSetOfBackslashedCharactersF=True)
        else:
            character_code = __read_one_utf8_code_from_stream(fh)

        if character_code is None:
            error.log("Missing utf8-character for definition of character code by character.", 
                      fh)

        elif fh.read(1) != '\'':
            error.log("Missing closing ' for definition of character code by character.", 
                      fh)

        return character_code

    if start == "U":
        if fh.read(1) != "C": fh.seek(pos); return -1
        # read Unicode Name 
        # Example: UC MATHEMATICAL_MONOSPACE_DIGIT_FIVE
        skip_whitespace(fh)
        ucs_name = __read_token_identifier(fh)
        if ucs_name == "": fh.seek(pos); return -1
        # Get the character set related to the given name. Note, the size of the set
        # is supposed to be one.
        character_code = ucs_property_db.get_character_set("Name", ucs_name)
        if type(character_code) in [str, unicode]:
            error.verify_word_in_list(ucs_name, ucs_property_db["Name"].code_point_db,
                                      "The string %s\ndoes not identify a known unicode character." % ucs_name, 
                                      fh)
        elif type(character_code) not in [int, long]:
            error.log("%s relates to more than one character in unicode database." % ucs_name, 
                      fh) 
        return character_code

    fh.seek(pos)
    character_code = read_integer(fh)
    if character_code is not None: return character_code

    # Try to interpret it as something else ...
    fh.seek(pos)
    return -1               
Example #25
0
def snap_set_term(stream, PatternDict):
    global special_character_set_db

    __debug_entry("set_term", stream)    

    operation_list     = [ "union", "intersection", "difference", "inverse"]
    character_set_list = special_character_set_db.keys()

    skip_whitespace(stream)
    position = stream.tell()

    # if there is no following '(', then enter the 'snap_expression' block below
    word = read_identifier(stream)

    if word in operation_list: 
        set_list = snap_set_list(stream, word, PatternDict)
        # if an error occurs during set_list parsing, an exception is thrown about syntax error

        L      = len(set_list)
        result = set_list[0]

        if word == "inverse":
            # The inverse of multiple sets, is to be the inverse of the union of these sets.
            if L > 1:
                for character_set in set_list[1:]:
                    result.unite_with(character_set)
            return __debug_exit(result.get_complement(Setup.buffer_codec.source_set), stream)

        if L < 2:
            raise RegularExpressionException("Regular Expression: A %s operation needs at least\n" % word + \
                                             "two sets to operate on them.")
            
        if   word == "union":
            for set in set_list[1:]:
                result.unite_with(set)
        elif word == "intersection":
            for set in set_list[1:]:
                result.intersect_with(set)
        elif word == "difference":
            for set in set_list[1:]:
                result.subtract(set)

    elif word in character_set_list:
        reg_expr = special_character_set_db[word]
        result   = traditional_character_set.do_string(reg_expr)

    elif word != "":
        verify_word_in_list(word, character_set_list + operation_list, 
                            "Unknown keyword '%s'." % word, stream)
    else:
        stream.seek(position)
        result = snap_set_expression(stream, PatternDict)

    return __debug_exit(result, stream)
Example #26
0
def __parse_element(new_mode, fh):
    """Returns: False, if a closing '}' has been found.
                True, else.
    """
    position = fh.tell()
    try:
        description = "pattern or event handler"

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        identifier = read_identifier(fh)
        if identifier == "keyword_list":
            return __parse_keyword_list(new_mode, fh)
        elif similarity.get(identifier, ["keyword_list", "key words"]) != -1:
            error.warning(
                "'%s' is similar to keyword 'keyword_list'.\n"
                "For clarity, use quotes." % identifier, fh)
        elif identifier == "brief":
            return __parse_brief(new_mode, fh)
        elif similarity.get(identifier,
                            ["brief", "briefing", "briefly"]) != -1:
            error.warning(
                "'%s' is similar to keyword 'brief'.\n"
                "For clarity, use quotes." % identifier, fh)

        fh.seek(position)
        word = read_until_whitespace(fh)
        if word == "}":
            return False
            # -- check for 'on_entry', 'on_exit', ...
        elif __parse_event(new_mode, fh, word):
            return True

        fh.seek(position)
        description = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(
            SourceRef.from_FileHandle(fh, new_mode.name))

        position = fh.tell()
        description = "start of mode element: code fragment for '%s'" % pattern.pattern_string(
        )

        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof(description, fh)

    return True
Example #27
0
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Snaps a predefined pattern from the input string and returns the resulting
       state machine.
    """ 
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)  
    if pattern_name == "":
        raise RegularExpressionException("Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    verify_word_in_list(pattern_name, PatternDict.keys(),
                        "Specifier '%s' not found in any preceeding 'define { ... }' section." % pattern_name, 
                        stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__.__name__ == "PatternShorthand" 

    # The replacement may be a state machine or a number set
    if StateMachineF:
        # Get a cloned version of state machine
        state_machine = reference.get_state_machine()
        assert isinstance(state_machine, StateMachine)

        # It is essential that state machines defined as patterns do not 
        # have origins. Otherwise, the optimization of patterns that
        # contain pattern replacements might get confused and can
        # not find all optimizations.
        assert state_machine.has_origins() == False
            
        # A state machine, that contains pre- or post- conditions cannot be part
        # of a replacement. The addition of new post-contexts would mess up the pattern.
        ## if state_machine.has_pre_or_post_context():
        ##    error_msg("Pre- or post-conditioned pattern was used in replacement.\n" + \
        ##              "Quex's regular expression grammar does not allow this.", stream)
            
        return state_machine

    else:
        # Get a cloned version of character set
        character_set = reference.get_character_set()
        if character_set is None:
            error_msg("Replacement in character set expression must be a character set.\n"
                      "Specifier '%s' relates to a pattern state machine." % pattern_name, stream)

        if character_set.is_empty():
            error_msg("Referenced character set '%s' is empty.\nAborted." % pattern_name, stream)

        return character_set
Example #28
0
def snap_curly_bracketed_expression(stream, PatternDict, Name, TriggerChar, MinN=1, MaxN=1):
    """Snaps a list of RE's in '{' and '}'. The separator between the patterns is 
       whitespace. 'MinN' and 'MaxN' determine the number of expected patterns.
       Set 'MaxN=sys.maxint' for an arbitrary number of patterns.



       RETURNS: result = list of patterns. 

                it holds: len(result) >= MinN  
                          len(result) <= MaxN

                if not, the function sys.exit()-s.
       
    """
    assert MinN <= MaxN
    assert MinN > 0

    skip_whitespace(stream)

    # Read over the trigger character 
    if not check(stream, "{"):
        error_msg("Missing opening '{' after %s %s." % (Name, TriggerChar), stream)

    result = []
    while 1 + 1 == 2:
        pattern = snap_expression(stream, PatternDict) 
        if pattern is not None:
            result.append(pattern)

        if check(stream, "}"):
            break
        elif check_whitespace(stream):
            continue
        elif check(stream, "/") or check(stream, "$"):
            error_msg("Pre- or post contexts are not allowed in %s \\%s{...} expressions." % (Name, TriggerChar), stream)
        else:
            error_msg("Missing closing '}' %s in \\%s{...}." % (Name, TriggerChar), stream)

    if MinN != MaxN:
        if len(result) < MinN:
            error_msg("At minimum %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)
        if len(result) > MaxN:
            error_msg("At maximum %i pattern%s required between '{' and '}'" \
                      % (MaxN, "" if MaxN == 1 else "s"), stream)
    else:
        if len(result) != MinN:
            error_msg("Exactly %i pattern%s required between '{' and '}'" \
                      % (MinN, "" if MinN == 1 else "s"), stream)

    return result
Example #29
0
def parse_token_id_definitions(fh, NamesOnlyF=False):
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    token_prefix       = Setup.token_id_prefix
    token_prefix_plain = Setup.token_id_prefix_plain # i.e. without name space included

    if NamesOnlyF: db = {}
    else:          db = blackboard.token_id_db

    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("missing opening '{' for after 'token' section identifier.\n", fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True)

        if candidate == "":
            error_msg("Missing valid token identifier." % candidate, fh)

        # -- check the name, if it starts with the token prefix paste a warning
        if candidate.find(token_prefix) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)
        elif candidate.find(token_prefix_plain) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)

        skip_whitespace(fh)

        if NamesOnlyF:
            db[token_prefix + candidate] = True
            if check(fh, ";") == False:
                error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                          "This is mandatory since Quex version 0.50.1.", fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error_msg("Missing number after '=' for token identifier '%s'." % candidate, fh)

        if check(fh, ";") == False:
            error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                      "This is mandatory since Quex version 0.50.1.", fh)

        db[candidate] = TokenInfo(candidate, numeric_value, Filename=fh.name, LineN=get_current_line_info_number(fh))

    if NamesOnlyF:
        result = db.keys()
        result.sort()
        return result
Example #30
0
def __parse_option_list(new_mode, fh):
    position = fh.tell()
    try:  
        # ':' => inherited modes/options follow
        skip_whitespace(fh)

        __parse_base_mode_list(fh, new_mode)
        
        while __parse_option(fh, new_mode):
            pass

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing options of mode '%s'." % new_mode.name, fh)
Example #31
0
def __parse_option_list(new_mode, fh):
    position = fh.tell()
    try:  
        # ':' => inherited modes/option_db follow
        skip_whitespace(fh)

        __parse_base_mode_list(fh, new_mode)
        
        while mode_option.parse(fh, new_mode):
            pass

    except EndOfStreamException:
        fh.seek(position)
        error_eof("mode '%s'." % new_mode.name, fh)
Example #32
0
def __parse_option_list(new_mode, fh):
    position = fh.tell()
    try:
        # ':' => inherited modes/option_db follow
        skip_whitespace(fh)

        __parse_base_mode_list(fh, new_mode)

        while mode_option.parse(fh, new_mode):
            pass

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("mode '%s'." % new_mode.name, fh)
Example #33
0
def __parse_skip_option(fh, new_mode, identifier):
    """A skipper 'eats' characters at the beginning of a pattern that belong to
    a specified set of characters. A useful application is most probably the
    whitespace skipper '[ \t\n]'. The skipper definition allows quex to
    implement a very effective way to skip these regions."""

    pattern, trigger_set = regular_expression.parse_character_set(fh, ">")

    skip_whitespace(fh)

    if fh.read(1) != ">":
        error.log("missing closing '>' for mode option '%s'." % identifier, fh)
    elif trigger_set.is_empty():
        error.log("Empty trigger set for skipper." % identifier, fh)

    return pattern, trigger_set
Example #34
0
def __parse_option_list(new_mode, fh):
    position = fh.tell()
    try:
        # ':' => inherited modes/options follow
        skip_whitespace(fh)

        __parse_base_mode_list(fh, new_mode)

        while __parse_option(fh, new_mode):
            pass

    except EndOfStreamException:
        fh.seek(position)
        error_msg(
            "End of file reached while parsing options of mode '%s'." %
            new_mode.name, fh)
Example #35
0
def __parse_skip_option(fh, new_mode, identifier):
    """A skipper 'eats' characters at the beginning of a pattern that belong to
    a specified set of characters. A useful application is most probably the
    whitespace skipper '[ \t\n]'. The skipper definition allows quex to
    implement a very effective way to skip these regions."""

    pattern, trigger_set = regular_expression.parse_character_set(fh, ">")

    skip_whitespace(fh)

    if fh.read(1) != ">":
        error.log("missing closing '>' for mode option '%s'." % identifier, fh)
    elif trigger_set.is_empty():
        error.log("Empty trigger set for skipper." % identifier, fh)

    return pattern, trigger_set
Example #36
0
def __parse_action(new_mode, fh, pattern_str, pattern):

    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()

        code_obj = code_fragment.parse(fh,
                                       "regular expression",
                                       ErrorOnFailureF=False)
        if code_obj is not None:
            new_mode.add_match(pattern_str, code_obj, pattern)
            return

        fh.seek(position)
        word = read_until_letter(fh, [";"])
        if word == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the current
            # pattern index (important for inherited patterns, that have higher precedence).
            # The parser already constructed a state machine for the pattern that is to
            # be assigned a new priority. Since, this machine is not used, let us just
            # use its id.
            fh.seek(-1, 1)
            check_or_die(fh, ";",
                         ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_priority(pattern_str, pattern,
                                        pattern.sm.get_id(), fh.name,
                                        get_current_line_info_number(fh))

        elif word == "DELETION":
            # This mark deletes any pattern that was inherited with the same 'name'
            fh.seek(-1, 1)
            check_or_die(fh, ";",
                         ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_deletion(pattern_str, pattern, fh.name,
                                        get_current_line_info_number(fh))

        else:
            error_msg("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern_str + \
                      "found: '%s'. Note, that since quex version 0.33.5 it is required to add a ';'\n" % word + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing action code for pattern.",
                  fh)
Example #37
0
def __create_mode_transition_and_token_sender(fh, Command):
    assert Command in ["GOTO", "GOSUB", "GOUP"]

    position = fh.tell()
    LanguageDB = Setup.language_db
    target_mode = ""
    token_sender = ""
    if check(fh, "("):
        skip_whitespace(fh)
        if Command != "GOUP":
            target_mode = __read_token_identifier(fh)
            skip_whitespace(fh)

        if check(fh, ")"):
            token_sender = ""

        elif Command == "GOUP" or check(fh, ","):
            skip_whitespace(fh)
            token_name = __read_token_identifier(fh)
            skip_whitespace(fh)

            if check(fh, ","):
                error_msg(
                    "Missing opening '(' after token name specification.\n"
                    "Note, that since version 0.50.1 the syntax for token senders\n"
                    "inside brief mode transitions is like:\n\n"
                    "     => GOTO(MYMODE, QUEX_TKN_MINE(Argument0, Argument1, ...));\n",
                    fh)

            token_sender = __create_token_sender_by_token_name(fh, token_name)

            if check(fh, ")") == False:
                error_msg("Missing closing ')' or ',' after '%s'." % Command,
                          fh)

        else:
            fh.seek(position)
            error_msg("Missing closing ')' or ',' after '%s'." % Command, fh)

    if check(fh, ";") == False:
        error_msg("Missing ')' or ';' after '%s'." % Command, fh)

    if Command in ["GOTO", "GOSUB"] and target_mode == "":
        error_msg(
            "Command %s requires at least one argument: The target mode." %
            Command, fh)

    # Code for mode change
    if Command == "GOTO": txt = LanguageDB.MODE_GOTO(target_mode)
    elif Command == "GOSUB": txt = LanguageDB.MODE_GOSUB(target_mode)
    else: txt = LanguageDB.MODE_GOUP()

    # Code for token sending
    txt += token_sender

    return txt
Example #38
0
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, "}"):
            return

        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(
            fh, OnMissingStr="Missing identifier for pattern definition.")

        if blackboard.shorthand_db.has_key(pattern_name):
            error.log("Second definition of pattern '%s'.\n" % pattern_name + \
                      "Pattern names must be unique.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error.log("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        pattern = regular_expression.parse(fh, AllowNothingIsFineF=True)

        if pattern.has_pre_or_post_context():
            error.log("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.",
                      fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                PatternShorthand(pattern_name, state_machine,
                                 SourceRef.from_FileHandle(fh), pattern.pattern_string())
Example #39
0
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, "}"):
            return

        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if pattern_name == "":
            error_msg("Missing identifier for pattern definition.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error_msg("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        regular_expression_str, pattern = \
                regular_expression.parse(fh, AllowNothingIsFineF = True,
                                         AllowStateMachineTrafoF = False)

        if pattern.has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                blackboard.PatternShorthand(pattern_name, state_machine,
                                            fh.name, get_current_line_info_number(fh),
                                            regular_expression_str)
Example #40
0
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, "}"): 
            return
        
        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if pattern_name == "":
            error_msg("Missing identifier for pattern definition.", fh)

        skip_whitespace(fh)

        if check(fh, "}"): 
            error_msg("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        regular_expression_str, pattern = \
                regular_expression.parse(fh, AllowNothingIsFineF = True, 
                                         AllowStateMachineTrafoF = False) 

        if pattern.has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                blackboard.PatternShorthand(pattern_name, state_machine, 
                                            fh.name, get_current_line_info_number(fh),
                                            regular_expression_str)
Example #41
0
def __create_mode_transition_and_token_sender(fh, Op):
    assert Op in ["GOTO", "GOSUB", "GOUP"]

    position     = fh.tell()
    target_mode  = ""
    token_sender = ""
    if check(fh, "("):
        skip_whitespace(fh)
        if Op != "GOUP":
            target_mode = __read_token_identifier(fh)
            skip_whitespace(fh)

        if check(fh, ")"):
            token_sender = ""

        elif Op == "GOUP" or check(fh, ","):
            skip_whitespace(fh)
            token_name = __read_token_identifier(fh)
            skip_whitespace(fh)

            if check(fh, ","):
                error.log("Missing opening '(' after token name specification.\n" 
                          "Note, that since version 0.50.1 the syntax for token senders\n"
                          "inside brief mode transitions is like:\n\n"
                          "     => GOTO(MYMODE, QUEX_TKN_MINE(Argument0, Argument1, ...));\n", 
                          fh)

            token_sender = __create_token_sender_by_token_name(fh, token_name) 

            if check(fh, ")") == False:
                error.log("Missing closing ')' or ',' after '%s'." % Op, 
                          fh)

        else:
            fh.seek(position)
            error.log("Missing closing ')' or ',' after '%s'." % Op, fh)

    if check(fh, ";") == False:
        error.log("Missing ')' or ';' after '%s'." % Op, fh)

    if Op in ["GOTO", "GOSUB"] and target_mode == "": 
        error.log("Op %s requires at least one argument: The target mode." % Op, 
                  fh)

    # Code for mode change
    if   Op == "GOTO":  txt = Lng.MODE_GOTO(target_mode)
    elif Op == "GOSUB": txt = Lng.MODE_GOSUB(target_mode)
    else:                    txt = Lng.MODE_GOUP()

    # Code for token sending
    txt += token_sender

    return txt
Example #42
0
File: core.py Project: xxyzzzq/quex
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, "}"): 
            return
        
        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh, OnMissingStr="Missing identifier for pattern definition.")

        if blackboard.shorthand_db.has_key(pattern_name):
            error.log("Second definition of pattern '%s'.\n" % pattern_name + \
                      "Pattern names must be unique.", fh)

        skip_whitespace(fh)

        if check(fh, "}"): 
            error.log("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No possible transformation into a particular codec whatever.
        #  the state machines are transformed once, after they are expanded
        #  as patterns in a mode.)
        pattern = regular_expression.parse(fh, AllowNothingIsFineF = True) 

        if pattern.has_pre_or_post_context():
            error.log("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", 
                      fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                PatternShorthand(pattern_name, state_machine, 
                                 SourceRef.from_FileHandle(fh), pattern.pattern_string())
Example #43
0
def __parse_element(new_mode, fh):
    """Returns: False, if a closing '}' has been found.
                True, else.
    """
    position = fh.tell()
    try:
        description = "Pattern or event handler name.\n" + \
                      "Missing closing '}' for end of mode"

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        word = read_until_whitespace(fh)
        if word == "}": return False

        # -- check for 'on_entry', 'on_exit', ...
        if __parse_event(new_mode, fh, word): return True

        fh.seek(position)
        description = "Start of mode element: regular expression"
        pattern_str, pattern = regular_expression.parse(fh)

        if new_mode.has_pattern(pattern_str):
            previous = new_mode.get_pattern_action_pair(pattern_str)
            error_msg("Pattern has been defined twice.", fh, DontExitF=True)
            error_msg("First defined here.",
                      previous.action().filename,
                      previous.action().line_n)

        position = fh.tell()
        description = "Start of mode element: code fragment for '%s'" % pattern_str

        __parse_action(new_mode, fh, pattern_str, pattern)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing %s." % description, fh)

    return True
def snap_property_set(stream):
    position = stream.tell()
    x = stream.read(2)
    if   x == "\\P": 
        stream.seek(position)
        return property.do(stream)
    elif x == "\\N": 
        stream.seek(position)
        return property.do_shortcut(stream, "N", "na") # UCS Property: Name
    elif x == "\\G": 
        stream.seek(position)
        return property.do_shortcut(stream, "G", "gc") # UCS Property: General_Category
    elif x == "\\E": 
        skip_whitespace(stream)
        if check(stream, "{") == False:
            error_msg("Missing '{' after '\\E'.", stream)
        encoding_name = __snap_until(stream, "}").strip()
        return codec_db.get_supported_unicode_character_set(encoding_name, FH=stream)
    else:
        stream.seek(position)
        return None
Example #45
0
def __parse_range_skipper_option(fh, identifier, new_mode):
    """A non-nesting skipper can contain a full fledged regular expression as opener,
    since it only effects the trigger. Not so the nested range skipper-see below.
    """
    # Range state machines only accept 'strings' not state machines
    # Pattern: opener 'white space' closer 'white space' '>'
    skip_whitespace(fh)
    opener_pattern = regular_expression.parse_non_precontexted_pattern(
        fh, identifier, ">", AllowNothingIsFineF=False)
    _assert_pattern_constaints(opener_pattern, "Skip range opener", fh)

    skip_whitespace(fh)
    closer_pattern = regular_expression.parse_non_precontexted_pattern(
        fh, identifier, ">", AllowNothingIsFineF=True)
    _assert_pattern_constaints(closer_pattern, "Skip range closer", fh)

    opener_pattern.set_pattern_string("<%s open>" % identifier)
    closer_pattern.set_pattern_string("<%s close>" % identifier)

    # -- closer
    skip_whitespace(fh)
    if fh.read(1) != ">":
        error.log("missing closing '>' for mode option '%s'" % identifier, fh)

    return SkipRangeData(opener_pattern, closer_pattern)
Example #46
0
def parse(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"

          \function SOMETHING(sm = X, set = Y, number = N):
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments can only be '//' nothing else and they have to appear at the
       beginning of the line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while 1 + 1 == 2:
        skip_whitespace(fh)

        if check(fh, "}"): 
            return
        
        # Get the name of the pattern
        skip_whitespace(fh)
        if check(fh, "\\function"): name, value = _parse_function(fh)
        else:                       name, value = _parse_pattern(fh)

        blackboard.shorthand_db[name] = value
Example #47
0
def __parse_range_skipper_option(fh, identifier, new_mode):
    """A non-nesting skipper can contain a full fledged regular expression as opener,
    since it only effects the trigger. Not so the nested range skipper-see below.
    """

    # Range state machines only accept 'strings' not state machines
    # Pattern: opener 'white space' closer 'white space' '>'
    skip_whitespace(fh)
    opener_pattern, opener_sequence = regular_expression.parse_character_string(
        fh, ">")
    skip_whitespace(fh)
    closer_pattern, closer_sequence = regular_expression.parse_character_string(
        fh, ">")

    # -- closer
    skip_whitespace(fh)
    if fh.read(1) != ">":
        error.log("missing closing '>' for mode option '%s'" % identifier, fh)
    elif len(opener_sequence) == 0:
        error.log("Empty sequence for opening delimiter.", fh)
    elif len(closer_sequence) == 0:
        error.log("Empty sequence for closing delimiter.", fh)

    return SkipRangeData(opener_pattern, opener_sequence, \
                         closer_pattern, closer_sequence)
Example #48
0
def __parse_action(new_mode, fh, pattern_str, pattern):

    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()
            
        code_obj = code_fragment.parse(fh, "regular expression", ErrorOnFailureF=False) 
        if code_obj is not None:
            new_mode.add_match(pattern_str, code_obj, pattern)
            return

        fh.seek(position)
        word = read_until_letter(fh, [";"])
        if word == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the current
            # pattern index (important for inherited patterns, that have higher precedence).
            # The parser already constructed a state machine for the pattern that is to
            # be assigned a new priority. Since, this machine is not used, let us just
            # use its id.
            fh.seek(-1, 1)
            check_or_die(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_priority(pattern_str, pattern, pattern.sm.get_id(), 
                                        fh.name, get_current_line_info_number(fh))

        elif word == "DELETION":
            # This mark deletes any pattern that was inherited with the same 'name'
            fh.seek(-1, 1)
            check_or_die(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_deletion(pattern_str, pattern, fh.name, get_current_line_info_number(fh))
            
        else:
            error_msg("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern_str + \
                      "found: '%s'. Note, that since quex version 0.33.5 it is required to add a ';'\n" % word + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)


    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing action code for pattern.", fh)
Example #49
0
def __parse_element(new_mode, fh):
    """Returns: False, if a closing '}' has been found.
                True, else.
    """
    position = fh.tell()
    try:
        description = "Pattern or event handler name.\n" + \
                      "Missing closing '}' for end of mode"

        skip_whitespace(fh)
        # NOTE: Do not use 'read_word' since we need to continue directly after
        #       whitespace, if a regular expression is to be parsed.
        position = fh.tell()

        word = read_until_whitespace(fh)
        if word == "}": return False

        # -- check for 'on_entry', 'on_exit', ...
        if __parse_event(new_mode, fh, word): return True

        fh.seek(position)
        description = "Start of mode element: regular expression"
        pattern_str, pattern = regular_expression.parse(fh)

        if new_mode.has_pattern(pattern_str):
            previous = new_mode.get_pattern_action_pair(pattern_str)
            error_msg("Pattern has been defined twice.", fh, DontExitF=True)
            error_msg("First defined here.", 
                      previous.action().filename, previous.action().line_n)

        position    = fh.tell()
        description = "Start of mode element: code fragment for '%s'" % pattern_str

        __parse_action(new_mode, fh, pattern_str, pattern)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing %s." % description, fh)

    return True
def snap_set_list(stream, set_operation_name, PatternDict):
    __debug_entry("set_list", stream)

    skip_whitespace(stream)
    if stream.read(1) != "(":
        raise RegularExpressionException(
            "Missing opening bracket '%s' operation." % set_operation_name)

    set_list = []
    while 1 + 1 == 2:
        skip_whitespace(stream)
        result = snap_set_term(stream, PatternDict)
        if result is None:
            raise RegularExpressionException(
                "Missing set expression list after '%s' operation." %
                set_operation_name)
        set_list.append(result)
        skip_whitespace(stream)
        tmp = stream.read(1)
        if tmp != ",":
            if tmp != ")":
                stream.seek(-1, 1)
                raise RegularExpressionException(
                    "Missing closing ')' after after '%s' operation." %
                    set_operation_name)
            return __debug_exit(set_list, stream)
Example #51
0
def __parse_brief_token_sender(fh, ContinueF):
    # shorthand for { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }

    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()

        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1:
            code = __create_token_sender_by_character_code(fh, code)
        else:
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(
                    fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if len(code) != 0:
            if ContinueF:
                code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
            return CodeUser(code, SourceRef.from_FileHandle(fh))
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error_eof("token", fh)
Example #52
0
def __parse_brief_token_sender(fh, ContinueF):
    # shorthand for { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    LanguageDB = Setup.language_db
    
    position = fh.tell()
    line_n   = get_current_line_info_number(fh) + 1
    try: 
        skip_whitespace(fh)
        position = fh.tell()

        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1: 
            code = __create_token_sender_by_character_code(fh, code)
        else:
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if code != "": 
            if ContinueF: code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
            return UserCodeFragment(code, fh.name, line_n, LanguageDB)
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing token shortcut.", fh)
Example #53
0
def __parse_brief_token_sender(fh, ContinueF):
    # shorthand for { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    
    
    position = fh.tell()
    try: 
        skip_whitespace(fh)
        position = fh.tell()

        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1: 
            code = __create_token_sender_by_character_code(fh, code)
        else:
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if len(code) != 0: 
            if ContinueF: code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
            return CodeUser(code, SourceRef.from_FileHandle(fh))
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error.error_eof("token", fh)
Example #54
0
def read_option_start(fh):
    skip_whitespace(fh)

    # (*) base modes 
    if fh.read(1) != "<": 
        ##fh.seek(-1, 1) 
        return None

    skip_whitespace(fh)
    identifier = read_identifier(fh, OnMissingStr="Missing identifer after start of mode option '<'").strip()

    skip_whitespace(fh)
    if fh.read(1) != ":": error.log("missing ':' after option name '%s'" % identifier, fh)
    skip_whitespace(fh)

    return identifier
Example #55
0
def get_codec_transformation_info(Codec=None, FileName=None, FH=-1, LineN=None):
    """Provides the information about the relation of character codes in a particular 
       coding to unicode character codes. It is provided in the following form:

       # Codec Values                 Unicode Values
       [ (Source0_Begin, Source0_End, TargetInterval0_Begin), 
         (Source1_Begin, Source1_End, TargetInterval1_Begin),
         (Source2_Begin, Source2_End, TargetInterval2_Begin), 
         ... 
       ]

       Arguments FH and LineN correspond to the arguments of error_msg.
    """
    assert Codec is not None or FileName is not None

    if FileName is not None:
        file_name = FileName
    else:
        distinct_codec = __get_distinct_codec_name_for_alias(Codec)
        file_name      = __codec_db_path + "/%s.dat" % distinct_codec

    fh = open_file_or_die(file_name, "rb")

    # Read coding into data structure
    transformation_list = []
    try:
        while 1 + 1 == 2:
            skip_whitespace(fh)
            source_begin = read_integer(fh)
            if source_begin is None:
                error_msg("Missing integer (source interval begin) in codec file.", fh)
            skip_whitespace(fh)
            source_size = read_integer(fh)
            if source_size is None:
                error_msg("Missing integer (source interval size) in codec file.", fh)
            skip_whitespace(fh)
            target_begin = read_integer(fh)
            if target_begin is None:
                error_msg("Missing integer (target interval begin) in codec file.", fh)

            source_end = source_begin + source_size
            transformation_list.append([source_begin, source_end, target_begin])
    except EndOfStreamException:
        pass

    return transformation_list
Example #56
0
def do(section_list, fh):
    """Parses a codec information file. The described codec can only be
    a 'static character length' encoding. That is every character in the
    code occupies the same number of bytes.

    RETURNS: [0] Set of characters in unicode which are covered by the
                 described codec.
             [1] Range of values in the codec elements.
    """
    source_set = NumberSet()
    drain_set  = NumberSet()

    error_str = None

    try:
        while error_str is None:
            skip_whitespace(fh)
            source_begin = read_integer(fh)
            if source_begin is None:
                error_str = "Missing integer (source interval begin) in codec file."
                continue

            skip_whitespace(fh)
            source_size = read_integer(fh)
            if source_size is None:
                error_str = "Missing integer (source interval size) in codec file." 
                continue

            skip_whitespace(fh)
            target_begin = read_integer(fh)
            if target_begin is None:
                error_str = "Missing integer (target interval begin) in codec file."
                continue

            source_end = source_begin + source_size
            list.append(section_list, [source_begin, source_end, target_begin])

            source_set.add_interval(Interval(source_begin, source_end))
            drain_set.add_interval(Interval(target_begin, target_begin + source_size))

    except EndOfStreamException:
        pass

    return source_set, drain_set, error_str