Example #1
def parse(fh, new_mode):
    source_reference = SourceRef.from_FileHandle(fh)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(), "mode option",
                        fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        value = __parse_skip_option(fh, new_mode, identifier)

    elif identifier in ["skip_range", "skip_nested_range"]:
        value = __parse_range_skipper_option(fh, identifier, new_mode)

    elif identifier == "indentation":
        value = counter.parse_indentation(fh)
        value.set_containing_mode_name(new_mode.name)
        blackboard.required_support_indentation_count_set()

    elif identifier == "counter":
        value = counter.parse_line_column_counter(fh)

    elif identifier in ("entry", "exit", "restrict"):
        value = read_option_value(fh, ListF=True)  # A 'list' of strings
    else:
        value = read_option_value(fh)  # A single string

    # Finally, set the option
    new_mode.option_db.enter(identifier, value, source_reference,
                             new_mode.name)
    return True
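
For orientation, a hedged sketch of the option fragments this dispatcher routes on; the concrete spellings are inferred from the identifiers matched above, not quoted from the Quex documentation:

# Assumed input fragments, one per branch of the dispatch above:
#
#   <skip:        [ \t\n]>         --> __parse_skip_option(...)
#   <skip_range:  "/*" "*/">       --> __parse_range_skipper_option(...)
#   <indentation: ...>             --> counter.parse_indentation(...)
#   <counter:     ...>             --> counter.parse_line_column_counter(...)
#   <entry:       MODE_A, MODE_B>  --> read_option_value(fh, ListF=True)
#   <inheritable: only>            --> read_option_value(fh)
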
Example #2
def parse(fh, new_mode):
    source_reference = SourceRef.from_FileHandle(fh)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if   identifier == "skip":
        value = __parse_skip_option(fh, new_mode, identifier)

    elif identifier in ["skip_range", "skip_nested_range"]:
        value = __parse_range_skipper_option(fh, identifier, new_mode)
        
    elif identifier == "indentation":
        value = counter.parse_indentation(fh)
        value.set_containing_mode_name(new_mode.name)
        blackboard.required_support_indentation_count_set()

    elif identifier == "counter":
        value = counter.parse_line_column_counter(fh)

    elif identifier in ("entry", "exit", "restrict"):
        value = read_option_value(fh, ListF=True) # A 'list' of strings
    else:
        value = read_option_value(fh)             # A single string

    # Finally, set the option
    new_mode.option_db.enter(identifier, value, source_reference, new_mode.name)
    return True
Example #3
def __create_token_sender_by_character_code(fh, CharacterCode):
    # The '--' will prevent the token name from being printed
    prefix_less_token_name = "UCS_0x%06X" % CharacterCode
    token_id_str           = "0x%06X" % CharacterCode 
    blackboard.token_id_db["--" + prefix_less_token_name] = \
            TokenInfo(prefix_less_token_name, CharacterCode, None, fh.name, get_current_line_info_number(fh)) 
    return "self_send(%s);\n" % token_id_str
Example #4
def parse_token_id_definitions(fh, NamesOnlyF=False):
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    token_prefix       = Setup.token_id_prefix
    token_prefix_plain = Setup.token_id_prefix_plain # i.e. without name space included

    if NamesOnlyF: db = {}
    else:          db = blackboard.token_id_db

    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("missing opening '{' for after 'token' section identifier.\n", fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True)

        if candidate == "":
            error_msg("Missing valid token identifier." % candidate, fh)

        # -- check the name; if it starts with the token prefix, issue a warning
        if candidate.find(token_prefix) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)
        elif candidate.find(token_prefix_plain) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)

        skip_whitespace(fh)

        if NamesOnlyF:
            db[token_prefix + candidate] = True
            if check(fh, ";") == False:
                error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                          "This is mandatory since Quex version 0.50.1.", fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error_msg("Missing number after '=' for token identifier '%s'." % candidate, fh)

        if check(fh, ";") == False:
            error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                      "This is mandatory since Quex version 0.50.1.", fh)

        db[candidate] = TokenInfo(candidate, numeric_value, Filename=fh.name, LineN=get_current_line_info_number(fh))

    if NamesOnlyF:
        result = db.keys()
        result.sort()
        return result
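
Reconstructed from the checks above ('{', identifier, optional '= number', mandatory ';', closing '}'), a hedged sketch of a token section this parser would accept; the identifiers are invented:

# token {
#     IDENT;
#     NUMBER = 0x42;
#     STRING;
# }
#
# With NamesOnlyF=True only the prefixed names are collected and returned
# sorted; otherwise each name is entered as a TokenInfo into
# blackboard.token_id_db.
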
Example #5
def __create_token_sender_by_character_code(fh, CharacterCode):
    # The '--' will prevent the token name from being printed
    prefix_less_token_name = "UCS_0x%06X" % CharacterCode
    token_id_str = "0x%06X" % CharacterCode
    blackboard.token_id_db["--" + prefix_less_token_name] = \
            TokenInfo(prefix_less_token_name, CharacterCode, None, fh.name, get_current_line_info_number(fh))
    return "self_send(%s);\n" % token_id_str
Example #6
def __parse_brief_token_sender(fh, ContinueF):
    # shorthand for { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    LanguageDB = Setup.language_db

    position = fh.tell()
    line_n = get_current_line_info_number(fh) + 1
    try:
        skip_whitespace(fh)
        position = fh.tell()

        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1:
            code = __create_token_sender_by_character_code(fh, code)
        else:
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(
                    fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if code != "":
            if ContinueF:
                code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
            return UserCodeFragment(code, fh.name, line_n, LanguageDB)
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing token shortcut.", fh)
Example #7
def parse(fh):
    """This function parses a mode description and enters it into the 
       'mode_description_db'. Once all modes are parsed
       they can be translated into 'real' modes and are located in
       'blackboard.mode_db'. 
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(fh)
    if mode_name == "":
        error_msg("missing identifier at beginning of mode definition.", fh)

    # NOTE: constructor does register this mode in the mode_db
    new_mode  = ModeDescription(mode_name, fh.name, get_current_line_info_number(fh))

    # (*) inherited modes / options
    skip_whitespace(fh)
    dummy = fh.read(1)
    if dummy not in [":", "{"]:
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if dummy == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events
    while __parse_element(new_mode, fh): 
        pass

    # (*) check for modes w/o pattern definitions
    if not new_mode.has_event_handler() and not new_mode.has_own_matches():
        if new_mode.options["inheritable"] != "only":
            new_mode.options["inheritable"] = "only"
            error_msg("Mode without pattern and event handlers needs to be 'inheritable only'.\n" + \
                      "<inheritable: only> has been added automatically.", fh,  DontExitF=True)
Example #8
def __parse_brief_token_sender(fh, ContinueF):
    # shorthand for { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    LanguageDB = Setup.language_db
    
    position = fh.tell()
    line_n   = get_current_line_info_number(fh) + 1
    try: 
        skip_whitespace(fh)
        position = fh.tell()

        code = __parse_token_id_specification_by_character_code(fh)
        if code != -1: 
            code = __create_token_sender_by_character_code(fh, code)
        else:
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(fh, identifier)
            else:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")

        if code != "": 
            if ContinueF: code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
            return UserCodeFragment(code, fh.name, line_n, LanguageDB)
        else:
            return None

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing token shortcut.", fh)
Example #9
 def set(self, Value, fh):
     if self.__value is not None:
         error_msg("%s has been defined more than once.\n" % self.name, fh, DontExitF=True)
         error_msg("previous definition has been here.\n", self.file_name, self.line_n)
                   
     self.__value   = Value
     self.file_name = fh.name
     self.line_n    = get_current_line_info_number(fh)
Example #10
def __parse_action(new_mode, fh, pattern_str, pattern):

    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()

        code_obj = code_fragment.parse(fh,
                                       "regular expression",
                                       ErrorOnFailureF=False)
        if code_obj is not None:
            new_mode.add_match(pattern_str, code_obj, pattern)
            return

        fh.seek(position)
        word = read_until_letter(fh, [";"])
        if word == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the current
            # pattern index (important for inherited patterns, which have higher precedence).
            # The parser already constructed a state machine for the pattern that is to
            # be assigned a new priority. Since this machine is not used, let us just
            # use its id.
            fh.seek(-1, 1)
            check_or_die(fh, ";",
                         ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_priority(pattern_str, pattern,
                                        pattern.sm.get_id(), fh.name,
                                        get_current_line_info_number(fh))

        elif word == "DELETION":
            # This mark deletes any pattern that was inherited with the same 'name'
            fh.seek(-1, 1)
            check_or_die(fh, ";",
                         ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_deletion(pattern_str, pattern, fh.name,
                                        get_current_line_info_number(fh))

        else:
            error_msg("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern_str + \
                      "found: '%s'. Note, that since quex version 0.33.5 it is required to add a ';'\n" % word + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing action code for pattern.",
                  fh)
Example #11
 def from_FileHandle(Fh, ModeName=""):
     if Fh != -1:
         if not hasattr(Fh, "name"): file_name = "<command line>"
         else: file_name = Fh.name
         line_n = get_current_line_info_number(Fh)
     else:
         file_name = "<command line>"
         line_n = -1
     return SourceRef(file_name, line_n, ModeName)
Example #12
    def set(self, Value, fh):
        if self.__value is not None:
            error_msg("%s has been defined more than once.\n" % self.name,
                      fh,
                      DontExitF=True)
            error_msg("previous definition has been here.\n", self.file_name,
                      self.line_n)

        self.__value = Value
        self.file_name = fh.name
        self.line_n = get_current_line_info_number(fh)
Example #13
 def __init__(self, Name, Default, FH=-1):
     self.name      = Name
     self.__default = Default
     if FH == -1:
         self.__value   = None
         self.file_name = ""
         self.line_n    = -1
     else:
         self.__value   = Default
         self.file_name = FH.name
         self.line_n    = get_current_line_info_number(FH)
     self.__pattern_string = None
Example #14
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments may only use '//' and must appear at the beginning of a line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)

        if check(fh, "}"): 
            return
        
        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if pattern_name == "":
            error_msg("Missing identifier for pattern definition.", fh)

        skip_whitespace(fh)

        if check(fh, "}"): 
            error_msg("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No transformation into a particular codec whatsoever; the state
        #  machines are transformed once, after they are expanded as patterns
        #  in a mode.)
        regular_expression_str, pattern = \
                regular_expression.parse(fh, AllowNothingIsFineF = True, 
                                         AllowStateMachineTrafoF = False) 

        if pattern.has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                blackboard.PatternShorthand(pattern_name, state_machine, 
                                            fh.name, get_current_line_info_number(fh),
                                            regular_expression_str)
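
The docstring shows bare definitions; the parser itself additionally expects the braces of the surrounding 'define' section. A hedged sketch of a complete block:

# define {
#     WHITESPACE  [ \t\n]
#     IDENTIFIER  [a-zA-Z0-9]+
#     OP_PLUS     "+"
# }
#
# Each entry lands in blackboard.shorthand_db under its name, together with
# its state machine and the original regular-expression string.
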
Example #15
def parse_pattern_name_definitions(fh):
    """Parses pattern definitions of the form:
   
          WHITESPACE  [ \t\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"
          
       That means: 'name' whitespace 'regular expression' whitespace newline.
       Comments may only use '//' and must appear at the beginning of a line.
       
       One regular expression can have more than one name, but one name can 
       only have one regular expression.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)

        if check(fh, "}"):
            return

        # -- get the name of the pattern
        skip_whitespace(fh)
        pattern_name = read_identifier(fh)
        if pattern_name == "":
            error_msg("Missing identifier for pattern definition.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error_msg("Missing regular expression for pattern definition '%s'." % \
                      pattern_name, fh)

        # A regular expression state machine
        # (No transformation into a particular codec whatsoever; the state
        #  machines are transformed once, after they are expanded as patterns
        #  in a mode.)
        regular_expression_str, pattern = \
                regular_expression.parse(fh, AllowNothingIsFineF = True,
                                         AllowStateMachineTrafoF = False)

        if pattern.has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)
        state_machine = pattern.sm

        blackboard.shorthand_db[pattern_name] = \
                blackboard.PatternShorthand(pattern_name, state_machine,
                                            fh.name, get_current_line_info_number(fh),
                                            regular_expression_str)
Example #16
 def __init__(self, Name, Default, FH=-1):
     self.name = Name
     self.__default = Default
     if FH == -1:
         self.__value = None
         self.file_name = ""
         self.line_n = -1
     else:
         self.__value = Default
         self.file_name = FH.name
         self.line_n = get_current_line_info_number(FH)
     self.__pattern_string = None
Example #17
def __parse_action(new_mode, fh, pattern_str, pattern):

    position = fh.tell()
    try:
        skip_whitespace(fh)
        position = fh.tell()
            
        code_obj = code_fragment.parse(fh, "regular expression", ErrorOnFailureF=False) 
        if code_obj is not None:
            new_mode.add_match(pattern_str, code_obj, pattern)
            return

        fh.seek(position)
        word = read_until_letter(fh, [";"])
        if word == "PRIORITY-MARK":
            # This mark 'lowers' the priority of a pattern to the priority of the current
            # pattern index (important for inherited patterns, which have higher precedence).
            # The parser already constructed a state machine for the pattern that is to
            # be assigned a new priority. Since this machine is not used, let us just
            # use its id.
            fh.seek(-1, 1)
            check_or_die(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_priority(pattern_str, pattern, pattern.sm.get_id(), 
                                        fh.name, get_current_line_info_number(fh))

        elif word == "DELETION":
            # This mark deletes any pattern that was inherited with the same 'name'
            fh.seek(-1, 1)
            check_or_die(fh, ";", ". Since quex version 0.33.5 this is required.")
            new_mode.add_match_deletion(pattern_str, pattern, fh.name, get_current_line_info_number(fh))
            
        else:
            error_msg("Missing token '{', 'PRIORITY-MARK', 'DELETION', or '=>' after '%s'.\n" % pattern_str + \
                      "found: '%s'. Note, that since quex version 0.33.5 it is required to add a ';'\n" % word + \
                      "to the commands PRIORITY-MARK and DELETION.", fh)


    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing action code for pattern.", fh)
Example #18
def token_id_db_verify_or_enter_token_id(fh, TokenName):
    global Setup

    prefix_less_TokenName = cut_token_prefix_or_die(fh, TokenName)

    # If necessary, add the token id automatically to the database
    if not blackboard.token_id_db.has_key(prefix_less_TokenName):
        # DO NOT ENFORCE THE TOKEN ID TO BE DEFINED, BECAUSE WHEN THE TOKEN ID
        # IS DEFINED IN C-CODE, THE IDENTIFICATION IS NOT 100% SAFE.
        msg = "Token id '%s' defined implicitly." % TokenName
        if TokenName in blackboard.token_id_db.keys():
            msg += "\nNOTE: '%s' has been defined in a token { ... } section!" % \
                   (Setup.token_id_prefix + TokenName)
            msg += "\nNote, that tokens in the token { ... } section are automatically prefixed."
            error_msg(msg, fh, DontExitF=True)
        else:
            blackboard.token_id_implicit_list.append([
                prefix_less_TokenName, fh.name,
                get_current_line_info_number(fh)
            ])

        # Enter the implicit token id definition in the database
        blackboard.token_id_db[prefix_less_TokenName] = \
                TokenInfo(prefix_less_TokenName, None, None, fh.name, get_current_line_info_number(fh))
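
A sketch of the two outcomes above, assuming Setup.token_id_prefix is something like "QUEX_TKN_":

# TokenName already known from a token { ... } section:
#     --> warning only (DontExitF=True); the prefix is mounted automatically.
# TokenName not yet known:
#     --> recorded in blackboard.token_id_implicit_list and entered into
#         blackboard.token_id_db as an implicit TokenInfo definition.
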
Example #19
    def __init__(self, fh=-1):
        self.fh = fh
        if fh != -1:
            self.file_name = fh.name
            self.line_n    = get_current_line_info_number(fh)
        else:
            self.file_name = "no file handle"
            self.line_n    = -1

        self.space_db = {}  # Maps: space width --> character_set
        self.grid_db  = {}  # Maps: grid width  --> character_set
        self.bad_character_set                = LocalizedParameter("bad",        NumberSet())
        self.newline_state_machine            = LocalizedParameter("newline",    None)
        self.newline_suppressor_state_machine = LocalizedParameter("suppressor", None)

        self.__containing_mode_name = ""
Example #20
    def __init__(self, fh=-1):
        self.fh = fh
        if fh != -1:
            self.file_name = fh.name
            self.line_n = get_current_line_info_number(fh)
        else:
            self.file_name = "no file handle"
            self.line_n = -1

        self.space_db = {}  # Maps: space width --> character_set
        self.grid_db = {}  # Maps: grid width  --> character_set
        self.bad_character_set = LocalizedParameter("bad", NumberSet())
        self.newline_state_machine = LocalizedParameter("newline", None)
        self.newline_suppressor_state_machine = LocalizedParameter(
            "suppressor", None)

        self.__containing_mode_name = ""
Example #21
def token_id_db_verify_or_enter_token_id(fh, TokenName):
    global Setup

    prefix_less_TokenName = cut_token_prefix_or_die(fh, TokenName)

    # If necessary, add the token id automatically to the database
    if not blackboard.token_id_db.has_key(prefix_less_TokenName):
        # DO NOT ENFORCE THE TOKEN ID TO BE DEFINED, BECAUSE WHEN THE TOKEN ID
        # IS DEFINED IN C-CODE, THE IDENTIFICATION IS NOT 100% SAFE.
        msg = "Token id '%s' defined implicitly." % TokenName
        if TokenName in blackboard.token_id_db.keys():
            msg += "\nNOTE: '%s' has been defined in a token { ... } section!" % \
                   (Setup.token_id_prefix + TokenName)
            msg += "\nNote, that tokens in the token { ... } section are automatically prefixed."
            error_msg(msg, fh, DontExitF=True)
        else:
            blackboard.token_id_implicit_list.append([prefix_less_TokenName, fh.name, get_current_line_info_number(fh)])

        # Enter the implicit token id definition in the database
        blackboard.token_id_db[prefix_less_TokenName] = \
                TokenInfo(prefix_less_TokenName, None, None, fh.name, get_current_line_info_number(fh)) 
Example #22
def parse(fh):
    """This function parses a mode description and enters it into the 
       'mode_description_db'. Once all modes are parsed
       they can be translated into 'real' modes and are located in
       'blackboard.mode_db'. 
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(fh)
    if mode_name == "":
        error_msg("missing identifier at beginning of mode definition.", fh)

    # NOTE: constructor does register this mode in the mode_db
    new_mode = ModeDescription(mode_name, fh.name,
                               get_current_line_info_number(fh))

    # (*) inherited modes / options
    skip_whitespace(fh)
    dummy = fh.read(1)
    if dummy not in [":", "{"]:
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if dummy == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events
    while __parse_element(new_mode, fh):
        pass

    # (*) check for modes w/o pattern definitions
    if not new_mode.has_event_handler() and not new_mode.has_own_matches():
        if new_mode.options["inheritable"] != "only":
            new_mode.options["inheritable"] = "only"
            error_msg("Mode without pattern and event handlers needs to be 'inheritable only'.\n" + \
                      "<inheritable: only> has been added automatically.", fh,  DontExitF=True)
Example #23
def parse(fh):
    descriptor = TokenTypeDescriptorCore()

    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type definition", fh)

    already_defined_list = []
    position = fh.tell()
    begin_line_n = get_current_line_info_number(fh)
    result = True
    while result == True:
        try:
            result = parse_section(fh, descriptor, already_defined_list)
        except EndOfStreamException:
            fh.seek(position)
            error_msg("End of file reached while parsing token_type section.",
                      fh)

    if not check(fh, "}"):
        fh.seek(position)
        error_msg("Missing closing '}' at end of token_type definition.", fh)

    result = TokenTypeDescriptor(descriptor, fh.name, begin_line_n)
    if     len(result.get_member_db()) == 0 \
       and result.class_name == "Token" \
       and result.token_id_type.__class__.__name__      == "CodeFragment" \
       and result.column_number_type.__class__.__name__ == "CodeFragment" \
       and result.line_number_type.__class__.__name__   == "CodeFragment":
        error_msg("Section 'token_type' does not define any members, does not\n" + \
                  "modify any standard member types, nor does it define a class\n" + \
                  "different from 'Token'.", fh)

    result.consistency_check()
    return result
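
For context, a hedged sketch of a token_type section that would pass the 'defines something' check at the end of the function; the standard/distinct layout is assumed and the member names are invented:

# token_type {
#     standard {
#         id            : uint32_t;
#         line_number   : uint32_t;
#         column_number : uint16_t;
#     }
#     distinct {
#         name : std::string;
#     }
# }
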
Example #24
def parse(fh):
    descriptor = TokenTypeDescriptorCore()

    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type definition", fh)

    already_defined_list = []
    position = fh.tell()
    begin_line_n = get_current_line_info_number(fh)
    result   = True
    while result == True:
        try: 
            result = parse_section(fh, descriptor, already_defined_list)
        except EndOfStreamException:
            fh.seek(position)
            error_msg("End of file reached while parsing token_type section.", fh)

        
    if not check(fh, "}"):
        fh.seek(position)
        error_msg("Missing closing '}' at end of token_type definition.", fh);

    result = TokenTypeDescriptor(descriptor, fh.name, begin_line_n)
    if     len(result.get_member_db()) == 0 \
       and result.class_name == "Token" \
       and result.token_id_type.__class__.__name__      == "CodeFragment" \
       and result.column_number_type.__class__.__name__ == "CodeFragment" \
       and result.line_number_type.__class__.__name__   == "CodeFragment":
        error_msg("Section 'token_type' does not define any members, does not\n" + \
                  "modify any standard member types, nor does it define a class\n" + \
                  "different from 'Token'.", fh)

    result.consistency_check()
    return result
Example #25
def __parse_option(fh, new_mode):
    def get_pattern_object(SM):
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else: result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(), "mode option",
                        fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A typical application is the
        # whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very efficient way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(
            fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier,
                      fh)

        if trigger_set.is_empty():
            error_msg("Empty trigger set for skipper." % identifier, fh)

        # TriggerSet skipping is implemented the following way: as soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # The skipper is entered as if the opener pattern were a normal pattern and the 'skipper' were the action.
        # NOTE: The corresponding CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index,
                                  trigger_set,
                                  AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str,
                           action,
                           get_pattern_object(pattern_sm),
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only affects the trigger. Not so the nested range skipper (see below).

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(
                fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(
            fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier,
                      fh)

        # Skipper code is to be generated later
        generator_function, comment = {
            "skip_range":        (skip_range.do,        E_SpecialPatterns.SKIP_RANGE),
            "skip_nested_range": (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName=fh.name,
                               LineN=get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"] = new_mode.name

        new_mode.add_match(opener_str,
                           action,
                           get_pattern_object(opener_sm),
                           Comment=comment)

        return True

    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"

            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])

            FileName = value.newline_suppressor_state_machine.file_name
            LineN = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True),
                                    FileName, LineN)

            new_mode.add_match(
                suppressed_newline_pattern_str,
                code,
                get_pattern_object(suppressed_newline_sm),
                Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index,
                          value.indentation_count_character_set(),
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN = value.newline_state_machine.line_n
        action = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(),
                           action,
                           get_pattern_object(sm),
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert mode_option_info_db.has_key(identifier)

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
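
In input form, the special options handled above; the spellings are assumptions inferred from the identifiers and the closing-'>' checks in the code:

# <skip:              [ \t\n]>    --> character-set skipper
# <skip_range:        "/*" "*/">  --> range skipper (regex opener allowed)
# <skip_nested_range: "{" "}">    --> nested skipper (string opener only)
# <indentation:       ...>        --> installs the newline and suppressed-
#                                     newline matches plus the indentation
#                                     counter constructed above
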
Example #26
def parse_section(fh):
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        error_msg("Missing section title.", fh)

    verify_word_in_list(word, blackboard.all_section_title_list,
                        "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'     => define a mode
        #     -- 'start = ...;'     => define the name of the initial mode
        #     -- 'header { ... }'   => define code that is to be pasted on top
        #                              of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'     => define code that is to be pasted in the class' body
        #                              of the engine (e.g. "public: int  my_member;")
        #     -- 'init { ... }'     => define code that is to be pasted in the class' constructors
        #                              of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }'   => define patterns shorthands such as IDENTIFIER for [a-z]+
        #     -- 'repeated_token { ... }' => enables token repetition, lists
        #                                    the token ids that may be repeated.
        #     -- 'token { ... }'    => define token ids
        #     -- 'token_type { ... }'  => define a customized token type
        #
        if word in blackboard.fragment_db.keys():
            element_name = blackboard.fragment_db[word]
            fragment = code_fragment.parse(fh,
                                           word,
                                           AllowBriefTokenSenderF=False)
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error_msg("Missing mode_name after 'start ='", fh)
            elif blackboard.initial_mode.get_pure_code() != "":
                error_msg("start mode defined more than once!",
                          fh,
                          DontExitF=True)
                error_msg("previously defined here",
                          blackboard.initial_mode.filename,
                          blackboard.initial_mode.line_n)

            blackboard.initial_mode = UserCodeFragment(
                mode_name, fh.name, get_current_line_info_number(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(
                fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                    blackboard.token_id_db.keys(),
                                    "Token ID '%s' not yet defined." %
                                    token_name,
                                    fh,
                                    ExitF=False)
            return

        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":
            parse_token_id_definitions(fh)
            return

        elif word == "token_type":

            if Setup.token_class_file != "":
                error_msg("Token type definition inadmissible while specifying on the command line\n" + \
                          "the file %s to contain a manually written token class." % repr(Setup.token_class_file),
                          fh)

            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error_msg(
                    "Section 'token_type' must appear before first mode definition.",
                    fh)
            else:
                error_msg("Section 'token_type' has been defined twice.",
                          fh,
                          DontExitF=True)
                error_msg("Previously defined here.",
                          blackboard.token_type_definition.file_name_of_token_type_definition,
                          blackboard.token_type_definition.line_n_of_token_type_definition)
            return

        elif word == "mode":
            # When the first mode is parsed then a token_type definition must be
            # present. If not, the default token type definition is considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing '%s' section" % word, fh)
Example #27
def parse_token_id_definitions(fh, NamesOnlyF=False):
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    token_prefix = Setup.token_id_prefix
    token_prefix_plain = Setup.token_id_prefix_plain  # i.e. without name space included

    if NamesOnlyF: db = {}
    else: db = blackboard.token_id_db

    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("missing opening '{' after 'token' section identifier.\n", fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        candidate = read_identifier(fh, TolerantF=True)

        if candidate == "":
            error_msg("Missing valid token identifier." % candidate, fh)

        # -- check the name; if it starts with the token prefix, issue a warning
        if candidate.find(token_prefix) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)
        elif candidate.find(token_prefix_plain) == 0:
            error_msg("Token identifier '%s' starts with token prefix '%s'.\n" % (candidate, token_prefix) + \
                      "Token prefix is mounted automatically. This token id appears in the source\n" + \
                      "code as '%s%s'." % (token_prefix, candidate), \
                      fh, DontExitF=True)

        skip_whitespace(fh)

        if NamesOnlyF:
            db[token_prefix + candidate] = True
            if check(fh, ";") == False:
                error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                          "This is mandatory since Quex version 0.50.1.", fh)
            continue

        # Parse a possible numeric value after '='
        numeric_value = None
        if check(fh, "="):
            skip_whitespace(fh)
            numeric_value = read_integer(fh)
            if numeric_value is None:
                error_msg(
                    "Missing number after '=' for token identifier '%s'." %
                    candidate, fh)

        if check(fh, ";") == False:
            error_msg("Missing ';' after definition of token identifier '%s'.\n" % candidate + \
                      "This is mandatory since Quex version 0.50.1.", fh)

        db[candidate] = TokenInfo(candidate,
                                  numeric_value,
                                  Filename=fh.name,
                                  LineN=get_current_line_info_number(fh))

    if NamesOnlyF:
        result = db.keys()
        result.sort()
        return result
Example #28
def __parse_normal(fh, code_fragment_name):
    LanguageDB = Setup.language_db

    line_n = get_current_line_info_number(fh) + 1
    code   = read_until_closing_bracket(fh, "{", "}")
    return UserCodeFragment(code, fh.name, line_n, LanguageDB)
Example #29
def __parse_option(fh, new_mode):
    def get_pattern_object(SM):
        if not SM.is_DFA_compliant(): result = nfa_to_dfa.do(SM)
        else:                         result = SM
        result = hopcroft.do(result, CreateNewStateMachineF=False)
        return Pattern(result, AllowStateMachineTrafoF=True)

    identifier = read_option_start(fh)
    if identifier is None: return False

    verify_word_in_list(identifier, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if identifier == "skip":
        # A skipper 'eats' characters at the beginning of a pattern that belong
        # to a specified set of characters. A typical application is the
        # whitespace skipper '[ \t\n]'. The skipper definition allows quex to
        # implement a very efficient way to skip these regions.
        pattern_str, trigger_set = regular_expression.parse_character_set(fh, PatternStringF=True)
        skip_whitespace(fh)

        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'." % identifier, fh)

        if trigger_set.is_empty():
            error_msg("Empty trigger set for skipper." % identifier, fh)

        # TriggerSet skipping is implemented the following way: as soon as one element of the
        # trigger set appears, the state machine enters the 'trigger set skipper section'.
        # The skipper is entered as if the opener pattern were a normal pattern and the 'skipper' were the action.
        # NOTE: The corresponding CodeFragment for skipping is created in 'implement_skippers(...)'
        pattern_sm  = StateMachine()
        pattern_sm.add_transition(pattern_sm.init_state_index, trigger_set, AcceptanceF=True)

        # Skipper code is to be generated later
        action = GeneratedCode(skip_character_set.do, 
                               FileName = fh.name, 
                               LineN    = get_current_line_info_number(fh))
        action.data["character_set"] = trigger_set

        new_mode.add_match(pattern_str, action, get_pattern_object(pattern_sm), 
                           Comment=E_SpecialPatterns.SKIP)

        return True

    elif identifier in ["skip_range", "skip_nested_range"]:
        # A non-nesting skipper can contain a full fledged regular expression as opener,
        # since it only affects the trigger. Not so the nested range skipper (see below).

        # -- opener
        skip_whitespace(fh)
        if identifier == "skip_nested_range":
            # Nested range state machines only accept 'strings' not state machines
            opener_str, opener_sequence = __parse_string(fh, "Opener pattern for 'skip_nested_range'")
            opener_sm = StateMachine.from_sequence(opener_sequence)
        else:
            opener_str, opener_pattern = regular_expression.parse(fh)
            opener_sm = opener_pattern.sm
            # For 'range skipping' the opener sequence is not needed, only the opener state
            # machine is webbed into the pattern matching state machine.
            opener_sequence       = None

        skip_whitespace(fh)

        # -- closer
        closer_str, closer_sequence = __parse_string(fh, "Closing pattern for 'skip_range' or 'skip_nested_range'")
        skip_whitespace(fh)
        if fh.read(1) != ">":
            error_msg("missing closing '>' for mode option '%s'" % identifier, fh)

        # Skipper code is to be generated later
        generator_function, comment = { 
                "skip_range":        (skip_range.do,        E_SpecialPatterns.SKIP_RANGE),
                "skip_nested_range": (skip_nested_range.do, E_SpecialPatterns.SKIP_NESTED_RANGE),
        }[identifier]
        action = GeneratedCode(generator_function,
                               FileName = fh.name, 
                               LineN    = get_current_line_info_number(fh))

        action.data["opener_sequence"] = opener_sequence
        action.data["closer_sequence"] = closer_sequence
        action.data["mode_name"]       = new_mode.name

        new_mode.add_match(opener_str, action, get_pattern_object(opener_sm), Comment=comment)

        return True
        
    elif identifier == "indentation":
        value = indentation_setup.do(fh)

        # Enter 'Newline' and 'Suppressed Newline' as matches into the engine.
        # Similar to skippers, the indentation count is then triggered by the newline.
        # -- Suppressed Newline = Suppressor followed by Newline,
        #    then newline does not trigger indentation counting.
        suppressed_newline_pattern_str = ""
        if value.newline_suppressor_state_machine.get() is not None:
            suppressed_newline_pattern_str = \
                  "(" + value.newline_suppressor_state_machine.pattern_string() + ")" \
                + "(" + value.newline_state_machine.pattern_string() + ")"
                                           
            suppressed_newline_sm = \
                sequentialize.do([value.newline_suppressor_state_machine.get(),
                                  value.newline_state_machine.get()])
                 
            FileName = value.newline_suppressor_state_machine.file_name
            LineN    = value.newline_suppressor_state_machine.line_n
            # Go back to start.
            code = UserCodeFragment("goto %s;" % get_label("$start", U=True), FileName, LineN)

            new_mode.add_match(suppressed_newline_pattern_str, code, 
                               get_pattern_object(suppressed_newline_sm),
                               Comment=E_SpecialPatterns.SUPPRESSED_INDENTATION_NEWLINE)

        # When there is an empty line, then there shall be no indentation count on it.
        # Here comes the trick:
        #
        #      Let               newline
        #      be defined as:    newline ([space]* newline)*
        #
        # This way empty lines are eaten away before the indentation count is activated.

        # -- 'space'
        x0 = StateMachine()
        x0.add_transition(x0.init_state_index, value.indentation_count_character_set(), 
                          AcceptanceF=True)
        # -- '[space]*'
        x1 = repeat.do(x0)
        # -- '[space]* newline'
        x2 = sequentialize.do([x1, value.newline_state_machine.get()])
        # -- '([space]* newline)*'
        x3 = repeat.do(x2)
        # -- 'newline ([space]* newline)*'
        x4 = sequentialize.do([value.newline_state_machine.get(), x3])
        # -- nfa to dfa; hopcroft optimization
        sm = beautifier.do(x4)

        FileName = value.newline_state_machine.file_name
        LineN    = value.newline_state_machine.line_n
        action   = GeneratedCode(indentation_counter.do, FileName, LineN)

        action.data["indentation_setup"] = value

        new_mode.add_match(value.newline_state_machine.pattern_string(), action, 
                           get_pattern_object(sm), 
                           Comment=E_SpecialPatterns.INDENTATION_NEWLINE)

        # Announce the mode to which the setup belongs
        value.set_containing_mode_name(new_mode.name)
    else:
        value = read_option_value(fh)

    # The 'verify_word_in_list()' call must have ensured that the following holds
    assert mode_option_info_db.has_key(identifier)

    # Is the option of the appropriate value?
    option_info = mode_option_info_db[identifier]
    if option_info.domain is not None and value not in option_info.domain:
        error_msg("Tried to set value '%s' for option '%s'. " % (value, identifier) + \
                  "Though, possible for this option are only: %s." % repr(option_info.domain)[1:-1], fh)

    # Finally, set the option
    new_mode.add_option(identifier, value)

    return True
Example #30
def parse_variable_definition(fh, GroupF=False, already_defined_list=[]):
    """PURPOSE: Parsing of a variable definition consisting of 'type' and 'name.
                Members can be mentioned together in a group, which means that
                they can appear simultaneously. Possible expresions are

                (1) single variables:

                              name0 : type;
                              name1 : type[32];
                              name2 : type*;

                (2) combined variables

                              {
                                  sub_name0 : type0;
                                  sub_name1 : type[64];
                                  sub_name2 : type1*;
                              }

       ARGUMENTS: 

        'GroupF'               allows 'nested variable groups' in curly brackets

        'already_defined_list' informs about variable names that have already
                               been chosen. It is only used for groups.

       RETURNS:
                 None        on failure to parse a variable definition.
                 array       when a single variable definition was found. 
                                array[0] = UserCodeFragment containing the type. 
                                array[1] = name of the variable.
                 dictionary  if it was a combined variable definition. The dictionary
                               maps: (variable name) ---> (UserCodeFragment with type)
    
    """
    position = fh.tell()

    line_n = get_current_line_info_number(fh)
    skip_whitespace(fh)
    name_str = read_identifier(fh)
    if name_str == "":
        if not GroupF or not check(fh, "{"):
            fh.seek(position)
            return None
        sub_db = parse_variable_definition_list(fh,
                                                "Concurrent union variables",
                                                already_defined_list)
        if not check(fh, "}"):
            fh.seek(position)
            error_msg(
                "Missing closing '}' after concurrent variable definition.",
                fh)
        return [sub_db]

    else:
        name_str = name_str.strip()
        if not check(fh, ":"):
            error_msg("Missing ':' after identifier '%s'." % name_str, fh)

        if not fh.read(1).isspace():
            error_msg("Missing whitespace after ':' after identifier '%s'.\n" % name_str \
                    + "The notation has to be: variable-name ':' type ';'.", fh)

        type_str, i = read_until_letter(fh, ";", Verbose=True)
        if i == -1: error_msg("missing ';'", fh)
        type_str = type_str.strip()

        return [UserCodeFragment(type_str, fh.name, line_n), name_str]
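
A worked pass through both return shapes described in the docstring; the concrete values are assumed:

# Single variable, input "counter : uint32_t;":
#     returns [UserCodeFragment("uint32_t", fh.name, line_n), "counter"]
# Group (GroupF=True), input "{ a : int; b : int; }":
#     returns [sub_db], where sub_db maps "a" and "b" to UserCodeFragment
#     objects carrying their types.
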
Example #31
def parse_section(fh):
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh)
    if word == "":
        error_msg("Missing section title.", fh)

    verify_word_in_list(word, blackboard.all_section_title_list, 
                        "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'     => define a mode
        #     -- 'start = ...;'     => define the name of the initial mode
        #     -- 'header { ... }'   => define code that is to be pasted on top
        #                              of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'     => define code that is to be pasted in the class' body
        #                              of the engine (e.g. "public: int  my_member;")
        #     -- 'init { ... }'     => define code that is to be pasted in the class' constructors
        #                              of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }'   => define patterns shorthands such as IDENTIFIER for [a-z]+
        #     -- 'repeated_token { ... }' => enables token repetition, lists
        #                                    the token ids that may be repeated.
        #     -- 'token { ... }'    => define token ids
        #     -- 'token_type { ... }'  => define a customized token type
        #
        if word in blackboard.fragment_db.keys():
            element_name = blackboard.fragment_db[word]
            fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)        
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error_msg("Missing mode_name after 'start ='", fh)
            elif blackboard.initial_mode.get_pure_code() != "":
                error_msg("start mode defined more than once!", fh, DontExitF=True)
                error_msg("previously defined here",
                          blackboard.initial_mode.filename,
                          blackboard.initial_mode.line_n)
        
            blackboard.initial_mode = UserCodeFragment(mode_name, fh.name, 
                                                       get_current_line_info_number(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            for token_name in blackboard.token_repetition_token_id_list:
                verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                    blackboard.token_id_db.keys(),
                                    "Token ID '%s' not yet defined." % token_name,
                                    fh, ExitF=False)
            return
            
        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":       
            parse_token_id_definitions(fh)
            return

        elif word == "token_type":       

            if Setup.token_class_file != "":
                error_msg("Token type definition inadmissible while specifying on the command line\n" + \
                          "the file %s to contain a manually written token class." % repr(Setup.token_class_file),
                          fh)
       
            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error_msg("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error_msg("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error_msg("Previously defined here.",
                          blackboard.token_type_definition.file_name_of_token_type_definition,
                          blackboard.token_type_definition.line_n_of_token_type_definition)
            return

        elif word == "mode":
            # When the first mode is parsed then a token_type definition must be 
            # present. If not, the default token type definition is considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing '%s' section" % word, fh)
Example #32
def __parse_normal(fh, code_fragment_name):
    LanguageDB = Setup.language_db

    line_n = get_current_line_info_number(fh) + 1
    code = read_until_closing_bracket(fh, "{", "}")
    return UserCodeFragment(code, fh.name, line_n, LanguageDB)
Example #33
def parse_variable_definition(fh, GroupF=False, already_defined_list=[]):
    """PURPOSE: Parsing of a variable definition consisting of 'type' and 'name.
                Members can be mentioned together in a group, which means that
                they can appear simultaneously. Possible expresions are

                (1) single variables:

                              name0 : type;
                              name1 : type[32];
                              name2 : type*;

                (2) combined variables

                              {
                                  sub_name0 : type0;
                                  sub_name1 : type[64];
                                  sub_name2 : type1*;
                              }

       ARGUMENTS: 

        'GroupF'               allows 'nested variable groups' in curly brackets

        'already_defined_list' informs about variable names that have already
                               been chosen. It is only used for groups.

       RETURNS:
                 None        on failure to parse a variable definition.
                 array       when a single variable definition was found. 
                                array[0] = UserCodeFragment containing the type. 
                                array[1] = name of the variable.
                 dictionary  if it was a combined variable definition. The dictionary
                               maps: (variable name) ---> (UserCodeFragment with type)
    
    """
    position = fh.tell()

    line_n   = get_current_line_info_number(fh)
    skip_whitespace(fh)
    name_str = read_identifier(fh)
    if name_str == "":
        if not GroupF or not check(fh, "{"): 
            fh.seek(position)
            return None
        sub_db = parse_variable_definition_list(fh, "Concurrent union variables", already_defined_list)
        if not check(fh, "}"): 
            fh.seek(position)
            error_msg("Missing closing '}' after concurrent variable definition.", fh)
        return [ sub_db ]

    else:
        name_str = name_str.strip()
        if not check(fh, ":"): error_msg("Missing ':' after identifier '%s'." % name_str, fh)
        
        if not fh.read(1).isspace():
            error_msg("Missing whitespace after ':' after identifier '%s'.\n" % name_str \
                    + "The notation has to be: variable-name ':' type ';'.", fh)

        type_str, i = read_until_letter(fh, ";", Verbose=True)
        if i == -1: error_msg("missing ';'", fh)
        type_str = type_str.strip()

        return [ UserCodeFragment(type_str, fh.name, line_n), name_str ]