Exemple #1
0
def token_id_db_verify_or_enter_token_id(fh, TokenName):
    """Ensure that 'TokenName' has an entry in 'blackboard.token_id_db'.

    The token id prefix is cut from 'TokenName' first. If the prefix-less
    name is already a key of the database nothing is done. Otherwise an
    implicit 'TokenInfo' (without a numeric value) is entered under the
    prefix-less name.

    fh        -- file handle; used for messages and source references.
    TokenName -- token name as found in the input.
    """
    plain_name = cut_token_id_prefix(TokenName, fh)
    db         = blackboard.token_id_db

    if db.has_key(plain_name):
        return

    # The token id is deliberately NOT required to be defined: when it is
    # defined in C-code, the identification is not 100% safe.
    if TokenName in db.keys():
        prefixed_name = Setup.token_id_prefix + TokenName
        msg = "".join([
            "Token id '%s' defined implicitly.\n" % TokenName,
            "'%s' has been defined in a token { ... } section!\n" % prefixed_name,
            "Token ids in the token { ... } section are automatically prefixed.",
        ])
        error_msg(msg, fh, DontExitF=True,
                  SuppressCode=NotificationDB.warning_usage_of_undefined_token_id_name)
    else:
        # The warning is posted later, once all implicit tokens have been
        # collected. See "token_id_maker.__propose_implicit_token_definitions()"
        implicit_entry = (plain_name, SourceRef.from_FileHandle(fh))
        blackboard.token_id_implicit_list.append(implicit_entry)

    # Register the implicit token id definition.
    db[plain_name] = TokenInfo(plain_name, None, None,
                               SourceRef.from_FileHandle(fh))
Exemple #2
0
def parse(fh, new_mode):
    """Parse a single mode option and enter it into 'new_mode.option_db'.

    fh       -- file handle positioned where an option may start.
    new_mode -- mode description which receives the option.

    Returns False if 'read_option_start()' reports no option (None),
    True once an option has been read and entered.
    """
    # The source reference is taken BEFORE the option is consumed.
    sr = SourceRef.from_FileHandle(fh)

    option_name = read_option_start(fh)
    if option_name is None:
        return False

    verify_word_in_list(option_name, mode_option_info_db.keys(),
                        "mode option", fh.name, get_current_line_info_number(fh))

    if option_name == "skip":
        setting = __parse_skip_option(fh, new_mode, option_name)
    elif option_name in ("skip_range", "skip_nested_range"):
        setting = __parse_range_skipper_option(fh, option_name, new_mode)
    elif option_name == "indentation":
        setting = counter.parse_indentation(fh)
        setting.set_containing_mode_name(new_mode.name)
        blackboard.required_support_indentation_count_set()
    elif option_name == "counter":
        setting = counter.parse_line_column_counter(fh)
    elif option_name in ("entry", "exit", "restrict"):
        # These options carry a 'list' of strings.
        setting = read_option_value(fh, ListF=True)
    else:
        # Any other option carries a single string.
        setting = read_option_value(fh)

    new_mode.option_db.enter(option_name, setting, sr, new_mode.name)
    return True
Exemple #3
0
def parse(fh):
    """Parse the '{ ... }' body of a 'token_type' section.

    Sections are read by 'parse_section()' for as long as it returns True.
    The collected 'TokenTypeDescriptorCore' is then wrapped into a
    'TokenTypeDescriptor', which is consistency-checked and returned.

    An error is reported for a missing opening or closing brace, for an
    end of stream inside the body, and for a definition that changes
    nothing compared to the defaults checked below.
    """
    core = TokenTypeDescriptorCore()

    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type definition", fh)

    defined_names = []
    body_begin    = fh.tell()      # position to report errors at
    sr_begin      = SourceRef.from_FileHandle(fh)

    more_f = True
    while more_f == True:
        try:
            more_f = parse_section(fh, core, defined_names)
        except EndOfStreamException:
            fh.seek(body_begin)
            error_eof("token_type", fh)

    if not check(fh, "}"):
        fh.seek(body_begin)
        error_msg("Missing closing '}' at end of token_type definition.", fh)

    descriptor = TokenTypeDescriptor(core, sr_begin)

    # A section that neither adds members, nor renames the class, nor
    # touches one of the standard member types is considered an error.
    nothing_defined_f = (
        len(descriptor.get_member_db()) == 0
        and descriptor.class_name == "Token"
        and descriptor.token_id_type.sr.is_void()
        and descriptor.column_number_type.sr.is_void()
        and descriptor.line_number_type.sr.is_void()
    )
    if nothing_defined_f:
        error_msg("Section 'token_type' does not define any members, does not\n" + \
                  "modify any standard member types, nor does it define a class\n" + \
                  "different from 'Token'.", fh)

    descriptor.consistency_check()
    return descriptor
Exemple #4
0
def __parse_brief_token_sender(fh, ContinueF):
    """Parse a brief token sender, i.e. the shorthand for

        { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }

    fh        -- file handle positioned before the token specification.
    ContinueF -- if set, "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();"
                 is appended to the generated code.

    Returns a 'CodeUser' holding the generated code, or None if the
    generated code is empty. An end of stream is reported via 'error_eof()'.
    """
    error_pos = fh.tell()
    try:
        skip_whitespace(fh)
        error_pos = fh.tell()

        character_code = __parse_token_id_specification_by_character_code(fh)
        if character_code != -1:
            txt = __create_token_sender_by_character_code(fh, character_code)
        else:
            skip_whitespace(fh)
            name = __read_token_identifier(fh)
            skip_whitespace(fh)
            if name in ("GOTO", "GOSUB", "GOUP"):
                txt = __create_mode_transition_and_token_sender(fh, name)
            else:
                txt = __create_token_sender_by_token_name(fh, name)
                check_or_die(fh, ";")

        if len(txt) == 0:
            return None

        if ContinueF:
            txt += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
        return CodeUser(txt, SourceRef.from_FileHandle(fh))

    except EndOfStreamException:
        # Report the error at the place where the token specification began.
        fh.seek(error_pos)
        error_eof("token", fh)
Exemple #5
0
def parse(fh):
    """Parse a mode definition: its name, an optional option list
    introduced by ':', and then the elements of the mode body.

    A 'ModeDescription' is constructed for the mode. According to the note
    carried over from the original code, that constructor takes care of
    registering the mode; this function itself returns nothing.

    NOTE: An end of file is not caught here, but in the caller
          'parse_section(...)'.
    """
    skip_whitespace(fh)
    mode_name = read_identifier(fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    new_mode = ModeDescription(mode_name, SourceRef.from_FileHandle(fh))

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    delimiter = fh.read(1)
    if delimiter == ":":
        __parse_option_list(new_mode, fh)
    elif delimiter != "{":
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    # (*) pattern-action pairs and events; '__parse_element()' tells when
    #     there is nothing more to read by returning False.
    while True:
        if not __parse_element(new_mode, fh):
            break
Exemple #6
0
def __parse_element(new_mode, fh):
    """Parse one element of a mode body: either an event handler or a
    pattern followed by its action.

    Returns: False, if a closing '}' has been found.
             True, else.

    On end of stream the file handle is set back to the last remembered
    position and 'error_eof()' is called with a description of what was
    being parsed.
    """
    what       = "pattern or event handler"
    rewind_pos = fh.tell()
    try:
        skip_whitespace(fh)
        # NOTE: 'read_word' must not be used here, because parsing has to
        #       continue directly after the whitespace in case that a
        #       regular expression follows.
        rewind_pos = fh.tell()

        first_word = read_until_whitespace(fh)
        if first_word == "}":
            return False

        # -- 'on_entry', 'on_exit', ... ?
        if __parse_event(new_mode, fh, first_word):
            return True

        # -- otherwise: a regular expression, re-read from its beginning
        fh.seek(rewind_pos)
        what    = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern.set_source_reference(SourceRef.from_FileHandle(fh, new_mode.name))

        rewind_pos = fh.tell()
        what       = "start of mode element: code fragment for '%s'" % pattern.pattern_string()

        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)
        return True

    except EndOfStreamException:
        fh.seek(rewind_pos)
        error_eof(what, fh)

    return True
Exemple #7
0
def parse(fh):
    """Read a complete mode definition from 'fh'.

    Steps: (1) the mode name, (2) either ':' followed by an option list or
    the opening '{', (3) mode elements until '__parse_element()' returns
    False. A 'ModeDescription' object is created for the mode; as noted in
    the original code, its constructor is what registers the mode. Nothing
    is returned.

    NOTE: Catching of EOF happens in caller: parse_section(...)
    """
    skip_whitespace(fh)
    name = read_identifier(
        fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    description = ModeDescription(name, SourceRef.from_FileHandle(fh))

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    next_char = fh.read(1)
    if next_char not in (":", "{"):
        error_msg("missing ':' or '{' after mode '%s'" % name, fh)
    elif next_char == ":":
        __parse_option_list(description, fh)

    # (*) read in pattern-action pairs and events
    more_f = __parse_element(description, fh)
    while more_f:
        more_f = __parse_element(description, fh)
Exemple #8
0
def parse(fh, new_mode):
    """Read one option of a mode definition and store it in the option
    database of 'new_mode'.

    Returns: False, if 'read_option_start()' delivers None (no option).
             True, if an option has been parsed and entered.
    """
    # Remember where the option started; needed for the database entry.
    option_sr = SourceRef.from_FileHandle(fh)

    name = read_option_start(fh)
    if name is None:
        return False

    verify_word_in_list(name, mode_option_info_db.keys(), "mode option",
                        fh.name, get_current_line_info_number(fh))

    if name == "skip":
        content = __parse_skip_option(fh, new_mode, name)

    elif name == "skip_range" or name == "skip_nested_range":
        content = __parse_range_skipper_option(fh, name, new_mode)

    elif name == "indentation":
        content = counter.parse_indentation(fh)
        content.set_containing_mode_name(new_mode.name)
        blackboard.required_support_indentation_count_set()

    elif name == "counter":
        content = counter.parse_line_column_counter(fh)

    else:
        # 'entry', 'exit', and 'restrict' take a list of strings; all
        # remaining options take a single string.
        if name in ("entry", "exit", "restrict"):
            content = read_option_value(fh, ListF=True)
        else:
            content = read_option_value(fh)

    new_mode.option_db.enter(name, content, option_sr, new_mode.name)
    return True
Exemple #9
0
def parse(fh):
    """Read a token type definition enclosed in '{' and '}'.

    'parse_section()' is called repeatedly; it fills the descriptor core
    and signals by its return value whether another section follows.

    Returns a consistency-checked 'TokenTypeDescriptor'. Errors are
    reported for missing braces, a premature end of stream, and a
    definition that does not differ from the defaults tested below.
    """
    def __is_default(Descriptor):
        # True, if the section defines no members, keeps the class name
        # 'Token', and leaves all standard member types untouched.
        if len(Descriptor.get_member_db()) != 0:          return False
        if Descriptor.class_name != "Token":              return False
        if not Descriptor.token_id_type.sr.is_void():      return False
        if not Descriptor.column_number_type.sr.is_void(): return False
        return Descriptor.line_number_type.sr.is_void()

    core_descriptor = TokenTypeDescriptorCore()

    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type definition", fh)

    names_seen = []
    begin_pos  = fh.tell()
    begin_sr   = SourceRef.from_FileHandle(fh)
    continue_f = True
    while continue_f == True:
        try:
            continue_f = parse_section(fh, core_descriptor, names_seen)
        except EndOfStreamException:
            fh.seek(begin_pos)
            error_eof("token_type", fh)

    if not check(fh, "}"):
        fh.seek(begin_pos)
        error_msg("Missing closing '}' at end of token_type definition.", fh)

    token_type_descriptor = TokenTypeDescriptor(core_descriptor, begin_sr)
    if __is_default(token_type_descriptor):
        error_msg("Section 'token_type' does not define any members, does not\n" + \
                  "modify any standard member types, nor does it define a class\n" + \
                  "different from 'Token'.", fh)

    token_type_descriptor.consistency_check()
    return token_type_descriptor
Exemple #10
0
 def add_match_deletion(self, ThePattern, fh):
     """Record a request to delete 'ThePattern'.

     A 'PatternDeletion' made of the pattern, its incidence id, and the
     current source position in this mode is appended to
     'self.deletion_info_list'. Purpose as stated originally: a pattern in
     one of the base modes that is identical to this one has to be deleted.
     """
     incidence_id = ThePattern.incidence_id()
     origin       = SourceRef.from_FileHandle(fh, self.name)
     self.deletion_info_list.append(
         PatternDeletion(ThePattern, incidence_id, origin))
Exemple #11
0
def __create_token_sender_by_character_code(fh, CharacterCode):
    """Register a token for 'CharacterCode' and return the code sending it.

    The token is entered into 'blackboard.token_id_db' with the character
    code as its numeric value. The returned string is a 'self_send(...)'
    statement with the code as six-digit hexadecimal number.
    """
    name = "UCS_0x%06X" % CharacterCode
    info = TokenInfo(name, CharacterCode, None, SourceRef.from_FileHandle(fh))

    # The leading '--' in the key prevents the token name from being printed.
    blackboard.token_id_db["--" + name] = info

    return "self_send(%s);\n" % ("0x%06X" % CharacterCode)
Exemple #12
0
    def __init__(self, Name="", StateMachine="", SourceReference=None, RE=""):
        """Store name, state machine, source reference, and the regular
        expression string.

        StateMachine    -- must be an object whose class is named
                           "StateMachine" (asserted; the default "" does
                           not pass this check).
        SourceReference -- if None, a default 'SourceRef()' is used.
        """
        assert StateMachine.__class__.__name__ == "StateMachine"

        if SourceReference is None:
            SourceReference = SourceRef()

        self.name               = Name
        self.__state_machine    = StateMachine
        self.sr                 = SourceReference
        self.regular_expression = RE
Exemple #13
0
 def add_match_priority(self, ThePattern, fh):
     """Record a request to re-prioritize 'ThePattern'.

     A 'PatternRepriorization' made of the pattern, its incidence id, and
     the current source position in this mode is appended to
     'self.reprioritization_info_list'. Purpose as stated originally:
     whenever a pattern identical to 'ThePattern' occurs in the mode, its
     priority is adapted to the current pattern index.
     """
     incidence_id = ThePattern.incidence_id()
     origin       = SourceRef.from_FileHandle(fh, self.name)
     self.reprioritization_info_list.append(
         PatternRepriorization(ThePattern, incidence_id, origin))
Exemple #14
0
 def add_match_deletion(self, ThePattern, fh):
     """Note down that 'ThePattern' is to be deleted.

     Appends a 'PatternDeletion' (pattern, incidence id of the pattern,
     source reference of 'fh' inside this mode) to the mode's
     'deletion_info_list'. Original intent: if a base mode contains an
     identical pattern, that pattern has to be deleted.
     """
     pattern_id   = ThePattern.incidence_id()
     where        = SourceRef.from_FileHandle(fh, self.name)
     deletion     = PatternDeletion(ThePattern, pattern_id, where)
     self.deletion_info_list.append(deletion)
Exemple #15
0
    def add_pattern_action_pair(self, ThePattern, TheAction, fh):
        """Append the pair ('ThePattern', 'TheAction') to the list of
        pattern-action pairs of this mode.

        The action receives the current position of 'fh' inside this mode
        as source reference. If the pattern has a trivial begin-of-line
        pre-context, begin-of-line support is requested on the blackboard.
        """
        assert ThePattern.check_consistency()

        if ThePattern.pre_context_trivial_begin_of_line_f:
            blackboard.required_support_begin_of_line_set()

        origin = SourceRef.from_FileHandle(fh, self.name)
        TheAction.set_source_reference(origin)

        pair = PatternActionInfo(ThePattern, TheAction)
        self.pattern_action_pair_list.append(pair)
Exemple #16
0
def do(UTF8_String_or_Stream, PatternDict, 
       AllowNothingIsNecessaryF = False, SpecialTerminator=None):
    """Parse a regular expression and return it as a 'Pattern' object.

    UTF8_String_or_Stream    -- either a 'str', which is wrapped into a
                                'StringIO', or a stream that is read directly.
    PatternDict              -- dictionary passed on to
                                'snap_conditional_expression()'; None is
                                treated like an empty dictionary.
    AllowNothingIsNecessaryF -- passed through to the 'Pattern' constructor.
    SpecialTerminator        -- a character which, besides whitespace, may
                                directly follow the pattern.

    Returns None, with the stream set back to where parsing started, if
    'snap_conditional_expression()' delivers no core state machine.
    Otherwise the 'Pattern' is returned. An error is reported if the
    pattern is followed by something other than whitespace, the special
    terminator, or the end of the stream.

    Side effect: the module-level 'SPECIAL_TERMINATOR' is set to
    'SpecialTerminator'.
    """
    global SPECIAL_TERMINATOR 
    assert type(AllowNothingIsNecessaryF) == bool
    # 'None' is explicitly handled below, so it must pass the type check.
    assert PatternDict is None or type(PatternDict) == dict

    # SPECIAL_TERMINATOR --> if string is not only to be terminated by ' '
    SPECIAL_TERMINATOR = SpecialTerminator

    def __ensure_whitespace_follows(InitialPos, stream):
        tmp = stream.read(1)
        if tmp == "":
            # End of stream: no character has been consumed, so there is
            # nothing to put back. Stepping back here would un-read the
            # last character of the pattern itself.
            return
        if tmp.isspace() or tmp == SPECIAL_TERMINATOR:
            # Put the terminating character back for the caller.
            stream.seek(-1, 1)
            return

        end_position = stream.tell() - 1
        stream.seek(InitialPos)
        pattern_str = stream.read(end_position - InitialPos)
        error_msg("Pattern definition '%s' not followed by whitespace.\n" % pattern_str + \
                  "Found subsequent character '%s'." % tmp, 
                  stream)

    if type(UTF8_String_or_Stream) == str: stream = StringIO(UTF8_String_or_Stream)
    else:                                  stream = UTF8_String_or_Stream    

    if PatternDict is None: PatternDict = {}

    initial_position = stream.tell()

    # -- check for the begin of line condition (BOL)
    begin_of_line_f = bool(check(stream, '^'))
    
    # -- MAIN: transform the pattern into a state machine
    pre, core, post = snap_conditional_expression(stream, PatternDict)

    if core is None: 
        stream.seek(initial_position)
        return None

    # -- check for end of line condition (EOL) 
    end_of_line_f = bool(check(stream, '$'))

    # -- check for terminating whitespace
    __ensure_whitespace_follows(initial_position, stream)
    
    pattern = Pattern(CoreSM        = core, 
                      BeginOfLineF  = begin_of_line_f,
                      PreContextSM  = pre,
                      EndOfLineF    = end_of_line_f,
                      PostContextSM = post,
                      Sr            = SourceRef.from_FileHandle(stream),
                      PatternString = read_pattern_string(stream, initial_position),
                      AllowNothingIsNecessaryF = AllowNothingIsNecessaryF)
    
    return pattern
Exemple #17
0
 def add_match_priority(self, ThePattern, fh):
     """Note down that 'ThePattern' is to be re-prioritized.

     Appends a 'PatternRepriorization' (pattern, incidence id of the
     pattern, source reference of 'fh' inside this mode) to the mode's
     'reprioritization_info_list'. Original intent: an identical pattern
     occurring in the mode gets its priority adapted to the current
     pattern index.
     """
     pattern_id       = ThePattern.incidence_id()
     where            = SourceRef.from_FileHandle(fh, self.name)
     reprioritization = PatternRepriorization(ThePattern, pattern_id, where)
     self.reprioritization_info_list.append(reprioritization)
Exemple #18
0
    def default_setting(self, ModeName):
        """Return the default 'OptionSetting' for the mode 'ModeName'.

        Returns None if no default value is present. A default value that
        is a plain function is called and its result becomes the content
        of the setting; any other value is used as it is. The setting
        carries a default-constructed 'SourceRef()'.
        """
        default = self.__default_value
        if default is None:
            return None

        if isinstance(default, types.FunctionType):
            default = default()

        return OptionSetting(default, SourceRef(), ModeName)
Exemple #19
0
    def __init__(self, Core=None):
        """Set up the descriptor either from the global 'Setup' (if 'Core'
        is None) or by taking over the members of 'Core'.

        When taking over from 'Core', the attribute values are assigned as
        they are, i.e. 'distinct_db' and 'union_db' refer to the very same
        objects as in 'Core'.
        """
        if Core is not None:
            # Take over the members of the given core, one by one.
            for attribute in ("_file_name",
                              "_file_name_implementation",
                              "class_name",
                              "class_name_safe",
                              "name_space",
                              "open_for_derivation_f",
                              "token_contains_token_id_f",
                              "token_id_type",
                              "column_number_type",
                              "line_number_type",
                              "distinct_db",
                              "union_db"):
                setattr(self, attribute, getattr(Core, attribute))

            for name in token_type_code_fragment_db.keys():
                self.__dict__[name] = Core.__dict__[name]
            return

        # No core given: derive the settings from the command line setup.
        self._file_name                = Setup.output_token_class_file
        self._file_name_implementation = Setup.output_token_class_file_implementation

        if Setup.token_class_name.find("::") != -1:
            # A namespaced class name is split; the parts are written back
            # into 'Setup'.
            name, name_space, name_safe = \
                    read_namespaced_name(Setup.token_class_name,
                                         "token class (options --token-class, --tc)")
            Setup.token_class_name       = name
            Setup.token_class_name_space = name_space
            Setup.token_class_name_safe  = name_safe

        self.class_name                = Setup.token_class_name
        self.class_name_safe           = Setup.token_class_name_safe
        self.name_space                = Setup.token_class_name_space
        self.open_for_derivation_f     = False
        self.token_contains_token_id_f = True
        self.token_id_type             = CodeUser("size_t", SourceRef())
        self.column_number_type        = CodeUser("size_t", SourceRef())
        self.line_number_type          = CodeUser("size_t", SourceRef())

        self.distinct_db = {}
        self.union_db    = {}

        for name, default_value in token_type_code_fragment_db.iteritems():
            self.__dict__[name] = default_value
Exemple #20
0
    def add_pattern_action_pair(self, ThePattern, TheAction, fh):
        """Register 'TheAction' as the action for 'ThePattern' in this mode.

        -- Requests begin-of-line support on the blackboard, if the pattern
           has a trivial begin-of-line pre-context.
        -- Sets the source reference of the action to the current position
           of 'fh' in this mode.
        -- Appends a 'PatternActionInfo' to 'self.pattern_action_pair_list'.
        """
        assert ThePattern.check_consistency()

        needs_begin_of_line_f = ThePattern.pre_context_trivial_begin_of_line_f
        if needs_begin_of_line_f:
            blackboard.required_support_begin_of_line_set()

        action_sr = SourceRef.from_FileHandle(fh, self.name)
        TheAction.set_source_reference(action_sr)

        info = PatternActionInfo(ThePattern, TheAction)
        self.pattern_action_pair_list.append(info)
Exemple #21
0
def __extra_option_extract_from_file(FileName):
    """Extract an option section from a given file. The quex command line 
       options may be given in a section surrounded by '<<<QUEX-OPTIONS>>>'
       markers. For example:

           <<<QUEX-OPTIONS>>>
              --token-class-file      Common-token
              --token-class           Common::Token
              --token-id-type         uint32_t
              --buffer-element-type   uint8_t
              --lexeme-null-object    ::Common::LexemeNullObject
              --foreign-token-id-file Common-token_ids
           <<<QUEX-OPTIONS>>>

       This function extracts those options and builds a new 'argv' array, i.e.
       an array of strings as if they would come from the command line.

       RETURNS: (argv, location_list)

           argv          -- flat list of all option strings, or None if there
                            is no opening marker or no option in the section.
           location_list -- list of pairs (SourceRef, options of that line);
                            empty if there is no opening marker.

       The line numbers in the source references are counted from the
       beginning of the file (first line = 1). The file is closed before
       the function is left.
    """
    MARKER = "<<<QUEX-OPTIONS>>>"
    fh     = open_file_or_die(FileName)

    result        = []
    location_list = []
    try:
        # -- search for the opening marker; every line read is counted, so
        #    that 'line_n' is an absolute line number of the file.
        line_n = 0
        while True:
            line = fh.readline()
            if line == "":
                return None, [] # Simply no starting marker has been found
            line_n += 1
            if line.find(MARKER) != -1: 
                pos = fh.tell()
                break

        # -- collect options until the closing marker
        while True:
            line = fh.readline()
            if line == "":
                # Report the error at the position behind the opening marker.
                fh.seek(pos)
                error_msg("Missing terminating '%s'." % MARKER, fh)
                # Only reached if 'error_msg()' returns: do not loop forever.
                break
            line_n += 1

            if line.find(MARKER) != -1: 
                break
            
            # Everything before the first '-' of a line is ignored.
            idx = line.find("-")
            if idx == -1: continue
            options = line[idx:].split()

            location_list.append((SourceRef(FileName, line_n), options))
            result.extend(options)
    finally:
        fh.close()

    if len(result) == 0: return None, location_list

    return result, location_list
Exemple #22
0
def parse_pattern_name_definitions(fh):
    """Parse a 'define' region, i.e. a brace-enclosed list of lines like

          WHITESPACE  [ \\t\\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"

       Each entry consists of a name followed by a regular expression.
       Every entry is stored as 'PatternShorthand' in
       'blackboard.shorthand_db' under its name.

       Errors are reported for: a missing opening '{', a name that is
       already defined, a name without regular expression, and a regular
       expression with pre- or post-context.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)

        # -- closing brace => done
        if check(fh, "}"):
            break

        # -- name of the pattern
        skip_whitespace(fh)
        name = read_identifier(fh, OnMissingStr="Missing identifier for pattern definition.")

        if blackboard.shorthand_db.has_key(name):
            error_msg("Second definition of pattern '%s'.\n" % name + \
                      "Pattern names must be unique.", fh)

        skip_whitespace(fh)

        if check(fh, "}"):
            error_msg("Missing regular expression for pattern definition '%s'." % \
                      name, fh)

        # -- regular expression
        #    (No transformation into a particular codec happens here. State
        #     machines are transformed once, after they have been expanded
        #     as patterns in a mode.)
        pattern = regular_expression.parse(fh, AllowNothingIsFineF = True)

        if pattern.has_pre_or_post_context():
            error_msg("Pattern definition with pre- and/or post-context.\n" + \
                      "Pre- and Post-Contexts can only be defined inside mode definitions.", fh)

        blackboard.shorthand_db[name] = \
                PatternShorthand(name, pattern.sm,
                                 SourceRef.from_FileHandle(fh), pattern.pattern_string())
Exemple #23
0
def create_ALL_BUT_NEWLINE_state_machine(stream):
    """Return a state machine with a single accepting transition from the
    initial state, triggered by every character of the buffer codec's
    source set except newline.

    stream -- only used to derive the source reference for the error that
              is reported if the resulting character set is empty.
    """
    sm = StateMachine()

    # NOTE: Buffer control characters are supposed to be filtered out by
    #       the code generator.
    newline_set   = NumberSet(Interval(ord("\n")))
    admissible    = Setup.buffer_codec.source_set
    all_but_newline = newline_set.get_complement(admissible)

    if all_but_newline.is_empty():
        error_msg("The set of admissible characters contains only newline.\n"
                  "The '.' for 'all but newline' is an empty set.",
                  SourceRef.from_FileHandle(stream))

    sm.add_transition(sm.init_state_index, all_but_newline, AcceptanceF=True)
    return sm
Exemple #24
0
def __parse_definition_head(fh, result):
    """Parse the head of a definition: '<pattern> => <identifier>'.

    Instead of a pattern the keyword '\\else' may appear; then the returned
    pattern is None. The former keyword '\\default' is reported as error.
    The identifier must be an element of 'result.identifier_list'.

    Returns: (pattern, identifier, source reference taken after the head).
    """
    if check(fh, "\\default"):
        error_msg("'\\default' has been replaced by keyword '\\else' since quex 0.64.9!", fh)
    elif check(fh, "\\else"):
        pattern = None
    else:
        pattern = regular_expression.parse(fh)

    # -- '=>'
    skip_whitespace(fh)
    check_or_die(fh, "=>", " after character set definition.")

    # -- specifier
    skip_whitespace(fh)
    specifier = read_identifier(fh, OnMissingStr="Missing identifier for indentation element definition.")
    verify_word_in_list(specifier, result.identifier_list,
                        "Unrecognized specifier '%s'." % specifier, fh)
    skip_whitespace(fh)

    head_sr = SourceRef.from_FileHandle(fh)
    return pattern, specifier, head_sr
Exemple #25
0
    def straighten_open_line_pragmas(self, FileName):
        """Rewrite 'FileName' such that every line which consists only of
        the text of 'self._SOURCE_REFERENCE_END()' is replaced by the text
        of 'self._SOURCE_REFERENCE_BEGIN()' for the file itself.

        All other lines are copied unchanged. The result is written back
        with 'write_safely_and_close()'.
        """
        file_reference = Setup.get_file_reference(FileName)
        open_pragma    = self._SOURCE_REFERENCE_END().strip()

        output = []
        # Counter starts at 1 (NOT: 0!). It is incremented for every line
        # read and once more for every replaced line.
        # NOTE(review): the extra increment presumably accounts for the
        # replacement text spanning one more line -- confirm against
        # '_SOURCE_REFERENCE_BEGIN()'.
        line_n = 1
        fh = open_file_or_die(FileName)
        while True:
            line = fh.readline()
            line_n += 1
            if not line:
                break

            if line.strip() == open_pragma:
                line_n += 1
                reference = SourceRef(file_reference, line_n)
                output.append(self._SOURCE_REFERENCE_BEGIN(reference))
            else:
                output.append(line)
        fh.close()

        write_safely_and_close(FileName, "".join(output))
Exemple #26
0
def __parse_definition_head(fh, result):
    """Read 'pattern => identifier' from 'fh'.

    pattern    -- a regular expression, or the keyword '\\else' for which
                  None is delivered. '\\default' is no longer admissible
                  and triggers an error message.
    identifier -- checked against 'result.identifier_list'.

    Returns the triple (pattern, identifier, SourceRef), where the source
    reference is taken at the position behind the head.
    """
    if check(fh, "\\default"):
        error_msg(
            "'\\default' has been replaced by keyword '\\else' since quex 0.64.9!",
            fh)
    elif check(fh, "\\else"):
        pattern = None
    else:
        pattern = regular_expression.parse(fh)

    skip_whitespace(fh)
    check_or_die(fh, "=>", " after character set definition.")
    skip_whitespace(fh)

    missing_msg = "Missing identifier for indentation element definition."
    name        = read_identifier(fh, OnMissingStr=missing_msg)

    unknown_msg = "Unrecognized specifier '%s'." % name
    verify_word_in_list(name, result.identifier_list, unknown_msg, fh)

    skip_whitespace(fh)

    return pattern, name, SourceRef.from_FileHandle(fh)
Exemple #27
0
def __parse_element(new_mode, fh):
    """Read the next element of a mode definition.

    An element is either an event handler (recognized by
    '__parse_event()') or a regular expression followed by an action.

    Returns: False, if a closing '}' has been found.
             True, else.

    If the stream ends prematurely, 'error_eof()' is called after the
    stream has been set back to the most recently stored position.
    """
    context   = "pattern or event handler"
    error_pos = fh.tell()
    try:
        skip_whitespace(fh)
        # NOTE: 'read_word' is not an option, since parsing must continue
        #       right behind the whitespace when a regular expression is
        #       to be parsed.
        error_pos = fh.tell()

        word = read_until_whitespace(fh)
        if word == "}":
            return False
        elif __parse_event(new_mode, fh, word):
            # 'on_entry', 'on_exit', ... has been handled.
            return True

        # Not an event => go back and parse a regular expression.
        fh.seek(error_pos)
        context = "start of mode element: regular expression"
        pattern = regular_expression.parse(fh)
        pattern_sr = SourceRef.from_FileHandle(fh, new_mode.name)
        pattern.set_source_reference(pattern_sr)

        error_pos = fh.tell()
        context   = "start of mode element: code fragment for '%s'" \
                    % pattern.pattern_string()

        __parse_action(new_mode, fh, pattern.pattern_string(), pattern)

    except EndOfStreamException:
        fh.seek(error_pos)
        error_eof(context, fh)

    return True
Exemple #28
0
def parse_line_column_counter(fh):
    """Parse a line/column counter definition from 'fh'.

    A 'ParserDataLineColumn' object, created with the current source
    reference, is handed to '__parse()'. The object delivered by
    '__parse()' is finalized and returned.
    """
    parser_data = ParserDataLineColumn(SourceRef.from_FileHandle(fh))
    counter_db  = __parse(fh, parser_data)
    counter_db.finalize()
    return counter_db
Exemple #29
0
def parse_section(fh):
    """Parse one top-level section: read the section title and dispatch to
    the parser which is responsible for that kind of section.

    The title must be an element of 'blackboard.all_section_title_list'.
    Results are stored on the blackboard (or by the sub-parsers); the
    function itself returns None.

    An end of stream inside a section is reported by 'error_eof()' at the
    position where the section began. An end of stream while reading the
    section title is NOT caught here.
    """
    global default_token_type_definition_triggered_by_mode_definition_f

    # NOTE: End of File is supposed to be reached when trying to read a new
    #       section. Thus, the end-of-file catcher does not encompass the beginning.
    position = fh.tell()
    skip_whitespace(fh)
    word = read_identifier(fh, OnMissingStr="Missing section title")

    verify_word_in_list(word, blackboard.all_section_title_list, 
                        "Unknown quex section '%s'" % word, fh)
    try:
        # (*) determine what is defined
        #
        #     -- 'mode { ... }'     => define a mode
        #     -- 'start = ...;'     => define the name of the initial mode
        #     -- 'header { ... }'   => define code that is to be pasted on top
        #                              of the engine (e.g. "#include<...>")
        #     -- 'body { ... }'     => define code that is to be pasted in the class' body
        #                              of the engine (e.g. "public: int  my_member;")
        #     -- 'init { ... }'     => define code that is to be pasted in the class' constructors
        #                              of the engine (e.g. "my_member = -1;")
        #     -- 'define { ... }'   => define patterns shorthands such as IDENTIFIER for [a-z]+
        #     -- 'repeated_token_id = QUEX_TKN_ ...;' => enables token repetition, defines
        #                                                the token id to be repeated.
        #     -- 'token { ... }'    => define token ids
        #     -- 'token_type { ... }'  => define a customized token type
        #
        if word in blackboard.fragment_db.keys():
            # Plain code fragment: stored as attribute of the blackboard
            # under the name which 'fragment_db' assigns to the title.
            element_name = blackboard.fragment_db[word]
            fragment     = code_fragment.parse(fh, word, AllowBriefTokenSenderF=False)        
            blackboard.__dict__[element_name] = fragment
            return

        elif word == "start":
            mode_name = parse_identifier_assignment(fh)
            if mode_name == "":
                error_msg("Missing mode_name after 'start ='", fh)

            elif not blackboard.initial_mode.sr.is_void():
                # A non-void source reference means: defined before.
                error_msg("start mode defined more than once!", fh, DontExitF=True)
                error_msg("previously defined here", blackboard.initial_mode.sr)
             
            blackboard.initial_mode = CodeUser(mode_name, SourceRef.from_FileHandle(fh))
            return

        elif word == "repeated_token":
            blackboard.token_repetition_token_id_list = parse_token_id_definitions(fh, NamesOnlyF=True)
            # The names are delivered with token id prefix; the database
            # keys are compared without it.
            for token_name in blackboard.token_repetition_token_id_list:
                verify_word_in_list(token_name[len(Setup.token_id_prefix):],
                                    blackboard.token_id_db.keys(),
                                    "Token ID '%s' not yet defined." % token_name,
                                    fh, ExitF=False, 
                                    SuppressCode=NotificationDB.warning_repeated_token_not_yet_defined)
            return
            
        elif word == "define":
            parse_pattern_name_definitions(fh)
            return

        elif word == "token":       
            if Setup.token_id_foreign_definition:
                error_msg("Token id file '%s' has been specified.\n" \
                          % Setup.token_id_foreign_definition_file \
                          + "All token ids must be specified there. Section 'token'\n" \
                          + "is not allowed.", fh)

            parse_token_id_definitions(fh)
            return

        elif word == "token_type":       

            if Setup.token_class_file != "":
                # NOTE: The leading space of the last string separates the
                #       quoted file name from the rest of the sentence.
                error_msg("Section 'token_type' is intended to generate a token class.\n" \
                          + "However, the manually written token class file '%s'" \
                          % repr(Setup.token_class_file) \
                          + " has been specified on the command line.", fh)
       
            if blackboard.token_type_definition is None:
                blackboard.token_type_definition = token_type.parse(fh)
                return

            # Error case:
            if default_token_type_definition_triggered_by_mode_definition_f:
                error_msg("Section 'token_type' must appear before first mode definition.", fh)
            else:
                error_msg("Section 'token_type' has been defined twice.", fh, DontExitF=True)
                error_msg("Previously defined here.",
                          blackboard.token_type_definition.sr.file_name,
                          blackboard.token_type_definition.sr.line_n)
            return

        elif word == "mode":
            # When the first mode is parsed then a token_type definition must be 
            # present. If not, the default token type definition is considered.
            if blackboard.token_type_definition is None:
                parse_default_token_definition()
                default_token_type_definition_triggered_by_mode_definition_f = True

            mode.parse(fh)
            return

        else:
            # This case should have been caught by the 'verify_word_in_list' function
            assert False

    except EndOfStreamException:
        fh.seek(position)
        error_eof(word, fh)
Exemple #30
0
def parse_token_id_definitions(fh, NamesOnlyF=False):
    """Reads the body '{ name [= number]; ... }' that follows a 'token' 
       section identifier from 'fh'.

       NamesOnlyF == False: Each identifier is entered as 'TokenInfo' into
                            'blackboard.token_id_db' together with the number 
                            given after '=' (None, if no number is given). 
                            Nothing is returned.

       NamesOnlyF == True:  The database is not touched and no '= number' is
                            accepted. The sorted list of the identifiers, each
                            one with the token id prefix in front, is returned.

       An identifier that starts with the token id prefix (with or without 
       name space) causes a warning, since the prefix is added automatically.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    token_prefix = Setup.token_id_prefix
    plain_prefix = Setup.token_id_prefix_plain # i.e. without name space included

    if NamesOnlyF:
        name_set = set()

    skip_whitespace(fh)
    if not check(fh, "{"):
        error_msg("Missing opening '{' for after 'token' section identifier.", fh)

    while check(fh, "}") == False:
        skip_whitespace(fh)

        name = read_identifier(fh, TolerantF=True,
                               OnMissingStr="Missing valid token identifier.")

        # -- the full prefix is considered first, then the plain prefix
        found_prefix = None
        for known_prefix in (token_prefix, plain_prefix):
            if len(known_prefix) != 0 and name.find(known_prefix) == 0:
                found_prefix = known_prefix
                break

        if found_prefix is not None:
            warning  = "Token identifier '%s' starts with token prefix '%s'.\n" \
                       % (name, found_prefix)
            warning += "Token prefix is mounted automatically. This token id appears in the source\n"
            warning += "code as '%s%s'." % (token_prefix, name)
            error_msg(warning, fh, DontExitF=True,
                      SuppressCode=NotificationDB.warning_token_id_prefix_appears_in_token_id_name)

        skip_whitespace(fh)

        if NamesOnlyF:
            # Names only: the name must be followed directly by ';'
            name_set.add(token_prefix + name)
            if check(fh, ";") == False:
                error_msg("Missing ';' after token identifier '%s'.\n" % name, fh)
            continue

        # Optional: '=' followed by the numeric value of the token id
        number = None
        if check(fh, "="):
            skip_whitespace(fh)
            number = read_integer(fh)
            if number is None:
                error_msg("Missing number after '=' for token identifier '%s'." % name, fh)

        if check(fh, ";") == False:
            error_msg("Missing ';' after token identifier '%s'." % name, fh)

        blackboard.token_id_db[name] = \
                TokenInfo(name, number,
                          SourceReference=SourceRef.from_FileHandle(fh))

    if NamesOnlyF:
        return sorted(name_set)

    # Otherwise: changes have been applied to 'blackboard.token_id_db'
    return
Exemple #31
0
def parse(ForeignTokenIdFile, CommentDelimiterList):
    """This function somehow interprets the user defined token id file--if there is
       one. It does this in order to find the names of defined token ids. It does
       some basic interpretation and include file following, but: **it is in no
       way perfect**. Since its only purpose is to avoid warnings about token ids
       that are not defined it is not essential that it may fail sometimes.

       It is more like a nice feature that quex tries to find definitions on its own.
       
       Nevertheless, it should work in the large majority of cases.

       ARGUMENTS:

        'ForeignTokenIdFile'   name of the file where the search starts.

        'CommentDelimiterList' passed on to '__delete_comments()' which strips
                               the comments from the content of each file.

       SIDE EFFECTS:

        -- The names of all token ids found are added to 'token_id_foreign_set'.
        -- For each token id found a 'TokenInfo' without numeric value is 
           entered into 'token_id_db' under its prefix-less name.
        -- Included files that are not accessible, or that have been treated 
           before, are reported by 'error_msg()' unless the notification is
           suppressed.

       RETURNS: None.
    """
    # Regular expression to find '#include <something>' and extract the 'something'
    # in a 'group'. Note that '(' ')' cause the storage of parts of the match.
    IncludeRE = "#[ \t]*include[ \t]*[\"<]([^\">]+)[\">]"

    include_re_obj = re.compile(IncludeRE)

    def get_line_n_of_include(FileName, IncludedFileName):
        """RETURNS: Number of the first line in 'FileName' that contains an 
                    include statement mentioning 'IncludedFileName'. If there
                    is no such line, the total number of lines is returned.
        """
        fh = open_file_or_die(FileName, Mode="rb")
        line_n = 0
        try:
            for line in fh.readlines():
                line_n += 1
                if     include_re_obj.search(line) is not None \
                   and line.find(IncludedFileName) != -1:
                    break
            else:
                # Included file must appear in including file, but tolerate for safety.
                pass
        finally:
            # Close the file even if reading or searching fails.
            fh.close()
        return line_n

    # validate(...) ensured, that the file exists.
    work_list = [ForeignTokenIdFile]
    done_list = []       # normalized names of the files treated so far
    not_found_list = []  # (including file, line number, included file)
    recursive_list = []  # (including file, line number, included file)
    found_db = {}        # file name --> list of token ids found in it
    while len(work_list) != 0:
        file_name = work_list.pop()
        content = __delete_comments(
            get_file_content_or_die(file_name, Mode="rb"),
            CommentDelimiterList)
        done_list.append(os.path.normpath(file_name))

        # (*) Search for TokenID definitions
        #     If region delimiters are specified, only the content between 
        #     them is considered. A delimiter that does not match is ignored.
        begin_i = 0
        end_i = len(content)
        if Setup.token_id_foreign_definition_file_region_begin_re is not None:
            match = Setup.token_id_foreign_definition_file_region_begin_re.search(
                content)
            if match is not None:
                begin_i = match.end()

        if Setup.token_id_foreign_definition_file_region_end_re is not None:
            match = Setup.token_id_foreign_definition_file_region_end_re.search(
                content, pos=begin_i)
            if match is not None:
                end_i = match.start()
        content = content[begin_i:end_i]

        token_id_list = __extract_token_ids(content, file_name)
        if len(token_id_list) != 0:
            found_db[file_name] = copy(token_id_list)

        token_id_foreign_set.update(token_id_list)
        for token_name in token_id_list:
            # NOTE: The line number might be wrong, because of the comment deletion
            line_n = 0
            # NOTE: The actual token value is not important, since the token's numeric
            #       identifier is defined in the user's header. We do not care.
            prefix_less_token_name = cut_token_id_prefix(token_name)
            token_id_db[prefix_less_token_name] = \
                        TokenInfo(prefix_less_token_name, None, None, SourceRef(file_name, line_n))

        # (*) find "#include" statements
        #     'set' ensures that each entry is unique
        include_file_set = set(include_re_obj.findall(content))

        #     -- ensure that included files exist and are not included twice
        for included_file in include_file_set:
            normed_included_file = os.path.normpath(included_file)
            # 'done_list' contains normalized names. So, the normalized name
            # must be compared. Otherwise, a second inclusion written as 
            # './file.h' would be dropped without being reported.
            if normed_included_file in done_list:
                line_n = get_line_n_of_include(file_name, included_file)
                recursive_list.append((file_name, line_n, included_file))
            elif not os.access(normed_included_file, os.F_OK):
                line_n = get_line_n_of_include(file_name, included_file)
                not_found_list.append((file_name, line_n, included_file))
            else:
                work_list.append(included_file)

    if Setup.token_id_foreign_definition_file_show_f:
        if len(found_db) == 0:
            error_msg("No token ids with prefix '%s' found in " %
                      Setup.token_id_prefix + "'%s' or included files." %
                      Setup.token_id_foreign_definition_file,
                      NoteF=True)
        else:
            txt = []
            for file_name, result in found_db.iteritems():
                # 'found_db' only contains non-empty lists => 'max()' is safe.
                result = set(result)
                L = max(map(len, result))
                txt.append("Token ids found in file '%s' {\n" % file_name)
                for name in sorted(result):
                    shorty = cut_token_id_prefix(name)
                    fully = Setup.token_id_prefix + shorty
                    txt.append("     %s %s=> '%s'\n" %
                               (fully, space(L, name), shorty))
                txt.append("}")
                txt.append("\n")

            # Drop the newline after the last closing '}'
            if txt: txt = txt[:-1]
            error_msg("".join(txt), NoteF=True)

    ErrorN = NotificationDB.token_id_ignored_files_report
    if ErrorN not in Setup.suppressed_notification_list:
        if len(not_found_list) != 0:
            not_found_list.sort()
            error_msg("Files not found:",
                      not_found_list[0][0],
                      LineN=not_found_list[0][1],
                      DontExitF=True)
            for file_name, line_n, included_file in not_found_list:
                error_msg("%s" % included_file,
                          file_name,
                          LineN=line_n,
                          DontExitF=True)

        if len(recursive_list) != 0:
            recursive_list.sort()
            error_msg("Files recursively included (ignored second inclusion):",
                      recursive_list[0][0],
                      LineN=recursive_list[0][1],
                      DontExitF=True)
            for file_name, line_n, included_file in recursive_list:
                error_msg("%s" % included_file,
                          file_name,
                          LineN=line_n,
                          DontExitF=True)

        if len(not_found_list) != 0 or len(recursive_list) != 0:
            # file_name and line_n will be taken from last iteration of last for loop.
            error_msg(
                "\nNote, that quex does not handle C-Preprocessor instructions.",
                file_name,
                LineN=line_n,
                DontExitF=True,
                SuppressCode=ErrorN)
Exemple #32
0
def parse_variable_definition(fh, GroupF=False, already_defined_list=None):
    """PURPOSE: Parsing of a variable definition consisting of 'type' and 'name'.
                Members can be mentioned together in a group, which means that
                they can appear simultaneously. Possible expressions are

                (1) single variables:

                              name0 : type;
                              name1 : type[32];
                              name2 : type*;

                (2) combined variables

                              {
                                  sub_name0 : type0;
                                  sub_name1 : type[64];
                                  sub_name2 : type1*;
                              }

       ARGUMENTS: 

        'GroupF'               allows to have 'nested variable groups' in curly brackets

        'already_defined_list' informs about variable names that have been already
                               chosen. It is only used for groups. If it is 
                               omitted (None), an empty list is used.

       RETURNS:
                 None        on failure to parse a variable definition. Then,
                             the position in 'fh' is restored.
                 [type, name]  when a single variable definition was found. 
                                 [0] = CodeUser containing the type. 
                                 [1] = name of the variable.
                 [sub_db]    if it was a combined variable definition, where
                             'sub_db' is what 'parse_variable_definition_list()' 
                             delivered for the content of the curly brackets.
    
    """
    # A default of '[]' in the signature would be one single list object that 
    # is shared by all calls. Create a fresh list for each call instead.
    if already_defined_list is None:
        already_defined_list = []

    position = fh.tell()

    skip_whitespace(fh)
    name_str = read_identifier(fh)
    if name_str == "":
        # No identifier => only a group '{ ... }' is admissible, if allowed.
        if not GroupF or not check(fh, "{"): 
            fh.seek(position)
            return None
        sub_db = parse_variable_definition_list(fh, "Concurrent union variables", already_defined_list)
        if not check(fh, "}"): 
            fh.seek(position)
            error_msg("Missing closing '}' after concurrent variable definition.", fh)
        return [ sub_db ]

    else:
        name_str = name_str.strip()
        if not check(fh, ":"): error_msg("Missing ':' after identifier '%s'." % name_str, fh)
        
        # The character right after ':' is consumed; it must be whitespace.
        if fh.read(1).isspace() == False:
            error_msg("Missing whitespace after ':' after identifier '%s'.\n" % name_str \
                    + "The notation has to be: variable-name ':' type ';'.", fh)

        # Everything up to the terminating ';' is taken as the type.
        type_str, i = read_until_letter(fh, ";", Verbose=True)
        if i == -1: error_msg("missing ';'", fh)
        type_str = type_str.strip()

        return [ CodeUser(type_str, SourceRef.from_FileHandle(fh)), name_str ]
Exemple #33
0
    """User code as it is taken from some input file. It contains:

          .get_code() -- list of strings or text formatting instructions
                         (including possibly annotations about its origin)
          .sr         -- the source reference where it was taken from
          .mode_name  -- Mode where the code was defined
    """
    def __init__(self, Code, SourceReference):
        """Passes 'Code' and 'SourceReference' unchanged to the constructor 
           of the base class 'CodeFragment'. Nothing else is set up here.
        """
        CodeFragment.__init__(self, Code, SourceReference)

    def clone(self):
        """RETURNS: A new 'CodeUser' object that holds a deep copy of what 
                    '.get_code()' delivers. The source reference '.sr' is 
                    passed on as it is, i.e. it is not copied.
        """
        return CodeUser(deepcopy(self.get_code()), self.sr)


# Module level 'CodeUser' object built from an empty code list and a default
# constructed 'SourceRef'.
# NOTE(review): presumably the placeholder for 'no user code' -- confirm 
#               against the places where it is used.
CodeUser_NULL = CodeUser([], SourceRef())


class CodeTerminal(CodeFragment):
    __slots__ = ("__requires_lexeme_terminating_zero_f",
                 "__requires_lexeme_begin_f", "__pure_code")

    @typed(Code=list,
           SourceReference=SourceRef,
           LexemeRelevanceF=bool,
           LexemeTerminatingZeroF=bool,
           LexemeBeginF=bool,
           PureCode=list)
    def __init__(self,
                 Code,
                 SourceReference=SourceRef_VOID,
Exemple #34
0
def __parse_normal(fh, code_fragment_name):
    """Reads from 'fh' by means of 'read_until_closing_bracket()' with the 
       bracket pair '{' and '}' and wraps what has been read into a 'CodeUser'
       object. The source reference is taken from 'fh' after the reading.

       NOTE: 'code_fragment_name' is not used.
    """
    text = read_until_closing_bracket(fh, "{", "}")
    sr   = SourceRef.from_FileHandle(fh)
    return CodeUser(text, sr)
Exemple #35
0
def parse_indentation(fh):
    """Runs '__parse()' on 'fh' with a new 'ParserDataIndentation' object and
       'IndentationSetupF=True'. The parser data is constructed with the 
       source reference of 'fh' before the parsing starts.

       RETURNS: What '__parse()' delivered, after '.finalize()' has been 
                called on it.
    """
    begin_sr = SourceRef.from_FileHandle(fh)
    data = ParserDataIndentation(begin_sr)

    setup = __parse(fh, data, IndentationSetupF=True)
    setup.finalize()
    return setup
Exemple #36
0
def parse_line_column_counter(fh):
    """Runs '__parse()' on 'fh' with a new 'ParserDataLineColumn' object. The
       parser data is constructed with the source reference of 'fh' before 
       the parsing starts.

       RETURNS: What '__parse()' delivered, after '.finalize()' has been 
                called on it.
    """
    begin_sr = SourceRef.from_FileHandle(fh)
    data = ParserDataLineColumn(begin_sr)

    counter_setup = __parse(fh, data)
    counter_setup.finalize()
    return counter_setup
Exemple #37
0
def parse_variable_definition(fh, GroupF=False, already_defined_list=None):
    """PURPOSE: Parsing of a variable definition consisting of 'type' and 'name'.
                Members can be mentioned together in a group, which means that
                they can appear simultaneously. Possible expressions are

                (1) single variables:

                              name0 : type;
                              name1 : type[32];
                              name2 : type*;

                (2) combined variables

                              {
                                  sub_name0 : type0;
                                  sub_name1 : type[64];
                                  sub_name2 : type1*;
                              }

       ARGUMENTS: 

        'GroupF'               allows to have 'nested variable groups' in curly brackets

        'already_defined_list' informs about variable names that have been already
                               chosen. It is only used for groups. If it is
                               omitted (None), an empty list is used.

       RETURNS:
                 None        on failure to parse a variable definition. Then,
                             the position in 'fh' is restored.
                 [type, name]  when a single variable definition was found. 
                                 [0] = CodeUser containing the type. 
                                 [1] = name of the variable.
                 [sub_db]    if it was a combined variable definition, where
                             'sub_db' is what 'parse_variable_definition_list()'
                             delivered for the content of the curly brackets.
    
    """
    # A default of '[]' in the signature would be one single list object that
    # is shared by all calls. Create a fresh list for each call instead.
    if already_defined_list is None:
        already_defined_list = []

    position = fh.tell()

    skip_whitespace(fh)
    name_str = read_identifier(fh)
    if name_str == "":
        # No identifier => only a group '{ ... }' is admissible, if allowed.
        if not GroupF or not check(fh, "{"):
            fh.seek(position)
            return None
        sub_db = parse_variable_definition_list(fh,
                                                "Concurrent union variables",
                                                already_defined_list)
        if not check(fh, "}"):
            fh.seek(position)
            error_msg(
                "Missing closing '}' after concurrent variable definition.",
                fh)
        return [sub_db]

    else:
        name_str = name_str.strip()
        if not check(fh, ":"):
            error_msg("Missing ':' after identifier '%s'." % name_str, fh)

        # The character right after ':' is consumed; it must be whitespace.
        if fh.read(1).isspace() == False:
            error_msg("Missing whitespace after ':' after identifier '%s'.\n" % name_str \
                    + "The notation has to be: variable-name ':' type ';'.", fh)

        # Everything up to the terminating ';' is taken as the type.
        type_str, i = read_until_letter(fh, ";", Verbose=True)
        if i == -1: error_msg("missing ';'", fh)
        type_str = type_str.strip()

        return [CodeUser(type_str, SourceRef.from_FileHandle(fh)), name_str]
Exemple #38
0
def parse_indentation(fh):
    """Hands 'fh' over to '__parse()' together with a new 
       'ParserDataIndentation' object (constructed with the current source 
       reference of 'fh') and the flag 'IndentationSetupF=True'.

       RETURNS: The object delivered by '__parse()'. Its '.finalize()' has 
                been called before it is returned.
    """
    parser_data = ParserDataIndentation(SourceRef.from_FileHandle(fh))
    indentation_setup = __parse(fh, parser_data, IndentationSetupF=True)
    indentation_setup.finalize()
    return indentation_setup