コード例 #1
0
ファイル: core.py プロジェクト: yifsun/amplify
def argv_is_query_option(Cl, Option, Name, PrevQueryF):
    """Determines whether the setup parameter is a parameter related to 
    queries (or to code generation). If a mixed usage is detected an 
    error is issued.

    Cl         -- command line object; searched for the debug-exception flag.
    Option     -- the command line option itself (unused here; kept for
                  interface stability).
    Name       -- name of the setup parameter under consideration.
    PrevQueryF -- query flag derived from previously seen options;
                  None if no option has been classified yet.

    RETURN: query flag

    The query flag is the same as PrevQueryF, except for one case: when
    PrevQueryF was None (unset) and the option appeared on the command line.
    Then, the return value tells whether the option was a query flag or not.

    ERROR: If there are mixed options, i.e. query flags and code generation
    flags appear at the same time.
    """
    # Query-related setup parameters are marked by the 'query_' name prefix.
    query_f = Name.startswith("query_")

    if   PrevQueryF is None:    return query_f
    elif PrevQueryF == query_f: return query_f

    # If debug exception is enabled, do not trigger error
    if Cl.search(SETUP_INFO["_debug_exception_f"][0]): return query_f

    error_msg("Mixed options: query and code generation mode.\n"
              "The option(s) '%s' cannot be combined with preceding options." \
              % str(SETUP_INFO[Name][0])[1:-1].replace("'",""))
コード例 #2
0
ファイル: mode.py プロジェクト: dkopecek/amplify
def parse(fh):
    """Parse one mode description and enter it into
       'blackboard.mode_description_db'. Once all modes are parsed they can
       be translated into 'real' modes which live in 'blackboard.mode_db'.
    """
    # EOF handling is done by the caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    # The ModeDescription constructor registers the mode in mode_db itself.
    new_mode = ModeDescription(mode_name, SourceRef.from_FileHandle(fh))

    # (*) base modes / option_db
    skip_whitespace(fh)
    delimiter = fh.read(1)
    if delimiter not in (":", "{"):
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if delimiter == ":":
        __parse_option_list(new_mode, fh)

    # (*) pattern-action pairs and event handlers
    while __parse_element(new_mode, fh):
        pass
コード例 #3
0
ファイル: utf8.py プロジェクト: dkopecek/amplify
def utf8_to_unicode(ByteSequence):
    """Decode one UTF8-encoded character into its unicode code point.

    Unfortunately, there is no elegant way to do the utf8-decoding 
    safely in libPython, since due to strange behavior of a python 
    narrow build a character >= 0x10000 may appear as a 2 byte string 
    and cannot be handled by 'ord' in python 2.x.

    Thus: 
           utf8d = codecs.getdecoder("utf-8")
           return ord(utf8d("".join(map(chr, ByteSequence)))[0])

    would be unsafe. That's why we do it by hand here.

    ByteSequence -- sequence of integer byte values that together encode
                    exactly one UTF8 character.

    RETURNS: Integer code point value.
    """
    # Assume that the byte sequence is valid, thus a byte sequence of length 'N'
    # has a N - 1 leading ones in the header plus a zero. Remaining bits in the
    # header are therefore 8 - N. All other bytes in the sequence start with bits '10'
    # and contain 6 bits of useful payload.
    header_bit_n = 8 - len(ByteSequence)
    mask         = (1 << header_bit_n) - 1
    value        = ByteSequence[0] & mask
    for byte in ByteSequence[1:]:
        # The highest two bits in a follow byte in utf8 MUST be '10'.
        # Check this BEFORE blending the payload in, so an invalid byte is
        # reported before it has corrupted 'value'.
        if (byte & 0xC0) != 0x80:
            error_msg("Error in UTF8 encoded file. Inadmissible byte sequence detected. Found byte '%02X'" % byte)
        value <<= 6
        value |=  (byte & 0x3F)   # keep only the 6 payload bits

    return value
コード例 #4
0
def __start_mode(applicable_mode_name_list, mode_name_list):
    """If more than one mode is defined, then that requires an explicit
       definition 'start = mode'.
    """
    assert len(applicable_mode_name_list) != 0

    start_mode = blackboard.initial_mode.get_pure_code()
    if start_mode != "":
        # A start mode was given explicitly; verify presence and applicability.
        file_name = blackboard.initial_mode.filename
        line_n    = blackboard.initial_mode.line_n
        verify_word_in_list(start_mode, mode_name_list,
                            "Start mode '%s' is not defined." % start_mode,
                            file_name, line_n)
        verify_word_in_list(start_mode, applicable_mode_name_list,
                            "Start mode '%s' is inheritable only and cannot be instantiated." % start_mode,
                            file_name, line_n)
        return

    # No explicit 'start = ...': fall back to an applicable mode, which is
    # only unambiguous if there is exactly one.
    start_mode              = applicable_mode_name_list[0]
    blackboard.initial_mode = CodeFragment(start_mode)
    if len(applicable_mode_name_list) > 1:
        error_msg("No initial mode defined via 'start' while more than one applicable mode exists.\n" + \
                  "Use for example 'start = %s;' in the quex source file to define an initial mode." \
                  % start_mode)
コード例 #5
0
ファイル: code_fragment.py プロジェクト: coderjames/pascal
def parse(fh, CodeFragmentName, 
          ErrorOnFailureF=True, AllowBriefTokenSenderF=True, ContinueF=True):
    """RETURNS: An object of class UserCodeFragment containing
                line number, filename, and the code fragment.

                None in case of failure (only possible when ErrorOnFailureF
                is False).
    """
    assert Setup.__class__ == QuexSetup
    assert type(ErrorOnFailureF)        == bool
    assert type(AllowBriefTokenSenderF) == bool

    skip_whitespace(fh)

    word = fh.read(2)
    if len(word) >= 1 and word[0] == "{":
        # Only unput the second character if one was actually read. At end
        # of file 'read(2)' may deliver just '{'; the previous unconditional
        # 'seek(-1, 1)' then pushed the '{' itself back onto the stream.
        if len(word) > 1: fh.seek(-1, 1)
        return __parse_normal(fh, CodeFragmentName)

    elif AllowBriefTokenSenderF and word == "=>":
        return __parse_brief_token_sender(fh, ContinueF)

    elif not ErrorOnFailureF:
        fh.seek(-2, 1)   # unput both characters
        return None
    else:
        error_msg("Missing code fragment after %s definition." % CodeFragmentName, fh)
コード例 #6
0
ファイル: code_fragment.py プロジェクト: coderjames/pascal
def __parse_brief_token_sender(fh, ContinueF):
    # Shorthand for:
    #   { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    LanguageDB = Setup.language_db

    position = fh.tell()
    line_n   = get_current_line_info_number(fh) + 1
    try:
        skip_whitespace(fh)
        position = fh.tell()

        code = __parse_token_id_specification_by_character_code(fh)
        if code == -1:
            # No character code given => a token identifier must follow.
            skip_whitespace(fh)
            identifier = __read_token_identifier(fh)
            skip_whitespace(fh)
            if identifier not in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_token_sender_by_token_name(fh, identifier)
                check_or_die(fh, ";")
            else:
                code = __create_mode_transition_and_token_sender(fh, identifier)
        else:
            code = __create_token_sender_by_character_code(fh, code)

        if code == "":
            return None
        if ContinueF:
            code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
        return UserCodeFragment(code, fh.name, line_n, LanguageDB)

    except EndOfStreamException:
        fh.seek(position)
        error_msg("End of file reached while parsing token shortcut.", fh)
コード例 #7
0
ファイル: counter.py プロジェクト: dkopecek/amplify
def __parse(fh, result, IndentationSetupF=False):
    """Parses pattern definitions of the form:
   
          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words the right hand side *must* be a character set.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    while True:
        skip_whitespace(fh)
        if check(fh, ">"):
            break

        # A regular expression state machine
        pattern, identifier, sr = __parse_definition_head(fh, result)
        if IndentationSetupF and pattern is None:
            error_msg("Keyword '\\else' cannot be used in indentation setup.", fh)

        # '__parse_definition_head()' ensures that only identifiers mentioned in 
        # 'result' are accepted. 
        if IndentationSetupF:
            result.specify(identifier, pattern, sr)
        else:
            value = read_value_specifier(fh, identifier, 1)
            result.specify(identifier, pattern, value, sr)

        if check(fh, ";"):
            continue
        error_msg("Missing ';' after '%s' specification." % identifier, fh)

    return result
コード例 #8
0
ファイル: token_type.py プロジェクト: coderjames/pascal
def parse_section(fh, descriptor, already_defined_list):
    """Wrapper around '__parse_section()' that reports an unexpected end of
    file as a proper error, with the stream restored to the section start.
    """
    start_position = fh.tell()
    try:
        return __parse_section(fh, descriptor, already_defined_list)
    except EndOfStreamException:
        fh.seek(start_position)
        error_msg("End of file reached while parsing token_type section.", fh)
コード例 #9
0
ファイル: code_fragment.py プロジェクト: liancheng/rose
def __parse_brief_token_sender(fh, ContinueF):
    # Shorthand notation for:
    #   { self.send(TKN_SOMETHING); QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN(); }
    language_db = Setup.language_db

    recovery_pos = fh.tell()
    line_n       = get_current_line_info_number(fh) + 1
    try:
        skip_whitespace(fh)
        recovery_pos = fh.tell()

        character_code = __parse_token_id_specification_by_character_code(fh)
        if character_code != -1:
            # The token id was specified directly by a character code.
            code = __create_token_sender_by_character_code(fh, character_code)
        else:
            skip_whitespace(fh)
            token_name = __read_token_identifier(fh)
            skip_whitespace(fh)
            if token_name in ["GOTO", "GOSUB", "GOUP"]:
                code = __create_mode_transition_and_token_sender(fh, token_name)
            else:
                code = __create_token_sender_by_token_name(fh, token_name)
                check_or_die(fh, ";")

        if code == "":
            return None

        if ContinueF:
            code += "QUEX_SETTING_AFTER_SEND_CONTINUE_OR_RETURN();\n"
        return UserCodeFragment(code, fh.name, line_n, language_db)

    except EndOfStreamException:
        fh.seek(recovery_pos)
        error_msg("End of file reached while parsing token shortcut.", fh)
コード例 #10
0
ファイル: indentation_setup.py プロジェクト: liancheng/rose
 def __error_character_set_intersection(Before):
     # Report that the character set currently being defined overlaps with an
     # earlier definition ('Before').
     # NOTE(review): 'Name' and 'FH' are free variables from an enclosing
     # scope not visible in this chunk -- presumably the current set's name
     # and the input file handle; confirm in context.
     # First message is marked DontExitF=True, i.e. it does not terminate.
     error_msg("Character set specification '%s' intersects" % Name,
               FH,
               DontExitF=True,
               WarningF=False)
     # Second message points at the location of the earlier definition.
     error_msg("with definition for '%s' at this place." % Before.name,
               Before.file_name, Before.line_n)
コード例 #11
0
ファイル: indentation_setup.py プロジェクト: liancheng/rose
    def seal(self):
        """Complete the indentation setup: supply default space/grid counters
        (' ' and '\\t') and a default newline state machine wherever nothing
        was specified explicitly.
        """
        if len(self.space_db) == 0 and len(self.grid_db) == 0:
            space_code = ord(' ')
            tab_code   = ord('\t')
            bad        = self.bad_character_set
            # A default character is only usable if it is not marked 'bad'.
            if bad.get().contains(space_code) == False:
                self.specify_space("[ ]", NumberSet(space_code), 1, self.fh)
            if bad.get().contains(tab_code) == False:
                self.specify_grid("[\\t]", NumberSet(tab_code), 4, self.fh)

            if len(self.space_db) == 0 and len(self.grid_db) == 0:
                error_msg("No space or grid defined for indentation counting. Default\n"
                          "values ' ' and '\\t' could not be used since they are specified as 'bad'.",
                          bad.file_name, bad.line_n)

        if self.newline_state_machine.get() is None:
            # Default newline pattern: (\r\n)|(\n)
            sm      = StateMachine()
            end_idx = sm.add_transition(sm.init_state_index,
                                        NumberSet(ord('\n')),
                                        AcceptanceF=True)
            mid_idx = sm.add_transition(sm.init_state_index,
                                        NumberSet(ord('\r')),
                                        AcceptanceF=False)
            sm.add_transition(mid_idx,
                              NumberSet(ord('\n')),
                              end_idx,
                              AcceptanceF=False)
            self.specify_newline("(\\r\\n)|(\\n)", sm, self.fh)
コード例 #12
0
ファイル: token_type.py プロジェクト: liancheng/rose
def parse_section(fh, descriptor, already_defined_list):
    """Parse one token_type section; upon a premature end of file, rewind
    the stream to the section start and emit an error.
    """
    entry_pos = fh.tell()
    try:
        return __parse_section(fh, descriptor, already_defined_list)
    except EndOfStreamException:
        fh.seek(entry_pos)
        error_msg("End of file reached while parsing token_type section.", fh)
コード例 #13
0
ファイル: token_type.py プロジェクト: yifsun/amplify
def parse_standard_members(fh, section_name, descriptor, already_defined_list):
    """Parse the 'standard' member section of a token_type definition and
    record the member types in 'descriptor'.
    """
    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type section '%s'." % section_name, fh)

    position = fh.tell()

    while True:
        try:
            definition = parse_variable_definition(fh)
        except EndOfStreamException:
            fh.seek(position)
            error_eof("standard", fh)

        if definition is None:
            return
        type_code_fragment, name = definition[0], definition[1]

        __validate_definition(type_code_fragment, name,
                              already_defined_list, StandardMembersF=True)

        if name == "id":
            descriptor.token_id_type = type_code_fragment
        elif name == "column_number":
            descriptor.column_number_type = type_code_fragment
        elif name == "line_number":
            descriptor.line_number_type = type_code_fragment
        else:
            # Unknown member names are rejected by the variable parser.
            assert False

        already_defined_list.append([name, type_code_fragment])
コード例 #14
0
ファイル: construct.py プロジェクト: coderjames/pascal
def __delete_forbidden_ranges(sm, fh):
    """Unicode does define all code points >= 0. Thus there can be no code points
       below zero as it might result from some number set operations.

       NOTE: This operation might result in orphaned states that have to 
             be deleted.
    """
    global Setup

    # Supremum of values a buffer element can hold (e.g. 256 for 1 byte).
    character_value_limit = Setup.get_character_value_limit()
    for state in sm.states.values():

        # NOTE(review): transitions are deleted at the bottom of this loop
        # while iterating '.items()'. This is only safe because '.items()'
        # returns a materialized list in Python 2 -- confirm before porting.
        for target_state_index, trigger_set in state.transitions().get_map().items():

            # Make sure, all transitions lie inside the unicode code range 
            if trigger_set.minimum() < UnicodeInterval.begin or trigger_set.supremum() >= UnicodeInterval.end:
                trigger_set.intersect_with(UnicodeInterval)

            # Characters beyond the buffer element capacity cannot appear in
            # the input stream => refuse the pattern.
            if trigger_set.supremum() > character_value_limit:
                error_msg("Pattern contains character beyond the scope of the buffer element size (%s)\n" \
                          % Setup.get_character_value_limit_str() + \
                          "Please, cut the character range of the regular expression,\n"
                          "adapt \"--buffer-element-size\" or \"--buffer-element-type\",\n"       + \
                          "or specify '--buffer-element-size-irrelevant' to ignore the issue.", fh)

            if Setup.buffer_codec in ["utf16-le", "utf16-be"]:
                # Delete the forbidden interval: D800-DFFF
                # (the UTF16 surrogate range carries no code points).
                if trigger_set.has_intersection(ForbiddenRange):
                    error_msg("Pattern contains characters in unicode range 0xD800-0xDFFF.\n"
                              "This range is not covered by UTF16. Cutting Interval.", fh, DontExitF=True)
                    trigger_set.cut_interval(ForbiddenRange)
            
            # If the operation resulted in cutting the path to the target state, then delete it.
            if trigger_set.is_empty():
                state.transitions().delete_transitions_to_target(target_state_index)
コード例 #15
0
ファイル: token_type.py プロジェクト: dkopecek/amplify
def parse_standard_members(fh, section_name, descriptor, already_defined_list):
    """Read the standard token members ('id', 'column_number', 'line_number')
    and store their types in the given descriptor.
    """
    if not check(fh, "{"):
        error_msg("Missing opening '{' at begin of token_type section '%s'." % section_name, fh)

    # Which descriptor attribute each standard member name maps to.
    attribute_of = {
        "id":            "token_id_type",
        "column_number": "column_number_type",
        "line_number":   "line_number_type",
    }

    position = fh.tell()

    while True:
        try:
            definition = parse_variable_definition(fh)
        except EndOfStreamException:
            fh.seek(position)
            error_eof("standard", fh)

        if definition is None:
            return
        type_code_fragment, name = definition[0], definition[1]

        __validate_definition(type_code_fragment, name,
                              already_defined_list, StandardMembersF=True)

        # Anything else should have been caught by the variable parser.
        assert name in attribute_of
        setattr(descriptor, attribute_of[name], type_code_fragment)

        already_defined_list.append([name, type_code_fragment])
コード例 #16
0
ファイル: validation.py プロジェクト: coderjames/pascal
def __check_file_name(setup, Candidate, Name):
    """Verify that the file name(s) stored under 'Candidate' in 'setup'
    exist and do not look like misplaced command line options.
    """
    value             = setup.__dict__[Candidate]
    CommandLineOption = command_line_args(Candidate)

    if value == "":
        return

    if type(value) == list:
        # A list of file names: check each entry separately.
        for name in value:
            if name != "" and name[0] == "-":
                error_msg("Quex refuses to work with file names that start with '-' (minus).\n"  + \
                          "Received '%s' for %s (%s)" % (value, name, repr(CommandLineOption)[1:-1]))
            if os.access(name, os.F_OK) == False:
                # error_msg("File %s (%s)\ncannot be found." % (name, Name))
                error_msg_file_not_found(name, Name)
        return

    # A single file name: guard clauses for the acceptable cases.
    if value == "" or value[0] == "-":
        return
    if os.access(value, os.F_OK):
        return
    if os.access(QUEX_PATH + "/" + value, os.F_OK):
        return
    if     os.access(os.path.dirname(value), os.F_OK) == False \
       and os.access(QUEX_PATH + "/" + os.path.dirname(value), os.F_OK) == False:
        error_msg("File '%s' is supposed to be located in directory '%s' or\n" % \
                  (os.path.basename(value), os.path.dirname(value)) + \
                  "'%s'. No such directories exist." % \
                  (QUEX_PATH + "/" + os.path.dirname(value)))
    error_msg_file_not_found(value, Name)
コード例 #17
0
ファイル: query.py プロジェクト: yifsun/amplify
def __handle_property_match(cl):
    """Print all values of a unicode property that match the wild card
    expression given via the property-match query setting
    (format: '<property name> = <wild card expression>').
    NOTE: 'print' statement and 'len()' on a map() result => Python 2 module.
    """
    property_follower = Setup.query_property_match
    if not property_follower: return

    sys.stderr.write("(please, wait for database parsing to complete)\n")

    # Split 'name = expression' and strip whitespace from both parts.
    fields = map(lambda x: x.strip(), property_follower.split("="))
    if len(fields) != 2:
        error_msg("Wrong property setting '%s'." % property_follower)

    # -- determine name and value
    name = fields[0]
    wild_card_expression = fields[1]

    # -- get the property from the database
    property = __get_property(name)
    if property is None:
        return True

    # -- find the character set for the given expression
    # Binary properties carry no values to match the wild card against.
    if property.type == "Binary":
        error_msg(
            "Binary property '%s' is not subject to value wild card matching.\n"
            % property.name)

    for value in property.get_wildcard_value_matches(wild_card_expression):
        print value
コード例 #18
0
ファイル: mode.py プロジェクト: coderjames/pascal
def __general_validate(fh, Mode, Name, pos):
    """Validate the event handler name 'Name' being defined for mode 'Mode'.

    Rejects the obsolete 'on_indentation' handler and the simultaneous
    definition of 'on_dedent' and 'on_n_dedent'.

    fh   -- input file handle, used for error positioning.
    Mode -- mode description; its 'events' dict is inspected.
    Name -- name of the event handler being defined.
    pos  -- stream position of the definition, for error reporting.
    """
    if Name == "on_indentation":
        fh.seek(pos)
        error_msg("Definition of 'on_indentation' is no longer supported since version 0.51.1.\n"
                  "Please, use 'on_indent' for the event of an opening indentation, 'on_dedent'\n"
                  "for closing indentation, and 'on_nodent' for no change in indentation.", fh) 


    def error_dedent_and_ndedent(code, A, B):
        # Report that handler 'A' clashes with the already defined rival 'B',
        # pointing at B's definition site when it is known.
        filename = "(unknown)"
        line_n   = "0"
        if hasattr(code, "filename"): filename = code.filename
        if hasattr(code, "line_n"):   line_n   = code.line_n
        error_msg("Indentation event handler '%s' cannot be defined, because\n" % A,
                  fh, DontExitF=True, WarningF=False)
        error_msg("the alternative '%s' has already been defined." % B,
                  filename, line_n)

    # 'on_dedent' and 'on_n_dedent' exclude each other -- but only if the
    # rival handler actually carries code. NOTE: '.has_key()' => Python 2.
    if Name == "on_dedent" and Mode.events.has_key("on_n_dedent"):
        fh.seek(pos)
        code = Mode.events["on_n_dedent"]
        if code.get_code() != "":
            error_dedent_and_ndedent(code, "on_dedent", "on_n_dedent")
                      
    if Name == "on_n_dedent" and Mode.events.has_key("on_dedent"):
        fh.seek(pos)
        code = Mode.events["on_dedent"]
        if code.get_code() != "":
            error_dedent_and_ndedent(code, "on_n_dedent", "on_dedent")
コード例 #19
0
    def get_character_value_limit(self):
        """A buffer element is a chunk of memory of the size of the granularity
        of which the input pointer increases. For fixed size codecs, such as
        ASCII or UCS32, the BUFFER ELEMENT VALUE LIMIT is exactly the same as
        the CHARACTER VALUE LIMIT. 

        However, for dynamic sized codecs, such as UTF8 or UTF16, they are
        different. In UTF8, the input pointer increments by one byte on each
        state transition. However, a character may consist out of multiple
        bytes. The buffer element value limit is 256, but the character value
        limit is the whole range.
        
        
        RETURNS: Integer = supremum of possible character range, i.e.
                           one character behind the last possible.

                 sys.maxint, if no such limit exists.

        NOTE: uses 'sys.maxint' => Python 2 module.
        """
        # Size of one buffer element in bytes; -1 means 'no limit'.
        buffer_element_size = self.buffer_element_size

        if buffer_element_size == -1: return sys.maxint

        try:
            result = 256 ** buffer_element_size
        except:
            # NOTE(review): bare 'except' -- integer exponentiation should not
            # normally raise; consider narrowing the exception type.
            file_in.error_msg("Error while trying to compute 256 to the 'buffer-element-size' (%i bytes)\n"   \
                              % buffer_element_size + \
                              "Adapt \"--buffer-element-size\" or \"--buffer-element-type\",\n"       + \
                              "or specify '--buffer-element-size-irrelevant' to ignore the issue.")

        # Cap the result; values beyond sys.maxint are treated as 'unlimited'.
        if result > sys.maxint: return sys.maxint
        else:                   return result
コード例 #20
0
    def buffer_codec_prepare(self, BufferCodecName, BufferCodecFileName=None, Module=None):
        """Determines: Setup.buffer_codec_name
                       Setup.buffer_codec

        BufferCodecName     -- codec of the internal buffer: 'unicode',
                               'utf8', 'utf16', 'unit-test', or a codec name
                               known to the codec database.
        BufferCodecFileName -- optional file with a user defined codec
                               transformation table ('--codec-file').
        Module              -- implementation module; required for the
                               dynamic size codecs 'utf8'/'utf16'.
        """
        if   BufferCodecName in ("utf8", "utf16"):
            assert Module is not None
            result = codec_db.CodecDynamicInfo(BufferCodecName, Module)
        elif BufferCodecFileName:
            # Verify that the file name can be decomposed; failure indicates
            # a malformed '--codec-file' argument. (A duplicate, unchecked
            # 'splitext' call before the 'try' was dead code and was removed.)
            try: 
                os.path.splitext(os.path.basename(BufferCodecFileName))
            except:
                file_in.error_msg("cannot interpret string following '--codec-file'")
            result = codec_db.CodecTransformationInfo(FileName=BufferCodecFileName)
        elif BufferCodecName == "unicode":
            # (Still, 'icu' or 'iconv' may provide converted content, but ...) 
            # If the internal buffer is 'unicode', then the pattern's state 
            # machines are not converted. The requirement for the pattern's
            # range is the same as for the 'buffer element chunks'.
            result = codec_db.CodecInfo("unicode", 
                                NumberSet.from_range(0, self.get_character_value_limit()), 
                                NumberSet.from_range(0, self.get_character_value_limit()))
        elif BufferCodecName == "unit-test":
            result = codec_db.CodecInfo("unicode", 
                                NumberSet.from_range(-sys.maxint, sys.maxint),
                                NumberSet.from_range(-sys.maxint, sys.maxint))

        else:
            result = codec_db.CodecTransformationInfo(BufferCodecName)

        self.buffer_codec = result
コード例 #21
0
def __parse_event(new_mode, fh, word):
    """Try to interpret 'word' as an event handler name; on success parse
    the handler's code fragment into 'new_mode.events'.

    RETURNS: True if an event handler was parsed, False otherwise.
    """
    pos = fh.tell()

    # Honour the classical 'lex' notations for end-of-stream and failure.
    if   word == "<<EOF>>":  word = "on_end_of_stream"
    elif word == "<<FAIL>>": word = "on_failure"
    elif word in blackboard.all_section_title_list:
        error_msg("Pattern '%s' is a quex section title. Has the closing '}' of mode %s \n" % (word, new_mode.name) \
                  + "been forgotten? Else use quotes, i.e. \"%s\"." % word, fh)
    elif not word.startswith("on_"):
        # Not an event handler name at all.
        return False

    comment = "Unknown event handler '%s'. \n" % word + \
              "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
              "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    verify_word_in_list(word, event_handler_db.keys(), comment, fh)
    __validate_required_token_policy_queue(word, fh, pos)

    # When a termination token is sent, no other token shall follow.
    # => Enforce return from the analyzer! Do not allow CONTINUE!
    continue_f = (word != "on_end_of_stream")

    new_mode.events[word] = code_fragment.parse(fh,
                                                "%s::%s event handler" % (new_mode.name, word),
                                                ContinueF=continue_f)
    return True
コード例 #22
0
def __general_validate(fh, Mode, Name, pos):
    """Reject obsolete or mutually exclusive event handler definitions."""
    if Name == "on_indentation":
        fh.seek(pos)
        error_msg(
            "Definition of 'on_indentation' is no longer supported since version 0.51.1.\n"
            "Please, use 'on_indent' for the event of an opening indentation, 'on_dedent'\n"
            "for closing indentation, and 'on_nodent' for no change in indentation.",
            fh)

    def report_dedent_clash(code, DefinedName, RivalName):
        # Point at the rival handler's definition site when it is known.
        where_file = "(unknown)"
        where_line = "0"
        if hasattr(code, "filename"): where_file = code.filename
        if hasattr(code, "line_n"):   where_line = code.line_n
        error_msg(
            "Indentation event handler '%s' cannot be defined, because\n" % DefinedName,
            fh,
            DontExitF=True,
            WarningF=False)
        error_msg("the alternative '%s' has already been defined." % RivalName,
                  where_file, where_line)

    # 'on_dedent' and 'on_n_dedent' exclude each other.
    rival_of = {"on_dedent": "on_n_dedent", "on_n_dedent": "on_dedent"}
    rival    = rival_of.get(Name)
    if rival is not None and Mode.events.has_key(rival):
        fh.seek(pos)
        code = Mode.events[rival]
        if code.get_code() != "":
            report_dedent_clash(code, Name, rival)
コード例 #23
0
def __warn_on_double_definition():
    """Double check that no token id appears twice. Again, this can only happen,
    if quex itself produced the numeric values for the token.

    If the token ids come from outside, Quex does not know the numeric value. It 
    cannot warn about double definitions.

    NOTE: relies on Python 2 dict API ('.values()' returning a sliceable
    list, '.items()' returning a sortable list).
    """
    assert len(Setup.token_id_foreign_definition_file) == 0

    # Maps: numeric token value --> list of token ids carrying that value.
    clash_db = defaultdict(list)

    # Compare every pair of tokens exactly once (i < j).
    token_list = token_id_db.values()
    for i, x in enumerate(token_list):
        for y in token_list[i+1:]:
            if x.number != y.number: continue
            clash_db[x.number].append(x)
            clash_db[x.number].append(y)

    def find_source_reference(TokenList):
        # First token with a concrete (non-void) source reference, if any.
        for token in TokenList:
            if token.sr.is_void(): continue
            return token.sr
        return None
    
    if len(clash_db) != 0:
        item_list = clash_db.items()
        item_list.sort()
        sr = find_source_reference(item_list[0][1])
        error_msg("Following token ids have the same numeric value assigned:", 
                  sr, DontExitF=True)
        for x, token_id_list in item_list:
            sr = find_source_reference(token_id_list)
            token_ids_sorted = sorted(list(set(token_id_list)), key=attrgetter("name")) # Ensure uniqueness
            error_msg("  %s: %s" % (x, "".join(["%s, " % t.name for t in token_ids_sorted])), 
                      sr, DontExitF=True)
コード例 #24
0
    def detect_path_of_nothing_is_necessary(sm, Name, PostContextPresentF, fh):
        """Report an error if the state machine 'sm' accepts without reading a
        single character, i.e. its initial state is an acceptance state.

        sm                  -- state machine under inspection (may be None).
        Name                -- "", "pre-context", or "post-context"; selects
                               the explanatory part of the error message.
        PostContextPresentF -- whether a post context accompanies the pattern.
        fh                  -- file handle for error positioning.
        """
        assert Name in ["", "pre-context", "post-context"]
        if sm is None: 
            return
        elif not sm.get_init_state().is_acceptance(): 
            return
        if len(Name) == 0: name_str = "core pattern"
        else:              name_str = Name

        # (Fixed wording: message previously read "contains in a ... path".)
        msg = "The %s contains a 'nothing is necessary' path in the state machine.\n"   \
              % name_str                                                                  + \
              "This means, that without reading a character the analyzer drops into\n"    + \
              "an acceptance state. "

        msg += { 
            "":
                "The analyzer would then stall.",

            "pre-context":
                "E.g., pattern 'x*/y/' means that zero or more 'x' are a pre-\n"             + \
                "condition for 'y'. If zero appearances of 'x' are enough, then obviously\n" + \
                "there is no pre-context for 'y'! Most likely the author intended 'x+/y/'.",

            "post-context":
                "A post context where nothing is necessary is superfluous.",
        }[Name]

        if Name != "post-context" and PostContextPresentF:
            msg += "\n"                                                          \
                   "Note: A post context does not change anything to that fact." 

        error_msg(msg, fh)
コード例 #25
0
def parse(fh,
          CodeFragmentName,
          ErrorOnFailureF=True,
          AllowBriefTokenSenderF=True,
          ContinueF=True):
    """RETURNS: An object of class CodeUser containing
                line number, filename, and the code fragment.

                None in case of failure.
    """
    assert type(ErrorOnFailureF) == bool
    assert type(AllowBriefTokenSenderF) == bool

    skip_whitespace(fh)

    head = fh.read(2)
    if head.startswith("{"):
        # Push back anything read beyond the opening brace.
        if len(head) > 1:
            fh.seek(-1, 1)
        return __parse_normal(fh, CodeFragmentName)

    if AllowBriefTokenSenderF and head == "=>":
        return __parse_brief_token_sender(fh, ContinueF)

    if not ErrorOnFailureF:
        fh.seek(-2, 1)   # unput both characters
        return None

    error_msg("Missing code fragment after %s definition." % CodeFragmentName, fh)
コード例 #26
0
ファイル: mode.py プロジェクト: coderjames/pascal
def __parse_base_mode_list(fh, new_mode):
    """Collect the comma separated list of base mode names into
    'new_mode.base_modes'.
    """
    new_mode.base_modes = []
    trailing_comma_f    = False
    while True:
        # '{' or '<' terminates the list; unput the character either way.
        if check(fh, "{"):
            fh.seek(-1, 1)
            break
        if check(fh, "<"):
            fh.seek(-1, 1)
            break

        skip_whitespace(fh)
        base_mode_name = read_identifier(fh)
        if base_mode_name == "":
            break

        new_mode.base_modes.append(base_mode_name)
        trailing_comma_f = False
        if not check(fh, ","):
            break
        trailing_comma_f = True

    if trailing_comma_f:
        error_msg("Trailing ',' after base mode '%s'." % new_mode.base_modes[-1], fh, 
                  DontExitF=True, WarningF=True)

    elif len(new_mode.base_modes) != 0:
        # This check is a 'service' -- for those who follow the old convention
        pos = fh.tell()
        skip_whitespace(fh)
        follower = read_identifier(fh)
        if follower != "":
            error_msg("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.base_modes[-1], follower) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
コード例 #27
0
def __start_mode(applicable_mode_name_list, mode_name_list):
    """Determine and validate the initial mode.

    If no explicit 'start = <mode>;' was given, fall back to the first
    applicable mode (erroring when the choice is ambiguous). Otherwise,
    verify that the requested start mode is defined and instantiable.
    """
    assert len(applicable_mode_name_list) != 0

    requested = blackboard.initial_mode.get_pure_code()
    if requested != "":
        # An explicit start mode was given; it must be known and must not
        # be 'inheritable only'.
        file_name = blackboard.initial_mode.filename
        line_n    = blackboard.initial_mode.line_n
        verify_word_in_list(requested, mode_name_list,
                            "Start mode '%s' is not defined." % requested,
                            file_name, line_n)
        verify_word_in_list(requested, applicable_mode_name_list,
                            "Start mode '%s' is inheritable only and cannot be instantiated." % requested,
                            file_name, line_n)
        return

    # No explicit 'start = ...;' => choose an applicable mode as default.
    fallback                = applicable_mode_name_list[0]
    blackboard.initial_mode = CodeFragment(fallback)
    if len(applicable_mode_name_list) > 1:
        error_msg("No initial mode defined via 'start' while more than one applicable mode exists.\n" + \
                  "Use for example 'start = %s;' in the quex source file to define an initial mode." \
                  % fallback)
    # This branch: start mode is applicable and present.
コード例 #28
0
def __post_process(fh, StartPosition, object, ReturnRE_StringF):
    """Finalize a parsed regular expression.

    Recovers the original RE text from the stream (for error reporting),
    complains if parsing yielded no object, and returns either the object
    alone or the pair (re_text, object) depending on 'ReturnRE_StringF'.
    """
    assert    object is None                   \
           or isinstance(object, Pattern) \
           or isinstance(object, StateMachine) \
           or isinstance(object, NumberSet)

    if isinstance(fh, StringIO):
        re_text = ""
    else:
        # Re-read the span [StartPosition, current position) as plain text.
        here = fh.tell()
        fh.seek(StartPosition)
        re_text = fh.read(here - StartPosition)
        if re_text == "":
            # Nothing was consumed: peek at the offending next character.
            re_text = fh.read(1)
            fh.seek(-1, 1)

    # (*) error in regular expression?
    if object is None:
        error_msg(
            "No valid regular expression detected, found '%s'." %
            re_text, fh)

    # NOT: Do not transform here, since transformation might happen twice when patterns
    #      are defined and when they are replaced.
    if ReturnRE_StringF:
        return re_text, object
    return object
コード例 #29
0
def parse(fh):
    """Parse one 'mode' section and register it in
    'blackboard.mode_description_db'. Once all modes are parsed they can
    be translated into 'real' modes located in 'blackboard.mode_db'.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(
        fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    # The ModeDescription constructor registers the mode in the mode_db.
    new_mode = ModeDescription(mode_name, SourceRef.from_FileHandle(fh))

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    opener = fh.read(1)
    if opener not in (":", "{"):
        error_msg("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if opener == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events
    while __parse_element(new_mode, fh):
        pass
コード例 #30
0
ファイル: counter.py プロジェクト: yifsun/amplify
def __parse(fh, result, IndentationSetupF=False):
    """Parses pattern definitions of the form:

          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

    In other words the right hand side *must* be a character set.

    RETURNS: 'result', with all parsed specifications entered.
    """
    # NOTE: Catching of EOF happens in caller: parse_section(...)
    while True:
        skip_whitespace(fh)
        if check(fh, ">"):
            break

        # Left hand side: a regular expression denoting a character set.
        pattern, identifier, sr = __parse_definition_head(fh, result)
        if pattern is None and IndentationSetupF:
            error_msg("Keyword '\\else' cannot be used in indentation setup.",
                      fh)

        # '__parse_definition_head()' ensures that only identifiers mentioned in
        # 'result' are accepted.
        if IndentationSetupF:
            result.specify(identifier, pattern, sr)
        else:
            count = read_value_specifier(fh, identifier, 1)
            result.specify(identifier, pattern, count, sr)

        if not check(fh, ";"):
            error_msg("Missing ';' after '%s' specification." % identifier, fh)

    return result
コード例 #31
0
ファイル: utf8.py プロジェクト: dkopecek/amplify
def unicode_to_utf8(UnicodeValue):
    """Encode a unicode code point as a UTF8 byte sequence.

    RETURNS: List of integers in [0, 255] -- the UTF8 bytes.

    Sequences of up to six bytes are produced (covering values up to
    0x7FFFFFFF, as in the original ISO 10646 definition of UTF8). For
    larger values an error is issued.
    """
    if UnicodeValue < 0x80:
        # Bits: 7 (plain ASCII, single byte)
        return [ UnicodeValue, ]
    elif UnicodeValue < 0x800:
        # Bits: 5 + 6 
        return [ 0xC0 | ((UnicodeValue >> 6) & 0x1F),
                 0x80 | ((UnicodeValue     ) & 0x3F)]
    elif UnicodeValue < 0x10000:
        # Bits: 4 + 6 + 6
        return [ 0xE0 | ((UnicodeValue >> 12) & 0x0F),
                 0x80 | ((UnicodeValue >>  6) & 0x3F),
                 0x80 | ((UnicodeValue      ) & 0x3F)]
    elif UnicodeValue < 0x00200000:
        # Bits: 3 + 6 + 6 + 6
        return [ 0xF0 | ((UnicodeValue >> 18) & 0x07),
                 0x80 | ((UnicodeValue >> 12) & 0x3F),
                 0x80 | ((UnicodeValue >>  6) & 0x3F),
                 0x80 | ((UnicodeValue      ) & 0x3F)]
    elif UnicodeValue < 0x04000000:
        # Bits: 2 + 6 + 6 + 6 + 6
        # BUG FIX: the lead byte of a five byte sequence is '111110xx',
        # i.e. 0xF8 -- not 0xF0, which marks a four byte sequence.
        return [ 0xF8 | ((UnicodeValue >> 24) & 0x03),
                 0x80 | ((UnicodeValue >> 18) & 0x3F),
                 0x80 | ((UnicodeValue >> 12) & 0x3F),
                 0x80 | ((UnicodeValue >>  6) & 0x3F),
                 0x80 | ((UnicodeValue      ) & 0x3F)]
    elif UnicodeValue < 0x80000000:
        # Bits: 1 + 6 + 6 + 6 + 6 + 6
        # BUG FIX: the lead byte of a six byte sequence is '1111110x',
        # i.e. 0xFC -- not 0xF0.
        return [ 0xFC | ((UnicodeValue >> 30) & 0x01),
                 0x80 | ((UnicodeValue >> 24) & 0x3F),
                 0x80 | ((UnicodeValue >> 18) & 0x3F),
                 0x80 | ((UnicodeValue >> 12) & 0x3F),
                 0x80 | ((UnicodeValue >>  6) & 0x3F),
                 0x80 | ((UnicodeValue      ) & 0x3F)]
    else:
        error_msg("Unicode character 0x%8X > 0x7FFFFFFF detected. Cannot be handled." % UnicodeValue)
コード例 #32
0
ファイル: query.py プロジェクト: coderjames/pascal
def __handle_property_match(cl):
    property_follower = cl.follow("", "--property-match")
    sys.stderr.write("(please, wait for database parsing to complete)\n")

    if property_follower == "":
        return

    fields = map(lambda x: x.strip(), property_follower.split("="))
    if len(fields) != 2:
        error_msg("Wrong property setting '%s'." % property_follower)

    # -- determine name and value
    name                 = fields[0]
    wild_card_expression = fields[1]

    # -- get the property from the database
    property = __get_property(name)
    if property is None: 
        return True

    # -- find the character set for the given expression
    if property.type == "Binary":
        error_msg("Binary property '%s' is not subject to value wild card matching.\n" % property.name)

    for value in property.get_wildcard_value_matches(wild_card_expression):
        print value
コード例 #33
0
def token_id_db_verify_or_enter_token_id(fh, TokenName):
    """Ensure that 'TokenName' (with its prefix cut off) is present in
    'blackboard.token_id_db'. Unknown token ids are entered implicitly;
    a warning is issued when the prefixed name was defined explicitly.
    """
    global Setup

    plain_name = cut_token_id_prefix(TokenName, fh)

    # Occasionally add token id automatically to database
    if plain_name not in blackboard.token_id_db:
        # DO NOT ENFORCE THE TOKEN ID TO BE DEFINED, BECAUSE WHEN THE TOKEN ID
        # IS DEFINED IN C-CODE, THE IDENTIFICATION IS NOT 100% SAFE.
        if TokenName in blackboard.token_id_db.keys():
            text  = "Token id '%s' defined implicitly.\n" % TokenName
            text += "'%s' has been defined in a token { ... } section!\n" % \
                    (Setup.token_id_prefix + TokenName)
            text += "Token ids in the token { ... } section are automatically prefixed."
            error_msg(text, fh, DontExitF=True,
                      SuppressCode=NotificationDB.warning_usage_of_undefined_token_id_name)
        else:
            # Warning is posted later when all implicit tokens have been
            # collected. See "token_id_maker.__propose_implicit_token_definitions()"
            blackboard.token_id_implicit_list.append(
                (plain_name, SourceRef.from_FileHandle(fh)))

        # Enter the implicit token id definition in the database
        blackboard.token_id_db[plain_name] = \
                TokenInfo(plain_name, None, None, SourceRef.from_FileHandle(fh))
コード例 #34
0
 def __check_on_orphan_states(Place, sm):
     """Issue an error if 'sm' contains orphaned (unreachable) states."""
     orphans = sm.get_orphaned_state_index_list()
     if not orphans:
         return
     error_msg("After '%s'" % Place + "\n" + \
               "Orphaned state(s) detected in regular expression (optimization lack).\n" + \
               "Please, log a defect at the projects website quex.sourceforge.net.\n"    + \
               "Orphan state(s) = " + repr(orphans))
コード例 #35
0
ファイル: counter.py プロジェクト: yifsun/amplify
    def __sm_newline_default(self):
        """Default newline: '(\\n)|(\\r\\n)'

        RETURNS: State machine matching the default newline, or None when
                 '\\n' is already occupied by another count command.
        """
        global cc_type_name_db

        newline_set = NumberSet(ord('\n'))
        retour_set  = NumberSet(ord('\r'))

        # If another count command already claims '\n', no default newline
        # can be installed at all.
        before = self.count_command_map.find_occupier(newline_set, set())
        if before is not None:
            error_msg("Trying to implement default newline: '\\n' or '\\r\\n'.\n" 
                      "The '\\n' option is not possible, since it has been occupied by '%s'.\n" \
                      "No newline can be defined by default."
                      % cc_type_name_db[before.cc_type], before.sr, DontExitF=True, 
                      SuppressCode=NotificationDB.warning_default_newline_0A_impossible)
            # In this case, no newline can be defined!
            return

        sm = StateMachine.from_character_set(newline_set)

        # Optionally accept '\r\n' (DOS style) as well -- but only if '\r'
        # has not been claimed by another count command.
        if Setup.dos_carriage_return_newline_f:
            before = self.count_command_map.find_occupier(retour_set, set())
            if before is not None:
                error_msg("Trying to implement default newline: '\\n' or '\\r\\n'.\n" 
                          "The '\\r\\n' option is not possible, since '\\r' has been occupied by '%s'." \
                          % cc_type_name_db[before.cc_type],
                          before.sr, DontExitF=True, 
                          SuppressCode=NotificationDB.warning_default_newline_0D_impossible)
            else:
                sm.add_transition_sequence(sm.init_state_index, [retour_set, newline_set])

        return sm
コード例 #36
0
def utf8_to_unicode(ByteSequence):
    """Decode a UTF8 byte sequence (a list of integers) into a unicode
    code point -- by hand.

    Doing this manually avoids the pitfalls of a python 2.x narrow build,
    where the codecs module may deliver a two-character string for code
    points >= 0x10000 which 'ord' cannot digest:

           utf8d = codecs.getdecoder("utf-8")
           return ord(utf8d("".join(map(chr, ByteSequence)))[0])

    would therefore be unsafe.

    RETURNS: The decoded code point as an integer.
    """
    # A valid sequence of length N carries (N - 1) leading one-bits plus a
    # zero in its first byte; 8 - N payload bits remain there. Each of the
    # remaining bytes starts with the bits '10' and carries 6 payload bits.
    payload_bit_n = 8 - len(ByteSequence)
    value = ByteSequence[0] & ((1 << payload_bit_n) - 1)
    for continuation in ByteSequence[1:]:
        value = (value << 6) | (continuation & 0x3F)  # blend off the highest two bits
        # The highest two bits of a continuation byte MUST be '10'.
        if (continuation & 0xC0) != 0x80:
            error_msg(
                "Error in UTF8 encoded file. Inadmissible byte sequence detected."
            )

    return value
コード例 #37
0
ファイル: counter.py プロジェクト: yifsun/amplify
    def check_homogenous_space_counts(self):
        """Advise the user when the setup contains no grid and only one
        uniform space count N > 1: such a setup behaves exactly like one
        with space count 1, which is the fastest to compute.
        """
        common = None
        for character_set, info in self.__map:
            if info.cc_type != E_CharacterCountType.COLUMN: 
                # Any grid definition invalidates the simplification.
                if info.cc_type == E_CharacterCountType.GRID: 
                    return
                continue
            elif type(info.value) in (str, unicode): 
                # If there is one single 'variable' space count value, 
                # then no assumptions can be made.
                return
            elif common is None:
                common = info
            elif common.value != info.value:
                # space counts are not homogeneous
                return

        # No COLUMN entry at all => nothing to advise about.
        if common is None:
            return
            
        error_msg("Setup does not contain a grid but only homogeneous space counts of %i.\n" \
                  % common.value + \
                  "This setup is equivalent to a setup with space counts of 1. Space counts\n" + \
                  "of 1 are the fastest to compute.", 
                  common.sr, DontExitF=True)
コード例 #38
0
ファイル: counter.py プロジェクト: yifsun/amplify
 def check_grid_specification(self, Value, sr):
     """Reject or warn about degenerate grid count values.

     A grid count of 0 is an error; a grid count of 1 triggers a
     non-exiting hint, since a space count of 1 computes faster.
     """
     if   Value == 0: 
         error_msg("A grid count of 0 is nonsense. May be define a space count of 0.", sr)
     elif Value == 1:
         # BUG FIX: message read "equivalent of to a space" -- garbled wording.
         error_msg("Indentation grid counts of '1' are equivalent to a space\n" + \
                   "count of '1'. The latter is faster to compute.",
                   sr, DontExitF=True)
コード例 #39
0
ファイル: query.py プロジェクト: coderjames/pascal
def do(ARGV):
    """Performs a query based on the given command line arguments.
       RETURNS: True if a query was performed.
                False if not query was requested.
    """
    cl = GetPot(ARGV, SectionsEnabledF=False)

    success_f = False

    # Regular Expressions extract the BufferLimitCode and the PathTerminatorCode
    # from the sets. So let us define them outside the normal range.
    backup_buffer_limit_code = Setup.buffer_limit_code
    backup_path_limit_code   = Setup.path_limit_code
    Setup.buffer_limit_code = -1
    Setup.path_limit_code   = -1

    # NOTE(review): the backed-up limit codes are never restored and
    # 'success_f' is never returned in this excerpt -- presumably the
    # function continues beyond this chunk; confirm against full source.
    try:
        success_f = True
        # Dispatch exactly one query option; order defines priority.
        if   search_and_validate(cl, "--codec-info"):         __handle_codec(cl)
        elif search_and_validate(cl, "--codec-file-info"):    __handle_codec_file(cl)
        elif search_and_validate(cl, "--codec-for-language"): __handle_codec_for_language(cl)
        elif search_and_validate(cl, "--property"):           __handle_property(cl)
        elif search_and_validate(cl, "--set-by-property"):    __handle_set_by_property(cl)
        elif search_and_validate(cl, "--set-by-expression"):  __handle_set_by_expression(cl)
        elif search_and_validate(cl, "--property-match"):     __handle_property_match(cl)
        else:                                                 success_f = False

    except RegularExpressionException, x:
        error_msg(x.message)
コード例 #40
0
ファイル: counter.py プロジェクト: yifsun/amplify
    def check_grid_values_integer_multiples(self):
        """If there are no spaces and the grid is on a homogeneous scale,
           => then the grid can be transformed into 'easy-to-compute' spaces.

        Issues a non-exiting hint when all grid widths are integer
        multiples of the smallest one.
        """
        grid_value_list = []
        min_info        = None
        for character_set, info in self.__map:
            if info.cc_type != E_CharacterCountType.GRID: 
                # Any explicit COLUMN (space) count invalidates the hint.
                if info.cc_type == E_CharacterCountType.COLUMN: 
                    return
                continue
            elif type(info.value) in (str, unicode): 
                # If there is one single 'variable' grid value, 
                # then no assumptions can be made.
                return
            grid_value_list.append(info.value)
            if min_info is None or info.value < min_info.value:
                min_info = info

        # No grid entry at all => nothing to advise about.
        if min_info is None:
            return

        # Are all grid values a multiple of the minimum? If not, be quiet.
        if len(filter(lambda x: x % min_info.value == 0, grid_value_list)) != len(grid_value_list):
            return

        error_msg("Setup does not contain spaces, only grids (tabulators). All grid\n" \
                  "widths are multiples of %i. The grid setup %s\n" \
                  % (min_info.value, repr(sorted(grid_value_list))[1:-1]) + \
                  "is equivalent to a setup with space counts %s.\n" \
                  % repr(map(lambda x: x / min_info.value, sorted(grid_value_list)))[1:-1] + \
                  "Space counts are faster to compute.", 
                  min_info.sr, DontExitF=True)
コード例 #41
0
ファイル: validation.py プロジェクト: yifsun/amplify
def __check_file_name(setup,
                      Candidate,
                      Name,
                      Index=None,
                      CommandLineOption=None):
    """Validate the file name(s) stored in setup member 'Candidate'.

    Errors are issued for names starting with '-' (look like options) and
    for files that cannot be found -- directly or relative to QUEX_PATH.
    """
    value = setup.__dict__[Candidate]
    if len(value) == 0: return

    if CommandLineOption is None:
        CommandLineOption = command_line_args(Candidate)

    # Optionally, only a single element of a list-valued member is checked.
    if Index is not None:
        if type(value) != list or len(value) <= Index: value = ""
        else: value = value[Index]

    if type(value) == list:
        for name in value:
            if name != "" and name[0] == "-":
                # NOTE(review): 'value' (whole list) and 'name' (single entry)
                # look swapped in this message -- confirm intended wording.
                error_msg("Quex refuses to work with file names that start with '-' (minus).\n"  + \
                          "Received '%s' for %s (%s)" % (value, name, repr(CommandLineOption)[1:-1]))
            if os.access(name, os.F_OK) == False:
                # error_msg("File %s (%s)\ncannot be found." % (name, Name))
                error_msg_file_not_found(name, Name)
    else:
        # Single file name: empty or '-'-prefixed values are silently accepted.
        if value == "" or value[0] == "-": return
        if os.access(value, os.F_OK): return
        if os.access(QUEX_PATH + "/" + value, os.F_OK): return
        # Neither the file nor its directory exists (also tried under QUEX_PATH).
        if     os.access(os.path.dirname(value), os.F_OK) == False \
           and os.access(QUEX_PATH + "/" + os.path.dirname(value), os.F_OK) == False:
            error_msg("File '%s' is supposed to be located in directory '%s' or\n" % \
                      (os.path.basename(value), os.path.dirname(value)) + \
                      "'%s'. No such directories exist." % \
                      (QUEX_PATH + "/" + os.path.dirname(value)))
        error_msg_file_not_found(value, Name)
コード例 #42
0
ファイル: mode.py プロジェクト: coderjames/pascal
def __parse_event(new_mode, fh, word):
    """Check whether 'word' names an event handler; if so, parse the
    handler's code fragment into 'new_mode.events'.

    RETURNS: True if an event handler was parsed, False otherwise.
    """
    pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if word == "<<EOF>>":
        word = "on_end_of_stream"
    elif word == "<<FAIL>>":
        word = "on_failure"
    elif word in blackboard.all_section_title_list:
        error_msg("Pattern '%s' is a quex section title. Has the closing '}' of mode %s \n" % (word, new_mode.name) \
                  + "been forgotten? Else use quotes, i.e. \"%s\"." % word, fh)
    elif not word.startswith("on_"):
        # Not an event handler name at all.
        return False

    hint = "Unknown event handler '%s'. \n" % word + \
           "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
           "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    verify_word_in_list(word, event_handler_db.keys(), hint, fh)
    __validate_required_token_policy_queue(word, fh, pos)

    # When a termination token is sent, no other token shall follow.
    # => Enforce return from the analyzer! Do not allow CONTINUE!
    continue_f = (word != "on_end_of_stream")

    new_mode.events[word] = code_fragment.parse(
        fh, "%s::%s event handler" % (new_mode.name, word),
        ContinueF=continue_f)

    return True
コード例 #43
0
 def __check_on_orphan_states(Place, sm):
     """Report orphaned (unreachable) states left in 'sm' after step 'Place'."""
     orphan_state_list = sm.get_orphaned_state_index_list()
     if len(orphan_state_list) != 0:
         error_msg("After '%s'" % Place + "\n" + \
                   "Orphaned state(s) detected in regular expression (optimization lack).\n" + \
                   "Please, log a defect at the projects website quex.sourceforge.net.\n"    + \
                   "Orphan state(s) = " + repr(orphan_state_list)) 
コード例 #44
0
ファイル: core.py プロジェクト: coderjames/pascal
def __get_float(MemberName):
    """Return setup member 'MemberName' as a float.

    The member may already be a float (passed through unchanged) or a
    command line string which is converted. On conversion failure an
    error naming the corresponding command line option is issued.
    """
    value = setup.__dict__[MemberName]
    if isinstance(value, float):
        return value
    try:
        return float(value)
    except (ValueError, TypeError):
        # Narrowed from a bare 'except:' which would also have swallowed
        # KeyboardInterrupt and programming errors.
        option_name = repr(SETUP_INFO[MemberName][0])[1:-1]
        # (also fixed the 'an floating' typo in the message)
        error_msg("Cannot convert '%s' into a floating point number for '%s'" % (value, option_name))
コード例 #45
0
ファイル: parser.py プロジェクト: dkopecek/amplify
def open_data_base_file(Filename):
    """Open a file of the unicode database for binary reading.

    RETURNS: The open file handle.

    Issues a fatal error when the file cannot be opened.
    """
    try: 
        fh = open(unicode_db_directory + "/" + Filename, "rb")
    except IOError:
        # Narrowed from a bare 'except:': only I/O failures are expected
        # here; anything else (e.g. KeyboardInterrupt) must propagate.
        error_msg("Fatal---Unicode Database File '%s' not found!\n" % Filename + \
                  "QUEX_PATH='%s'\n" % QUEX_PATH + \
                  "Unicode Database Directory: '%s'" % unicode_db_directory)
    return fh
コード例 #46
0
ファイル: counter.py プロジェクト: yifsun/amplify
 def consistency_check(self):
     """Verify that the counter setup is complete and coherent."""
     self.count_command_map.check_defined(self.sr, E_CharacterCountType.WHITESPACE)
     self.count_command_map.check_defined(self.sr, E_CharacterCountType.BEGIN_NEWLINE)
     # A newline suppressor without a newline definition makes no sense.
     if     self.sm_newline_suppressor.get() is not None \
        and self.sm_newline.get() is None:
         error_msg("A newline 'suppressor' has been defined.\n"
                   "But there is no 'newline' in indentation defintion.", 
                   self.sm_newline_suppressor.sr)
コード例 #47
0
ファイル: action_info.py プロジェクト: coderjames/pascal
 def set(self, Value, fh):
     """Record 'Value' together with its source position; warn (without
     exiting) when a previous assignment is being overwritten.
     """
     if self.__value is not None:
         error_msg("%s has been defined more than once.\n" % self.name, fh, DontExitF=True)
         error_msg("previous definition has been here.\n", self.file_name, self.line_n)

     # Remember where the (re-)definition came from.
     self.file_name = fh.name
     self.line_n    = get_current_line_info_number(fh)
     self.__value   = Value
コード例 #48
0
ファイル: validation.py プロジェクト: coderjames/pascal
def __check_identifier(setup, Candidate, Name):
    """Verify that setup member 'Candidate' holds a valid identifier;
    issue an error naming the command line option otherwise.
    """
    value = setup.__dict__[Candidate]
    if is_identifier(value):
        return

    option = SETUP_INFO[Candidate][0]
    error_msg("%s must be a valid identifier (%s).\n" % (Name, repr(option)[1:-1]) + \
              "Received: '%s'" % value)
コード例 #49
0
ファイル: token_type.py プロジェクト: dkopecek/amplify
def parse_distinct_members(fh, section_name, descriptor, already_defined_list):
    """Parse the 'distinct' member block of a token_type section.

    The parsed variable definitions are stored in 'descriptor.distinct_db'.
    Errors are issued for a missing opening '{' or an empty section.
    """
    if not check(fh, "{"):
        # (stray trailing ';' removed -- not idiomatic python)
        error_msg("Missing opening '{' at begin of token_type section '%s'." % section_name, fh)

    result = parse_variable_definition_list(fh, "distinct", already_defined_list)
    if result == {}: 
        error_msg("Missing variable definition in token_type 'distinct' section.", fh)
    descriptor.distinct_db = result
コード例 #50
0
def open_data_base_file(Filename):
    """Open a file of the unicode database for binary reading.

    RETURNS: The open file handle.

    Issues a fatal error when the file cannot be opened.
    """
    try:
        fh = open(unicode_db_directory + "/" + Filename, "rb")
    except IOError:
        # Narrowed from a bare 'except:' so that only I/O failures are
        # reported; other exceptions propagate unchanged.
        error_msg("Fatal---Unicode Database File '%s' not found!\n" % Filename + \
                  "QUEX_PATH='%s'\n" % QUEX_PATH + \
                  "Unicode Database Directory: '%s'" % unicode_db_directory)
    return fh
コード例 #51
0
def __check_identifier(setup, Candidate, Name):
    """Issue an error unless setup member 'Candidate' is a valid identifier."""
    value = setup.__dict__[Candidate]
    if not is_identifier(value):
        CommandLineOption = SETUP_INFO[Candidate][0]
        error_msg("%s must be a valid identifier (%s).\n" % (Name, repr(CommandLineOption)[1:-1]) + \
                  "Received: '%s'" % value)
コード例 #52
0
def snap_replacement(stream, PatternDict, StateMachineF=True):
    """Snaps a predefined pattern from the input string and returns the resulting
       state machine.

    The opening '{' has already been consumed by the caller; this reads
    'identifier}' and looks the identifier up in 'PatternDict'.

    RETURNS: A StateMachine clone when 'StateMachineF' is True, otherwise
             the referenced character set.

    RAISES: RegularExpressionException on malformed '{identifier}' syntax.
    """
    skip_whitespace(stream)
    pattern_name = read_identifier(stream)
    if pattern_name == "":
        raise RegularExpressionException(
            "Pattern replacement expression misses identifier after '{'.")
    skip_whitespace(stream)

    if not check(stream, "}"):
        raise RegularExpressionException("Pattern replacement expression misses closing '}' after '%s'." \
                                         % pattern_name)

    # The identifier must have been introduced in a 'define' section before.
    verify_word_in_list(
        pattern_name, PatternDict.keys(),
        "Specifier '%s' not found in any preceeding 'define { ... }' section."
        % pattern_name, stream)

    reference = PatternDict[pattern_name]
    assert reference.__class__.__name__ == "PatternShorthand"

    # The replacement may be a state machine or a number set
    if StateMachineF:
        # Get a cloned version of state machine
        state_machine = reference.get_state_machine()
        assert isinstance(state_machine, StateMachine)

        # It is essential that state machines defined as patterns do not
        # have origins. Otherwise, the optimization of patterns that
        # contain pattern replacements might get confused and can
        # not find all optimizations.
        assert state_machine.has_origins() == False

        # A state machine, that contains pre- or post- conditions cannot be part
        # of a replacement. The addition of new post-contexts would mess up the pattern.
        ## if state_machine.has_pre_or_post_context():
        ##    error_msg("Pre- or post-conditioned pattern was used in replacement.\n" + \
        ##              "Quex's regular expression grammar does not allow this.", stream)

        return state_machine

    else:
        # Get a cloned version of character set
        character_set = reference.get_character_set()
        if character_set is None:
            # The shorthand names a full pattern, not a plain character set.
            error_msg(
                "Replacement in character set expression must be a character set.\n"
                "Specifier '%s' relates to a pattern state machine." %
                pattern_name, stream)

        if character_set.is_empty():
            error_msg(
                "Referenced character set '%s' is empty.\nAborted." %
                pattern_name, stream)

        return character_set