Ejemplo n.º 1
0
def test(Text):
    """Feed *Text* to the 'counter' section parser and print the result.

    Relies on the module-global 'count_n' as a running test counter.
    (Python 2 code: uses print statements.)
    """
    global count_n
    count_n += 1

    # Single-line inputs are shown inline; multi-line inputs in a '::' frame.
    if Text.find("\n") == -1:
        print "(%i) |%s|\n" % (count_n, Text)
    else:
        print "(%i)\n::\n%s\n::\n" % (count_n, Text)

    sh = StringIO(Text)
    sh.name = "test_string"  # fake 'file name' so error messages have a source

    descr = None
    if "debug" in sys.argv and "%s" % count_n == sys.argv[3]:
        # Try beyond an exception catcher
        # (run the parser un-guarded so a debugger sees the raw exception)
        descr = counter.LineColumnCount_Prep(sh).parse()

    try:
        descr = counter.LineColumnCount_Prep(sh).parse()

    except EndOfStreamException:
        error.log("End of file reached while parsing 'counter' section.",
                  sh,
                  DontExitF=True)

    except:
        # NOTE(review): bare except keeps the test harness alive on any error.
        print "Exception!"

    if descr is not None: print descr
    print
Ejemplo n.º 2
0
def read_integer(fh):
    """Read an integer literal of any supported base from stream 'fh'.

    RETURNS: the integer value, or None if no number is present
             (the stream position is restored in that case).
    """
    start_position = fh.tell()

    base, digit_list = get_number_base(fh)
    if base is None:
        return None

    # Collect every character that belongs to the base's digit set.
    txt = ""
    tmp = fh.read(1)
    while tmp != "" and tmp in digit_list:
        txt += tmp
        tmp = fh.read(1)
    if tmp != "":
        fh.seek(-1, 1)  # un-read the first character beyond the number

    # Dropping out on a digit-like character hints at a typo in the literal.
    if tmp.isdigit() or (tmp in set("ABCDEFabcdef")):
        error.log("Digit '%s' cannot be part of an expression of base %s." % (tmp, base), fh)

    # Digit group separators ('.') are tolerated and ignored.
    txt = txt.replace(".", "")
    if not txt:
        if base in [2, 8, 16, "roman", "Napier"]:
            error.log("Missing digits after for number of base %s, found '%s'." % (str(base), tmp), fh)
        fh.seek(start_position)
        return None

    # Binary, octal, decimal, and hexadecimal via 'int'; exotic bases by hand.
    if base in [2, 8, 10, 16]:
        return int(txt, base)
    if base == "roman":
        return __roman_number(txt, fh)
    if base == "Napier":
        return __napier_number(txt, fh)
    return __binary_number(txt, fh)
Ejemplo n.º 3
0
def __roman_number(Text, fh):
    """Convert a roman numeral string into its integer value.

    Based on: http://code.activestate.com -- Code Recipes,
              Recipe 81611 by Paul Winkler.
    """
    text_upper = Text.upper()
    # Table of (numeral, value, maximum repetition count), highest first.
    numeral_table = (('M',  1000, None), ('CM', 900, 1),
                     ('D',  500, 1),     ('CD', 400, 1),
                     ('C',  100, 3),     ('XC', 90, 1),
                     ('L',  50, 1),      ('XL', 40, 1),
                     ('X',  10, 3),      ('IX', 9, 1),
                     ('V',  5, 1),       ('IV', 4, 1),
                     ('I',  1, 3))

    total = 0
    i     = 0
    for numeral, value, max_count in numeral_table:
        n = len(numeral)
        occurrence_n = 0
        while text_upper[i: i + n] == numeral:
            occurrence_n += 1
            # More repetitions than allowed => not a valid roman number.
            if max_count is not None and occurrence_n > max_count:
                error.log("input 0r%s is not a valid roman numeral." % Text, fh)
            total += value
            i     += n

    # Left-over characters mean the input was not a pure roman numeral.
    if i < len(text_upper):
        error.log("input 0r%s is not a valid roman numeral." % Text, fh)

    return total
Ejemplo n.º 4
0
def __parse_range_skipper_option(fh, identifier, new_mode):
    """A non-nesting skipper can contain a full fledged regular expression as
    opener, since it only affects the trigger. Not so the nested range
    skipper -- see below.

    Expected input: opener 'white space' closer 'white space' '>'

    RETURNS: SkipRangeData carrying the opener and closer patterns.
    NOTE(review): parameter 'new_mode' is unused in this function.
    """
    # Range state machines only accept 'strings' not state machines
    # Pattern: opener 'white space' closer 'white space' '>'
    skip_whitespace(fh)
    opener_pattern = regular_expression.parse_non_precontexted_pattern(
        fh, identifier, ">", AllowNothingIsFineF=False)
    _assert_pattern_constaints(opener_pattern, "Skip range opener", fh)

    skip_whitespace(fh)
    # An empty closer is tolerated ('AllowNothingIsFineF=True').
    closer_pattern = regular_expression.parse_non_precontexted_pattern(
        fh, identifier, ">", AllowNothingIsFineF=True)
    _assert_pattern_constaints(closer_pattern, "Skip range closer", fh)

    # Human-readable names for error messages and debug output.
    opener_pattern.set_pattern_string("<%s open>" % identifier)
    closer_pattern.set_pattern_string("<%s close>" % identifier)

    # -- the terminating '>' of the option itself
    skip_whitespace(fh)
    if fh.read(1) != ">":
        error.log("missing closing '>' for mode option '%s'" % identifier, fh)

    return SkipRangeData(opener_pattern, closer_pattern)
Ejemplo n.º 5
0
Archivo: core.py Proyecto: xxyzzzq/quex
def argv_is_query_option(Cl, Option, Name, PrevQueryF):
    """Determine whether setup parameter 'Name' is a query-related option
    (as opposed to a code-generation option).

    RETURNS: the query flag of 'Name'. When 'PrevQueryF' is None (unset),
    the flag simply reports whether 'Name' is a query option.

    ERROR: issued when query flags and code generation flags are mixed on
    the same command line (unless the debug-exception flag is set).
    """
    query_f = Name.startswith("query_")

    # No previous flag, or consistent with the previous one => fine.
    if PrevQueryF is None or PrevQueryF == query_f:
        return query_f

    # Mixed usage is tolerated only when debug exceptions are enabled.
    if Cl.search(SETUP_INFO["_debug_exception_f"][0]):
        return query_f

    error.log("Mixed options: query and code generation mode.\n"
              "The option(s) '%s' cannot be combined with preceeding options." \
              % str(SETUP_INFO[Name][0])[1:-1].replace("'",""))
Ejemplo n.º 6
0
def split_first_transition(SmList):
    """Perform separation:
    
          state machine  ---->    first transition  
                               +  appendix state machine
    
    for each state machine.

    RETURNS: (list of (character set, appendix state machine),
              dict: appendix DFA id --> id of the original DFA)

    Character sets MAY INTERSECT, and MAY REQUIRE NON-UNIFORM count actions.
    """
    result = []
    appendix_sm_to_iid_original_db = {}  # appendix DFA id --> original DFA id
    for original_sm in SmList:
        iid_original = original_sm.get_id()
        for first_set, appendix_sm in _cut_first_transition(
                original_sm, CloneStateMachineId=False):
            # Every appendix DFA gets its own 'id'.
            # HOWEVER: Multiple appendix DFAs might match to same 'acceptance id',
            #          => Such DFAs transit to same terminal upon acceptance.
            appendix_sm.mark_state_origins()
            result.append((first_set, appendix_sm))
            # Each appendix DFA id must be fresh -- no double registration.
            assert appendix_sm.get_id() not in appendix_sm_to_iid_original_db
            appendix_sm_to_iid_original_db[appendix_sm.get_id()] = iid_original

    # A post context directly after the first character cannot be handled
    # here -- it should have been rejected by the parser already.
    for character_set, appendix_sm in result:
        init_state = appendix_sm.get_init_state()
        if init_state.input_position_store_f():
            error.log(
                "skip/skip_range/indentation/counter implementation.\n"
                "Inadmissible post context after first character.\n"
                "(This should have been detected during the parsing process)")

    return result, appendix_sm_to_iid_original_db
Ejemplo n.º 7
0
def parse_standard_members(fh, section_name, descriptor, already_defined_list):
    """Parse the standard member definitions ('id', 'column_number',
    'line_number') of a token_type section and store the associated type
    code fragments in 'descriptor'. Each parsed member is appended to
    'already_defined_list'.
    """
    if not check(fh, "{"):
        error.log("Missing opening '{' at begin of token_type section '%s'." % section_name, fh)

    position = fh.tell()

    while True:
        try:
            result = parse_variable_definition(fh)
        except EndOfStreamException:
            # Rewind so the error message points at the section start.
            fh.seek(position)
            error.error_eof("standard", fh)

        # 'None' signals the end of the member list.
        if result is None:
            return

        type_code_fragment, name = result[0], result[1]

        __validate_definition(type_code_fragment, name,
                              already_defined_list, StandardMembersF=True)

        if name == "id":
            descriptor.token_id_type = type_code_fragment
        elif name == "column_number":
            descriptor.column_number_type = type_code_fragment
        elif name == "line_number":
            descriptor.line_number_type = type_code_fragment
        else:
            # The variable parser only delivers the three names above.
            assert False

        already_defined_list.append([name, type_code_fragment])
Ejemplo n.º 8
0
def run(cl, Argv):
    """Dispatch the query operation selected in the global 'Setup'.

    'cl' is handed down to the individual query handlers; 'Argv' is unused
    here but kept for interface compatibility.
    """
    if Setup.query_version_f:
        print_version()
        return
    elif Setup.query_help_f:
        print_help()
        return

    # Regular Expressions extract the BufferLimitCode and the PathTerminatorCode
    # from the sets. So let us define them outside the normal range.
    backup_buffer_limit_code = Setup.buffer_limit_code
    backup_path_limit_code   = Setup.path_limit_code
    Setup.buffer_limit_code = -1
    Setup.path_limit_code   = -1

    try: 
        if   Setup.query_codec:                __handle_codec(cl)
        elif Setup.query_codec_list:           __handle_codec_list(cl)
        elif Setup.query_codec_file:           __handle_codec_file(cl)
        elif Setup.query_codec_language:       __handle_codec_for_language(cl)
        elif Setup.query_property is not None: __handle_property(cl)
        elif Setup.query_set_by_property:      __handle_set_by_property(cl)
        elif Setup.query_set_by_expression:    __handle_set_by_expression(cl)
        elif Setup.query_property_match:       __handle_property_match(cl)
        else:
            assert False # No query option(s) !
    except RegularExpressionException as x:  # 'as' syntax: valid Python 2.6+ and 3
        error.log(x.message)
    finally:
        # BUG FIX: the limit codes were backed up but never restored.
        Setup.buffer_limit_code = backup_buffer_limit_code
        Setup.path_limit_code   = backup_path_limit_code
Ejemplo n.º 9
0
def __parse_event(new_mode, fh, word):
    """Try to interpret 'word' as an event handler name. On success, the
    handler's code fragment is parsed and registered in
    'new_mode.incidence_db'.

    RETURNS: True  -- 'word' named an event handler (consumed here).
             False -- 'word' is not an event handler name.
    """
    pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if word == "<<EOF>>": word = "on_end_of_stream"
    elif word == "<<FAIL>>": word = "on_failure"
    elif word in blackboard.all_section_title_list:
        error.log("Pattern '%s' is a quex section title. Has the closing '}' of mode %s \n" % (word, new_mode.name) \
                  + "been forgotten? Else use quotes, i.e. \"%s\"." % word, fh)
    elif len(word) < 3 or word[:3] != "on_":
        # Anything not starting with 'on_' is a plain pattern, not an event.
        return False

    comment = "Unknown event handler '%s'. \n" % word + \
              "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
              "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    # 'keyword_list' is accepted in addition to the standard handlers here.
    error.verify_word_in_list(word,
                              standard_incidence_db.keys() + ["keyword_list"],
                              comment, fh)

    code = code_fragment.parse(fh,
                               "%s::%s event handler" % (new_mode.name, word))
    incidence_id = standard_incidence_db[word][0]
    # Warn about 'RETURN' in handlers where it would trigger 'on_after_match'.
    if Lng.suspicious_RETURN_in_event_handler(incidence_id, code.get_text()):
        error.warning("Suspicious 'RETURN' in event handler '%s'.\n" % incidence_id \
                      + "This statement will trigger 'on_after_match' handler.\n" \
                      + "May be, use plain return instead.", code.sr)

    # 'on_n_dedent' only makes sense when token repetition is supported.
    if word == "on_n_dedent" and not token_db.support_repetition():
        error.warning("Found 'on_n_dedent', but no single token has been specified\n" \
                      "in a 'repeated_token' section.", code.sr)
    new_mode.incidence_db[word] = code

    return True
Ejemplo n.º 10
0
def __general_validate(fh, Mode, Name, pos):
    """Validate event handler name 'Name' against deprecated handlers and
    mutually exclusive handler definitions already present in 'Mode'.
    Reports problems via 'error.log' / helper below.
    """
    if Name == "on_indentation":
        fh.seek(pos)
        error.log("Definition of 'on_indentation' is no longer supported since version 0.51.1.\n"
                  "Please, use 'on_indent' for the event of an opening indentation, 'on_dedent'\n"
                  "for closing indentation, and 'on_nodent' for no change in indentation.", fh) 


    def error_dedent_and_ndedent(code, A, B):
        # Two-step report: the conflict itself (non-exiting), then the
        # position of the earlier, conflicting definition.
        error.log("Indentation event handler '%s' cannot be defined, because\n" % A,
                  fh, DontExitF=True)
        error.log("the alternative '%s' has already been defined." % B,
                  code.sr)

    # 'on_dedent' and 'on_n_dedent' exclude each other -- unless the earlier
    # definition is pure whitespace.
    # BUG FIX: 'has_key()' (Python 2 only) replaced by 'in' (same semantics).
    if Name == "on_dedent" and "on_n_dedent" in Mode.incidence_db:
        fh.seek(pos)
        code = Mode.incidence_db["on_n_dedent"]
        if not code.is_whitespace():
            error_dedent_and_ndedent(code, "on_dedent", "on_n_dedent")
                      
    if Name == "on_n_dedent" and "on_dedent" in Mode.incidence_db:
        fh.seek(pos)
        code = Mode.incidence_db["on_dedent"]
        if not code.is_whitespace():
            error_dedent_and_ndedent(code, "on_n_dedent", "on_dedent")
Ejemplo n.º 11
0
def __prepare_buffer_element_specification(setup):
    """Determine the lexatom (buffer element) size in byte from the global
    'Setup' and, if possible, derive the lexatom type from it.

    NOTE(review): parameter 'setup' is unused; the function operates on the
    module-global 'Setup' -- kept for interface compatibility.

    RETURNS: lexatom size in byte; -1 if it cannot be determined.
    """
    global global_character_type_db
    if Setup.buffer_lexatom_size_in_byte == "wchar_t":
        error.log("Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
                  "with option '--buffer-element-size' or '-bes'. Please, specify\n"
                  "'--buffer-element-type wchar_t' or '--bet'.")

    if Setup.buffer_lexatom_type == "wchar_t":
        Setup.converter_ucs_coding_name = "WCHAR_T"

    # (*) Determine buffer element type and size (in bytes)
    lexatom_size_in_byte = Setup.buffer_lexatom_size_in_byte
    if lexatom_size_in_byte == -1:
        # BUG FIX: 'has_key()' (Python 2 only) replaced by 'in'.
        if Setup.buffer_lexatom_type in global_character_type_db:
            lexatom_size_in_byte = global_character_type_db[Setup.buffer_lexatom_type][3]
        elif Setup.buffer_lexatom_type == "":
            lexatom_size_in_byte = 1
        else:
            # Buffer element type is not identified in 'global_character_type_db'.
            # => here Quex cannot know its size on its own.
            lexatom_size_in_byte = -1

    if Setup.buffer_lexatom_type == "":
        if lexatom_size_in_byte in [1, 2, 4]:
            # Size fixes the standard unsigned integer type.
            Setup.buffer_lexatom_type = { 
                1: "uint8_t", 2: "uint16_t", 4: "uint32_t",
            }[lexatom_size_in_byte]
        elif lexatom_size_in_byte == -1:
            pass
        else:
            error.log("Buffer element type cannot be determined for size '%i' which\n" \
                      % lexatom_size_in_byte + 
                      "has been specified by '-b' or '--buffer-element-size'.")

    return lexatom_size_in_byte
Ejemplo n.º 12
0
def __parse_event(new_mode, fh, word):
    """Variant of the event handler parser: recognize 'word' as an event
    handler name, parse the following code fragment, and register it in
    'new_mode.incidence_db'.

    RETURNS: True if 'word' was an event handler; False otherwise.
    """
    pos = fh.tell()

    # Allow '<<EOF>>' and '<<FAIL>>' out of respect for classical tools like 'lex'
    if   word == "<<EOF>>":                  word = "on_end_of_stream"
    elif word == "<<FAIL>>":                 word = "on_failure"
    elif word in blackboard.all_section_title_list:
        error.log("Pattern '%s' is a quex section title. Has the closing '}' of mode %s \n" % (word, new_mode.name) \
                  + "been forgotten? Else use quotes, i.e. \"%s\"." % word, fh)
    elif len(word) < 3 or word[:3] != "on_": return False

    comment = "Unknown event handler '%s'. \n" % word + \
              "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
              "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    error.verify_word_in_list(word, standard_incidence_db.keys(), comment, 
                              fh)
    __validate_required_token_policy_queue(word, fh, pos)

    continue_f = True
    if word == "on_end_of_stream" or word == "on_failure":
        # -- When a termination token is sent, no other token shall follow. 
        #    => Enforce return from the analyzer! Do not allow CONTINUE!
        # -- When an 'on_failure' is received allow immediate action of the
        #    receiver => Do not allow CONTINUE!
        continue_f = False

    new_mode.incidence_db[word] = \
            code_fragment.parse(fh, "%s::%s event handler" % (new_mode.name, word),
                                ContinueF=continue_f)

    return True
Ejemplo n.º 13
0
def __parse_base_mode_list(fh, new_mode):
    """Read the comma separated list of base mode names following a mode
    definition and store it in 'new_mode.derived_from_list'. Warns about a
    trailing comma; reports a missing comma between two mode names.
    """
    new_mode.derived_from_list = []
    trailing_comma_f = False
    while True:
        # '{' or '<' terminates the list; un-read it for the caller.
        if check(fh, "{"):
            fh.seek(-1, 1)
            break
        if check(fh, "<"):
            fh.seek(-1, 1)
            break

        skip_whitespace(fh)
        identifier = read_identifier(fh)
        if not identifier:
            break

        new_mode.derived_from_list.append(identifier)
        trailing_comma_f = False
        if not check(fh, ","):
            break
        trailing_comma_f = True

    if trailing_comma_f:
        error.warning("Trailing ',' after base mode '%s'." % new_mode.derived_from_list[-1], fh) 
    elif new_mode.derived_from_list:
        # Service for users of the old (comma-less) convention: detect a
        # missing separator explicitly and say so.
        pos = fh.tell()
        skip_whitespace(fh)
        dummy_identifier = read_identifier(fh)
        if dummy_identifier:
            error.log("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.derived_from_list[-1], dummy_identifier) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
Ejemplo n.º 14
0
def parse(fh):
    """This function parses a mode description and enters it into the 
       'blackboard.mode_description_db'. Once all modes are parsed
       they can be translated into 'real' modes and are located in
       'blackboard.mode_db'. 
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    # NOTE: constructor does register this mode in the mode_db
    new_mode  = ModeDescription(mode_name, SourceRef.from_FileHandle(fh))

    # (*) inherited modes / option_db
    skip_whitespace(fh)
    dummy = fh.read(1)
    if dummy not in [":", "{"]:
        error.log("missing ':' or '{' after mode '%s'" % mode_name, fh)

    # ':' introduces base modes / options before the mode body.
    if dummy == ":":
        __parse_option_list(new_mode, fh)

    # (*) read in pattern-action pairs and events
    # (loop until '__parse_element()' signals the end of the mode body)
    while __parse_element(new_mode, fh): 
        pass
Ejemplo n.º 15
0
def parse(fh):
    """Parse a 'define' region containing pattern definitions:

          WHITESPACE  [ \\t\\n]
          IDENTIFIER  [a-zA-Z0-9]+
          OP_PLUS     "+"

          \\function SOMETHING(sm = X, set = Y, number = N):

    i.e. 'name' whitespace 'regular expression' whitespace newline.
    Comments can only be '//' and must appear at the beginning of a line.
    One regular expression may carry several names, but each name refers
    to exactly one regular expression. Results land in
    'blackboard.shorthand_db'.
    """
    skip_whitespace(fh)
    if not check(fh, "{"):
        error.log("define region must start with opening '{'.", fh)

    while True:
        skip_whitespace(fh)

        # A closing '}' terminates the define region.
        if check(fh, "}"):
            return

        # Read the next definition: either a \function or a plain pattern.
        skip_whitespace(fh)
        if check(fh, "\\function"):
            name, value = _parse_function(fh)
        else:
            name, value = _parse_pattern(fh)

        blackboard.shorthand_db[name] = value
Ejemplo n.º 16
0
Archivo: core.py Proyecto: xxyzzzq/quex
def do(BufferCodecName, BufferCodecFileName=""):
    """Create the encoding transformation for the given buffer codec.

    RETURNS: an EncodingTrafo* object matching 'BufferCodecName', or a
             table-driven transformation loaded from 'BufferCodecFileName'.
    """
    from   quex.engine.state_machine.transformation.base              import EncodingTrafoUnicode
    from   quex.engine.state_machine.transformation.table             import EncodingTrafoByTable
    from   quex.engine.state_machine.transformation.utf8_state_split  import EncodingTrafoUTF8
    from   quex.engine.state_machine.transformation.utf16_state_split import EncodingTrafoUTF16

    if   BufferCodecName == "utf8":
        return EncodingTrafoUTF8()

    elif BufferCodecName == "utf16":
        return EncodingTrafoUTF16()

    elif BufferCodecFileName:
        # Sanity check: the name must be dissectible into base/extension.
        # (BUG FIX: a redundant, unguarded duplicate of this call was removed.)
        try: 
           os.path.splitext(os.path.basename(BufferCodecFileName))
        except:
            # NOTE(review): bare except kept -- any failure to interpret the
            # string is reported uniformly.
            error.log("cannot interpret string following '--codec-file'")
        return EncodingTrafoByTable(FileName=BufferCodecFileName)

    elif BufferCodecName == "unicode":
        # (Still, 'icu' or 'iconv' may provide converted content, but ...) 
        # If the internal buffer is 'unicode', then the pattern's state 
        # machines are not converted. The requirement for the pattern's
        # range is the same as for the 'buffer element chunks'.
        return EncodingTrafoUnicode(NumberSet(Interval(0, 0x110000)), 
                                    NumberSet(Interval(0, 0x110000))) 

    elif BufferCodecName == "unit-test":
        return EncodingTrafoUnicode(NumberSet_All(), NumberSet_All())

    else:
        return EncodingTrafoByTable(BufferCodecName)
Ejemplo n.º 17
0
def test(Text):
    """Run 'parse_line_column_counter' on *Text* and print the resulting
    count command map. Uses the module-global 'count_n' as a running test
    counter. (Python 2 code: uses print statements.)
    """
    global count_n
    count_n += 1

    # Single-line input printed inline; multi-line input framed by '::'.
    if Text.find("\n") == -1:
        print "(%i) |%s|\n" % (count_n, Text)
    else:
        print "(%i)\n::\n%s\n::\n" % (count_n, Text)

    sh = StringIO(Text)
    sh.name = "test_string"  # fake file name for error reporting

    descr = None
    # descr = counter.parse_line_column_counter(sh)
    try:
        descr = counter.parse_line_column_counter(sh)
        pass

    except EndOfStreamException:
        error.log("End of file reached while parsing 'counter' section.",
                  sh,
                  DontExitF=True)

    except:
        # NOTE(review): bare except keeps the test harness alive on any error.
        print "Exception!"

    if descr is not None: print descr.count_command_map
    print
Ejemplo n.º 18
0
    def get_character_value_limit(self):
        """A buffer element is a chunk of memory of the size of the granularity
        of which the input pointer increases. For fixed size codecs, such as
        ASCII or UCS32, the BUFFER ELEMENT VALUE LIMIT is exactly the same as
        the CHARACTER VALUE LIMIT. 

        However, for dynamic sized codecs, such as UTF8 or UTF16, they are
        different. In UTF8, the input pointer increments by one byte on each
        state transition. However, a character may consist out of multiple
        bytes. The buffer element value limit is 256, but the character value
        limit is the whole range.
        
        
        RETURNS: Integer = supremum of possible character range, i.e.
                           one character behind the last possible.

                 sys.maxint, if no such limit exists.
        """
        buffer_element_size = self.buffer_element_size

        # Size '-1' means 'undetermined' => no limit can be stated.
        # (NOTE: 'sys.maxint' exists in Python 2 only.)
        if buffer_element_size == -1: return sys.maxint

        try:
            result = 256 ** buffer_element_size
        except:
            # NOTE(review): bare except -- presumably guards against a
            # non-numeric 'buffer_element_size'; confirm before narrowing.
            error.log("Error while trying to compute 256 to the 'buffer-element-size' (%i bytes)\n"   \
                      % buffer_element_size + \
                      "Adapt \"--buffer-element-size\" or \"--buffer-element-type\",\n"       + \
                      "or specify '--buffer-element-size-irrelevant' to ignore the issue.")

        # Clamp to the platform's integer maximum.
        if result > sys.maxint: return sys.maxint
        else:                   return result
Ejemplo n.º 19
0
def utf8_to_unicode(ByteSequence):
    """Decode one UTF8-encoded character, given as a list of byte values,
    into its unicode code point.

    There is no elegant way to do the utf8-decoding safely in libPython:
    on a narrow build a character >= 0x10000 may appear as a 2 byte string
    and cannot be handled by 'ord' in python 2.x, so

           utf8d = codecs.getdecoder("utf-8")
           return ord(utf8d("".join(map(chr, ByteSequence)))[0])

    would be unsafe. Therefore the decoding is done by hand.
    """
    # A valid N-byte sequence has N-1 leading ones plus a zero in its first
    # byte => 8 - N payload bits there. Follow bytes are '10xxxxxx' and
    # contribute 6 payload bits each.
    payload_bit_n = 8 - len(ByteSequence)
    result        = ByteSequence[0] & ((1 << payload_bit_n) - 1)
    for follow_byte in ByteSequence[1:]:
        # The highest two bits of a follow byte MUST be '10'.
        if (follow_byte & 0xC0) != 0x80:
            error.log(
                "Error in UTF8 encoded file. Inadmissible byte sequence detected. Found byte '%02X'"
                % follow_byte)
        result = (result << 6) | (follow_byte & 0x3F)

    return result
Ejemplo n.º 20
0
    def buffer_element_specification_prepare(self):
        """Determine 'self.buffer_element_size' and
        'self.buffer_element_type' from one another (size from type via
        'global_character_type_db', or the standard uintN_t type from the
        size). Sets the 'done' flag when finished.
        """
        global global_character_type_db
        if self.buffer_element_size == "wchar_t":
            error.log("Since Quex version 0.53.5, 'wchar_t' can no longer be specified\n"
                      "with option '--buffer-element-size' or '-bes'. Please, specify\n"
                      "'--buffer-element-type wchar_t' or '--bet'.")

        if self.buffer_element_type == "wchar_t":
            self.converter_ucs_coding_name = "WCHAR_T"

        # (*) Determine buffer element type and size (in bytes)
        if self.buffer_element_size == -1:
            # BUG FIX: 'has_key()' (Python 2 only) replaced by 'in'.
            if self.buffer_element_type in global_character_type_db:
                self.buffer_element_size = global_character_type_db[self.buffer_element_type][3]
            elif self.buffer_element_type == "":
                self.buffer_element_size = 1
            else:
                # Buffer element type is not identified in 'global_character_type_db'.
                # => here Quex cannot know its size on its own.
                self.buffer_element_size = -1

        if self.buffer_element_type == "":
            if self.buffer_element_size in [1, 2, 4]:
                # Size fixes the standard unsigned integer type.
                self.buffer_element_type = { 
                    1: "uint8_t", 2: "uint16_t", 4: "uint32_t",
                }[self.buffer_element_size]
            elif self.buffer_element_size == -1:
                pass
            else:
                error.log("Buffer element type cannot be determined for size '%i' which\n" \
                          % self.buffer_element_size + 
                          "has been specified by '-b' or '--buffer-element-size'.")

        self.__buffer_element_specification_done_f = True
Ejemplo n.º 21
0
def _error_if_defined_before(Before, sr):
    """If parameter 'Before' has already been set, report a (non-exiting)
    error at 'sr' and then point to the position of the earlier definition.
    """
    if not Before.set_f():
        return

    error.log("'%s' has been defined before;" % Before.name, sr,
              DontExitF=True)
    error.log("at this place.", Before.sr)
Ejemplo n.º 22
0
    def buffer_codec_prepare(self, BufferCodecName, BufferCodecFileName=None, Module=None):
        """Determines: Setup.buffer_codec_name
                       Setup.buffer_codec

        The chosen codec info object is stored in 'self.buffer_codec'.
        'Module' must be given for the dynamic-size codecs utf8/utf16.
        """
        # Element size must be settled first -- the limits below depend on it.
        assert    BufferCodecName == "unit-test" \
               or self.__buffer_element_specification_done_f == True

        if   BufferCodecName in ("utf8", "utf16"):
            # Dynamic-size codecs: state-split implementation lives in 'Module'.
            assert Module is not None
            result = codec_db.CodecDynamicInfo(BufferCodecName, Module)
        elif BufferCodecFileName:
            # Sanity check: file name must be dissectible into base/extension.
            os.path.splitext(os.path.basename(BufferCodecFileName))
            try: 
               os.path.splitext(os.path.basename(BufferCodecFileName))
            except:
                # NOTE(review): bare except -- any failure is reported uniformly.
                error.log("cannot interpret string following '--codec-file'")
            result = codec_db.CodecTransformationInfo(FileName=BufferCodecFileName)
        elif BufferCodecName == "unicode":
            # (Still, 'icu' or 'iconv' may provide converted content, but ...) 
            # If the internal buffer is 'unicode', then the pattern's state 
            # machines are not converted. The requirement for the pattern's
            # range is the same as for the 'buffer element chunks'.
            result = codec_db.CodecInfo("unicode", 
                                NumberSet.from_range(0, self.get_character_value_limit()), 
                                NumberSet.from_range(0, self.get_character_value_limit()))
        elif BufferCodecName == "unit-test":
            # Unit tests: no range restriction at all.
            result = codec_db.CodecInfo("unicode", 
                                NumberSet.from_range(-sys.maxint, sys.maxint),
                                NumberSet.from_range(-sys.maxint, sys.maxint))

        else:
            result = codec_db.CodecTransformationInfo(BufferCodecName)

        self.buffer_codec = result
Ejemplo n.º 23
0
def parse(fh,
          CodeFragmentName,
          ErrorOnFailureF=True,
          AllowBriefTokenSenderF=True,
          ContinueF=True):
    """Parse a code fragment: either a '{...}' block or a brief token
    sender introduced by '=>'.

    RETURNS: a CodeUser object carrying line number, filename, and the
             code fragment -- or None on failure (if tolerated).
    """
    assert type(ErrorOnFailureF) == bool
    assert type(AllowBriefTokenSenderF) == bool

    skip_whitespace(fh)

    word = fh.read(2)
    if word[:1] == "{":
        # Only the '{' belongs to the fragment; un-read any second character.
        if len(word) > 1:
            fh.seek(-1, 1)
        return __parse_normal(fh, CodeFragmentName)

    if AllowBriefTokenSenderF and word == "=>":
        return __parse_brief_token_sender(fh, ContinueF)

    if not ErrorOnFailureF:
        # Tolerated failure: un-read both characters, let the caller decide.
        fh.seek(-2, 1)
        return None

    error.log(
        "Missing code fragment after %s definition." % CodeFragmentName,
        fh)
Ejemplo n.º 24
0
def __check_file_name(setup, Candidate, Name, Index=None, CommandLineOption=None):
    """Verify that the file name(s) stored in setup attribute 'Candidate'
    exist and do not look like command line options.

    'Index'  -- optional selection of one element of a list-valued setting.
    'Name'   -- human readable description used in error messages.

    Reports via 'error.log' / 'error.log_file_not_found'.
    """
    value             = setup.__dict__[Candidate]
    if len(value) == 0: return

    if CommandLineOption is None:
        CommandLineOption = command_line_args(Candidate)

    if Index is not None:
        if type(value) != list or len(value) <= Index: value = ""
        else:                                          value = value[Index]

    if type(value) == list:
        for name in value:
            # Names starting with '-' were most likely meant as options.
            if name != "" and name[0] == "-": 
                error.log("Quex refuses to work with file names that start with '-' (minus).\n"  + \
                          "Received '%s' for %s (%s)" % (value, name, repr(CommandLineOption)[1:-1]))
            # IDIOM FIX: 'os.access(...) == False' -> 'not os.access(...)'.
            if not os.access(name, os.F_OK):
                error.log_file_not_found(name, Name)
    else:
        if value == "" or value[0] == "-":              return
        if os.access(value, os.F_OK):                   return
        if os.access(QUEX_PATH + "/" + value, os.F_OK): return
        # Neither the plain nor the QUEX_PATH-relative directory exists.
        if     not os.access(os.path.dirname(value), os.F_OK) \
           and not os.access(QUEX_PATH + "/" + os.path.dirname(value), os.F_OK):
            error.log("File '%s' is supposed to be located in directory '%s' or\n" % \
                      (os.path.basename(value), os.path.dirname(value)) + \
                      "'%s'. No such directories exist." % \
                      (QUEX_PATH + "/" + os.path.dirname(value)))
        error.log_file_not_found(value, Name)
Ejemplo n.º 25
0
 def error_dedent_and_ndedent(code, A, B):
     """Report that indentation handler 'A' cannot be defined because the
     alternative 'B' is already defined (position taken from 'code.sr').

     NOTE(review): 'fh' is taken from an enclosing scope not visible here --
     this fragment appears to be a nested helper; confirm against caller.
     """
     error.log(
         "Indentation event handler '%s' cannot be defined, because\n" % A,
         fh,
         DontExitF=True)
     error.log("the alternative '%s' has already been defined." % B,
               code.sr)
Ejemplo n.º 26
0
def __parse(fh, result, IndentationSetupF=False):
    """Parse count/indentation specifications of the form:

          [ \\t]                                       => grid 4;
          [:intersection([:alpha:], [\\X064-\\X066]):]  => space 1;

    The right hand side *must* be a character set. Definitions are entered
    into 'result' via its 'specify()' method; 'result' is returned.

    NOTE: Catching of EOF happens in caller: parse_section(...)
    """
    while True:
        skip_whitespace(fh)
        # A '>' terminates the specification list.
        if check(fh, ">"):
            break

        # A regular expression state machine plus the counted identifier.
        pattern, identifier, sr = __parse_definition_head(fh, result)
        if pattern is None and IndentationSetupF:
            error.log("Keyword '\\else' cannot be used in indentation setup.", fh)

        # '__parse_definition_head()' ensures that only identifiers mentioned
        # in 'result' are accepted.
        if IndentationSetupF:
            result.specify(identifier, pattern, sr)
        else:
            value = read_value_specifier(fh, identifier, 1)
            result.specify(identifier, pattern, value, sr)

        if not check(fh, ";"):
            error.log("Missing ';' after '%s' specification." % identifier, fh)

    return result
Ejemplo n.º 27
0
def test(Text):
    """Test helper: parse *Text* as a line/column counter section and print
    the resulting count command map. The module-global 'count_n' numbers
    the test cases. (Python 2 code: uses print statements.)
    """
    global count_n
    count_n += 1

    # Short inputs inline, multi-line inputs framed by '::'.
    if Text.find("\n") == -1:
        print "(%i) |%s|\n" % (count_n, Text)
    else:
        print "(%i)\n::\n%s\n::\n" % (count_n, Text)

    sh      = StringIO(Text)
    sh.name = "test_string"  # fake file name for error reporting

    descr = None
    # descr = counter.parse_line_column_counter(sh)
    try:    
        descr = counter.parse_line_column_counter(sh)
        pass

    except EndOfStreamException:
        error.log("End of file reached while parsing 'counter' section.", sh, DontExitF=True)

    except:
        # NOTE(review): bare except keeps the test harness alive on any error.
        print "Exception!"

    if descr is not None: print descr.count_command_map
    print
Ejemplo n.º 28
0
def unicode_to_utf8(UnicodeValue):
    """Encode unicode code point 'UnicodeValue' as a list of UTF8 bytes.

    Implements the original (RFC 2279, pre-RFC 3629) UTF8 scheme which
    covers code points up to 0x7FFFFFFF with sequences of up to six bytes.
    A sequence of N > 1 bytes starts with a lead byte carrying N leading
    one-bits followed by a zero; continuation bytes have the form
    '10xxxxxx' and carry six payload bits each.

    RETURNS: list of integer byte values.
    """
    if UnicodeValue < 0x80:
        # Plain ASCII: single byte.
        return [ UnicodeValue, ]
    elif UnicodeValue < 0x800:
        # Bits: 5 + 6
        return [ 0xC0 | ((UnicodeValue >> 6) & 0x1F),
                 0x80 | ((UnicodeValue     ) & 0x3F)]
    elif UnicodeValue < 0x10000:
        # Bits: 4 + 6 + 6
        return [ 0xE0 | ((UnicodeValue >> 12) & 0x0F),
                 0x80 | ((UnicodeValue >>  6) & 0x3F),
                 0x80 | ((UnicodeValue      ) & 0x3F)]
    elif UnicodeValue < 0x00200000:
        # Bits: 3 + 6 + 6 + 6
        return [ 0xF0 | ((UnicodeValue >> 18) & 0x07),
                 0x80 | ((UnicodeValue >> 12) & 0x3F),
                 0x80 | ((UnicodeValue >>  6) & 0x3F),
                 0x80 | ((UnicodeValue      ) & 0x3F)]
    elif UnicodeValue < 0x04000000:
        # Bits: 2 + 6 + 6 + 6 + 6
        # BUG FIX: the lead byte of a five byte sequence is 0xF8
        # ('111110xx'), not 0xF0.
        return [ 0xF8 | ((UnicodeValue >> 24) & 0x03),
                 0x80 | ((UnicodeValue >> 18) & 0x3F),
                 0x80 | ((UnicodeValue >> 12) & 0x3F),
                 0x80 | ((UnicodeValue >>  6) & 0x3F),
                 0x80 | ((UnicodeValue      ) & 0x3F)]
    elif UnicodeValue < 0x80000000:
        # Bits: 1 + 6 + 6 + 6 + 6 + 6
        # BUG FIX: the lead byte of a six byte sequence is 0xFC ('1111110x').
        return [ 0xFC | ((UnicodeValue >> 30) & 0x01),
                 0x80 | ((UnicodeValue >> 24) & 0x3F),
                 0x80 | ((UnicodeValue >> 18) & 0x3F),
                 0x80 | ((UnicodeValue >> 12) & 0x3F),
                 0x80 | ((UnicodeValue >>  6) & 0x3F),
                 0x80 | ((UnicodeValue      ) & 0x3F)]
    else:
        error.log("Unicode character 0x%8X > 0x7FFFFFFF detected. Cannot be handled." % UnicodeValue)
Ejemplo n.º 29
0
def utf8_to_unicode(ByteSequence):
    """Decode one UTF8 byte sequence into its unicode code point.

    Done by hand, because a python 2.x narrow build may represent a code
    point >= 0x10000 as a two character string, so

           utf8d = codecs.getdecoder("utf-8")
           return ord(utf8d("".join(map(chr, ByteSequence)))[0])

    would be unsafe.
    """
    # For an (assumed valid) sequence of N bytes the lead byte carries
    # N - 1 one-bits plus a zero in its header, leaving 8 - N payload bits.
    # Every follow byte starts with '10' and contributes 6 payload bits.
    payload_bit_n = 8 - len(ByteSequence)
    result        = ByteSequence[0] & ((1 << payload_bit_n) - 1)
    for follow_byte in ByteSequence[1:]:
        result = (result << 6) | (follow_byte & 0x3F)
        # The highest two bits of a follow byte in utf8 MUST be '10'.
        if (follow_byte & 0xC0) != 0x80:
            error.log("Error in UTF8 encoded file. Inadmissible byte sequence detected. Found byte '%02X'" % follow_byte)

    return result
Ejemplo n.º 30
0
def do(file_list):
    if not file_list and not (Setup.token_class_only_f
                              or Setup.converter_only_f):
        error.log("No input files.")

    mode_prep_prep_db = {}  # mode name --> Mode_PrepPrep object
    #                      # later: Mode_PrepPrep is transformed into Mode objects.

    # If a foreign token-id file was presented even the standard token ids
    # must be defined there.
    if not Setup.extern_token_id_file:
        prepare_default_standard_token_ids()

    for file_name in file_list:
        error.insight("File '%s'" % file_name)
        fh = open_file_or_die(file_name, CodecCheckF=True)

        # read all modes until end of file
        try:
            while 1 + 1 == 2:
                parse_section(fh, mode_prep_prep_db)
        except EndOfStreamException:
            pass
        except RegularExpressionException, x:
            error.log(x.message, fh)
Ejemplo n.º 31
0
    def detect_path_of_nothing_is_necessary(sm, Name, PostContextPresentF, fh):
        """Report an error if state machine 'sm' accepts without reading input.

        An init state that is already an acceptance state means the pattern
        matches the empty lexeme ('nothing is necessary'): the analyzer would
        stall (core pattern), or the pre-/post-context is pointless.

        sm   -- state machine under examination; None is silently accepted.
        Name -- "" (core pattern), "pre-context", or "post-context"; selects
                the explanation appended to the error message.
        fh   -- file handle used to point at the pattern's position.
        """
        assert Name in ["", "pre-context", "post-context"]
        if sm is None:
            return
        elif not sm.get_init_state().is_acceptance():
            # Init state does not accept => at least one character required.
            return
        if len(Name) == 0: name_str = "core pattern"
        else: name_str = Name

        msg = "The %s contains in a 'nothing is necessary' path in the state machine.\n"   \
              % name_str                                                                     + \
              "This means, that without reading a character the analyzer drops into\n"   + \
              "an acceptance state. "

        # Append the explanation that fits the role of 'sm'.
        msg += {
            "":
                "The analyzer would then stall.",

            "pre-context":
                "E.g., pattern 'x*/y/' means that zero or more 'x' are a pre-\n"             + \
                "condition for 'y'. If zero appearances of 'x' are enough, then obviously\n" + \
                "there is no pre-context for 'y'! Most likely the author intended 'x+/y/'.",

            "post-context":
                "A post context where nothing is necessary is superfluous.",
        }[Name]

        if Name != "post-context" and PostContextPresentF:
            msg += "\n"                                                          \
                   "Note: A post context does not change anything to that fact."

        error.log(msg, fh)
Ejemplo n.º 32
0
def __parse_event(new_mode, fh, word):
    """Try to interpret 'word' as an event handler name and parse its code.

    RETURNS: True  -- if 'word' named an event handler; its code fragment
                      has been entered into 'new_mode.incidence_db'.
             False -- else ('word' is a plain pattern).
    """
    pos = fh.tell()

    # Respect classical tools like 'lex': allow '<<EOF>>' and '<<FAIL>>'.
    if   word == "<<EOF>>":  word = "on_end_of_stream"
    elif word == "<<FAIL>>": word = "on_failure"
    elif word in blackboard.all_section_title_list:
        error.log("Pattern '%s' is a quex section title. Has the closing '}' of mode %s \n" % (word, new_mode.name) \
                  + "been forgotten? Else use quotes, i.e. \"%s\"." % word, fh)
    elif len(word) < 3 or word[:3] != "on_":
        # Anything not starting with 'on_' is a plain pattern, not an event.
        return False

    comment = "Unknown event handler '%s'. \n" % word + \
              "Note, that any pattern starting with 'on_' is considered an event handler.\n" + \
              "use double quotes to bracket patterns that start with 'on_'."

    __general_validate(fh, new_mode, word, pos)
    error.verify_word_in_list(word, standard_incidence_db.keys(), comment, fh)
    __validate_required_token_policy_queue(word, fh, pos)

    # 'on_end_of_stream': a termination token is sent; no token may follow.
    # 'on_failure':       the receiver may act immediately.
    # => in both cases enforce return from the analyzer; forbid CONTINUE.
    continue_f = word not in ("on_end_of_stream", "on_failure")

    new_mode.incidence_db[word] = \
            code_fragment.parse(fh, "%s::%s event handler" % (new_mode.name, word),
                                ContinueF=continue_f)
    return True
Ejemplo n.º 33
0
def parse(fh):
    """Parse one mode description and register it.

    The ModeDescription constructor enters the new mode into
    'blackboard.mode_description_db'. Once all modes are parsed they are
    translated into 'real' modes located in 'blackboard.mode_db'.
    """
    # EOF is caught by the caller: parse_section(...)
    skip_whitespace(fh)
    mode_name = read_identifier(
        fh, OnMissingStr="Missing identifier at beginning of mode definition.")

    # The constructor registers the new mode in the mode_db.
    new_mode = ModeDescription(mode_name, SourceRef.from_FileHandle(fh))

    # (*) base modes / option list
    skip_whitespace(fh)
    delimiter = fh.read(1)
    if delimiter not in (":", "{"):
        error.log("missing ':' or '{' after mode '%s'" % mode_name, fh)

    if delimiter == ":":
        __parse_option_list(new_mode, fh)

    # (*) pattern-action pairs and event handlers
    while __parse_element(new_mode, fh):
        pass
Ejemplo n.º 34
0
def __general_validate(fh, Mode, Name, pos):
    """Check general constraints on event handler 'Name' for 'Mode'.

    Reports an error for the long-removed 'on_indentation' handler and for
    defining both 'on_dedent' and 'on_n_dedent' at the same time.
    """
    if Name == "on_indentation":
        fh.seek(pos)
        error.log(
            "Definition of 'on_indentation' is no longer supported since version 0.51.1.\n"
            "Please, use 'on_indent' for the event of an opening indentation, 'on_dedent'\n"
            "for closing indentation, and 'on_nodent' for no change in indentation.",
            fh)

    def error_dedent_and_ndedent(code, A, B):
        # First message points at the current definition; the second at the
        # place where the alternative handler was defined.
        error.log(
            "Indentation event handler '%s' cannot be defined, because\n" % A,
            fh,
            DontExitF=True)
        error.log("the alternative '%s' has already been defined." % B,
                  code.sr)

    # 'dict.has_key()' is obsolete (removed in Python 3); use 'in' instead.
    if Name == "on_dedent" and "on_n_dedent" in Mode.incidence_db:
        fh.seek(pos)
        code = Mode.incidence_db["on_n_dedent"]
        if not code.is_whitespace():
            error_dedent_and_ndedent(code, "on_dedent", "on_n_dedent")

    if Name == "on_n_dedent" and "on_dedent" in Mode.incidence_db:
        fh.seek(pos)
        code = Mode.incidence_db["on_dedent"]
        if not code.is_whitespace():
            error_dedent_and_ndedent(code, "on_n_dedent", "on_dedent")
Ejemplo n.º 35
0
def __error_detection(not_found_list, recursive_list):
    """Warn about include files that were not found or recursively included.

    Both lists contain (file_name, line_n, included_file) tuples. All output
    is suppressed if the notification id is in the suppression list.
    """
    ErrorN = NotificationDB.token_id_ignored_files_report
    if ErrorN in Setup.suppressed_notification_list:
        return

    sr = None
    if not_found_list:
        not_found_list.sort()
        sr = SourceRef(not_found_list[0][0], LineN=not_found_list[0][1])
        error.warning("Files not found:", sr)
        for file_name, line_n, included_file in not_found_list:
            error.warning("%s" % included_file,
                          SourceRef(file_name, line_n))

    if recursive_list:
        recursive_list.sort()
        sr = SourceRef(recursive_list[0][0], LineN=recursive_list[0][1])
        error.warning(
            "Files recursively included (ignored second inclusion):", sr)
        for file_name, line_n, included_file in recursive_list:
            error.warning("%s" % included_file,
                          SourceRef(file_name, line_n))

    # 'sr' carries the source reference from the last setting above.
    if sr is not None:
        error.log("\nQuex does not handle C-Preprocessor instructions.",
                  sr,
                  NoteF=True,
                  DontExitF=True,
                  SuppressCode=ErrorN)
Ejemplo n.º 36
0
def __roman_number(Text, fh):
    """Convert roman numeral 'Text' (case insensitive) into an integer.

    Source: http://code.activestate.com -- Code Recipes
            Recipe 81611 by Paul Winkler.

    'fh' is only used to point at the input position if 'Text' is invalid.
    """
    # Renamed from 'input', which shadowed the builtin of the same name.
    numeral_text = Text.upper()
    # map of (numeral, value, maxcount) tuples
    roman_numeral_map = (('M', 1000, None), ('CM', 900, 1), ('D', 500, 1),
                         ('CD', 400, 1), ('C', 100, 3), ('XC', 90, 1),
                         ('L', 50, 1), ('XL', 40, 1), ('X', 10, 3),
                         ('IX', 9, 1), ('V', 5, 1), ('IV', 4, 1), ('I', 1, 3))

    result, index = 0, 0
    for numeral, value, maxcount in roman_numeral_map:
        count = 0
        while numeral_text[index:index + len(numeral)] == numeral:
            count += 1  # how many of this numeral we have
            if maxcount is not None and count > maxcount:
                error.log("input 0r%s is not a valid roman numeral." % Text,
                          fh)
            result += value
            index += len(numeral)

    if index < len(numeral_text):  # There are characters unaccounted for.
        error.log("input 0r%s is not a valid roman numeral." % Text, fh)

    return result
Ejemplo n.º 37
0
def __handle_property_match(cl):
    property_follower = Setup.query_property_match
    if not property_follower: return

    sys.stderr.write("(please, wait for database parsing to complete)\n")

    fields = map(lambda x: x.strip(), property_follower.split("="))
    if len(fields) != 2:
        error.log("Wrong property setting '%s'." % property_follower)

    # -- determine name and value
    name                 = fields[0]
    wild_card_expression = fields[1]

    # -- get the property from the database
    property = __get_property(name)
    if property is None: 
        return True

    # -- find the character set for the given expression
    if property.type == "Binary":
        error.log("Binary property '%s' is not subject to value wild card matching.\n" % property.name)

    for value in property.get_wildcard_value_matches(wild_card_expression):
        print value
Ejemplo n.º 38
0
def read_identifier(fh, TolerantF=False, OnMissingStr=None):
    """Read an identifier starting at the current position of 'fh'.

    TolerantF    -- accept identifier-continue characters at the start.
    OnMissingStr -- if given, report this error when nothing was found.

    RETURNS: the identifier; "" if none was found.
    """
    def __start_is_admissible(Char):
        # Tolerant mode accepts continue-characters right at the start.
        if TolerantF: return is_identifier_continue(Char)
        else:         return is_identifier_start(Char)

    def __read(fh):
        first = fh.read(1)
        if len(first) == 0: return ""
        if __start_is_admissible(first) == False:
            fh.seek(-1, 1)  # unput the character
            return ""

        txt = first
        while True:
            tmp = fh.read(1)
            if len(tmp) == 0:
                return txt
            elif is_identifier_continue(tmp):
                txt += tmp
            else:
                fh.seek(-1, 1)  # unput the character
                return txt

    result = __read(fh)

    if len(result) == 0 and OnMissingStr is not None:
        error.log(OnMissingStr, fh)
    return result
Ejemplo n.º 39
0
def read_integer(fh):
    """Read a number in any supported base from the current position of 'fh'.

    RETURNS: integer value of the number;
             None, if no number is present (stream position restored).
    """
    pos = fh.tell()

    base, digit_list = get_number_base(fh)
    if base is None: return None

    # Collect every character that belongs to the base's digit set.
    txt = ""
    while True:
        tmp = fh.read(1)
        if tmp == "":
            break
        elif tmp not in digit_list:
            fh.seek(-1, 1)  # unput the non-digit
            break
        txt += tmp

    # Dropped out on a digit-like character => the user probably just
    # missed a point.
    if tmp.isdigit() or (tmp in list("ABCDEFabcdef")):
        error.log(
            "Digit '%s' cannot be part of an expression of base %s." %
            (tmp, base), fh)

    txt = txt.replace(".", "")
    if not txt:
        if base in [2, 8, 16, "roman", "Napier"]:
            error.log(
                "Missing digits after for number of base %s, found '%s'." %
                (str(base), tmp), fh)
        fh.seek(pos)
        return None

    # Binary, octal, decimal, and hexadecimal numbers
    if   base in [2, 8, 10, 16]: return int(txt, base)
    elif base == "roman":        return __roman_number(txt, fh)
    elif base == "Napier":       return __napier_number(txt, fh)
    else:                        return __binary_number(txt, fh)
Ejemplo n.º 40
0
def optional_flags(fh, SectionName, Default, AdmissibleDb, BadCombinationList):
    """Parse an optional, parenthesized list of one-letter flags.

    AdmissibleDb       -- flag letter --> explanation string.
    BadCombinationList -- groups of flags that must not appear together.

    RETURNS: string of flag letters; 'Default' if no '(' follows.
    """
    pos = fh.tell()
    if not check(fh, "("): return Default

    # Read up to ')' and strip any whitespace in between.
    flag_txt = read_until_character(fh, ")").strip().replace(" ", "").replace(
        "\t", "").replace("\n", "").replace("\r", "")
    for letter in flag_txt:
        if letter in AdmissibleDb: continue

        fh.seek(pos)
        # '.items()' instead of the Python-2-only '.iteritems()'.
        explanation_txt = [
            "'%s' for %s." % (flag, explanation)
            for flag, explanation in AdmissibleDb.items()
        ]
        explanation_str = "Options are: "\
                          + "\n             ".join(explanation_txt)
        error.log("Flag '%s' not permitted for %s.\n" % (letter, SectionName) + \
                  explanation_str, fh)

    # Bad combination check:
    for bad_combination in BadCombinationList:
        suspect_list = [flag for flag in bad_combination if flag in flag_txt]
        if len(suspect_list) > 1:
            suspect_list.sort()
            error.log(
                "Flag '%s' and '%s' cannot be used\n"
                "at the same time in %s." %
                (suspect_list[0], suspect_list[1], SectionName), fh)

    return flag_txt
Ejemplo n.º 41
0
def test(Text):
    global count_n
    count_n += 1

    if Text.find("\n") == -1:
        print "(%i) |%s|\n" % (count_n, Text)
    else:
        print "(%i)\n::\n%s\n::\n" % (count_n, Text)

    sh = StringIO(Text)
    sh.name = "test_string"

    descr = None
    try:
        descr = IndentationCount.from_FileHandle(sh)
    except EndOfStreamException:
        error.log("End of file reached while parsing 'indentation' section.",
                  sh,
                  DontExitF=True)

    except:
        print "Exception!"

    if descr is not None: print descr
    print
Ejemplo n.º 42
0
 def __check_on_orphan_states(Place, sm):
     """Internal defect check: no state of 'sm' may be unreachable.

     'Place' names the transformation step after which the check runs and
     is woven into the error message.
     """
     orphan_list = sm.get_orphaned_state_index_list()
     if len(orphan_list) == 0: return
     error.log("After '%s'" % Place + "\n" + \
               "Orphaned state(s) detected in regular expression (optimization lack).\n" + \
               "Please, log a defect at the projects website quex.sourceforge.net.\n"    + \
               "Orphan state(s) = " + repr(orphan_list))
Ejemplo n.º 43
0
 def __check_on_orphan_states(Place, sm):
     """Defect check: state machine 'sm' must not contain orphaned states.

     Place -- name of the transformation step after which this check runs;
              it appears in the error message.
     Orphaned (unreachable) states indicate a lack in the optimization
     steps, i.e. an internal defect; 'error.log' is triggered in that case.
     """
     orphan_state_list = sm.get_orphaned_state_index_list()
     if len(orphan_state_list) == 0: return
     error.log("After '%s'" % Place + "\n" + \
               "Orphaned state(s) detected in regular expression (optimization lack).\n" + \
               "Please, log a defect at the projects website quex.sourceforge.net.\n"    + \
               "Orphan state(s) = " + repr(orphan_state_list))
Ejemplo n.º 44
0
    def detect_path_of_nothing_is_necessary(sm, Name, PostContextPresentF, fh):
        """Report an error if state machine 'sm' accepts without reading input.

        An init state that is already an acceptance state means the pattern
        matches the empty lexeme ('nothing is necessary'): the analyzer would
        stall (core pattern), or the pre-/post-context is pointless.

        sm   -- state machine under examination; None is silently accepted.
        Name -- "" (core pattern), "pre-context", or "post-context"; selects
                the explanation appended to the error message.
        fh   -- file handle used to point at the pattern's position.
        """
        assert Name in ["", "pre-context", "post-context"]
        if sm is None: 
            return
        elif not sm.get_init_state().is_acceptance(): 
            # Init state does not accept => at least one character required.
            return
        if len(Name) == 0: name_str = "core pattern"
        else:              name_str = Name

        msg = "The %s contains in a 'nothing is necessary' path in the state machine.\n"   \
              % name_str                                                                     + \
              "This means, that without reading a character the analyzer drops into\n"   + \
              "an acceptance state. "

        # Append the explanation that fits the role of 'sm'.
        msg += { 
            "":
                "The analyzer would then stall.",

            "pre-context":
                "E.g., pattern 'x*/y/' means that zero or more 'x' are a pre-\n"             + \
                "condition for 'y'. If zero appearances of 'x' are enough, then obviously\n" + \
                "there is no pre-context for 'y'! Most likely the author intended 'x+/y/'.",

            "post-context":
                "A post context where nothing is necessary is superfluous.",
        }[Name]

        if Name != "post-context" and PostContextPresentF:
            msg += "\n"                                                          \
                   "Note: A post context does not change anything to that fact." 

        error.log(msg, fh)
Ejemplo n.º 45
0
def __check_file_name(setup,
                      Candidate,
                      Name,
                      Index=None,
                      CommandLineOption=None):
    """Validate that file name(s) stored in setup attribute 'Candidate' exist.

    Name              -- human readable description for error messages.
    Index             -- if given, only the Index-th element of a list
                         value is checked.
    CommandLineOption -- command line option(s) that set the value;
                         derived from 'Candidate' if not given.
    """
    value = setup.__dict__[Candidate]
    if len(value) == 0: return

    if CommandLineOption is None:
        CommandLineOption = command_line_args(Candidate)

    if Index is not None:
        # Pick the Index-th element; treat an out-of-range index as 'empty'.
        if type(value) != list or len(value) <= Index: value = ""
        else:                                          value = value[Index]

    if type(value) == list:
        for name in value:
            # Names starting with '-' would be taken for command line options.
            if name != "" and name[0] == "-":
                error.log("Quex refuses to work with file names that start with '-' (minus).\n"  + \
                          "Received '%s' for %s (%s)" % (value, name, repr(CommandLineOption)[1:-1]))
            # ('not os.access(...)' instead of the '== False' comparison.)
            if not os.access(name, os.F_OK):
                error.log_file_not_found(name, Name)
    else:
        if value == "" or value[0] == "-": return
        if os.access(value, os.F_OK): return
        if os.access(QUEX_PATH + "/" + value, os.F_OK): return
        # Neither the plain nor the QUEX_PATH-relative directory exists.
        if     not os.access(os.path.dirname(value), os.F_OK) \
           and not os.access(QUEX_PATH + "/" + os.path.dirname(value), os.F_OK):
            error.log("File '%s' is supposed to be located in directory '%s' or\n" % \
                      (os.path.basename(value), os.path.dirname(value)) + \
                      "'%s'. No such directories exist." % \
                      (QUEX_PATH + "/" + os.path.dirname(value)))
        error.log_file_not_found(value, Name)
Ejemplo n.º 46
0
def __parse(fh, result, IndentationSetupF=False):
    """Parses pattern definitions of the form:
   
          [ \t]                                       => grid 4;
          [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

       In other words the right hand side *must* be a character set.

       fh                -- input file handle positioned inside the section.
       result            -- counter/indentation setup object; receives the
                            parsed specifications via '.specify(...)'.
       IndentationSetupF -- True when parsing an indentation setup; then the
                            right-hand side carries no count value.

       RETURNS: 'result' with all parsed definitions entered.
    """

    # NOTE: Catching of EOF happens in caller: parse_section(...)
    #
    while 1 + 1 == 2:
        skip_whitespace(fh)
        if check(fh, ">"):
            # Closing '>' terminates the definition list.
            break

        # A regular expression state machine
        pattern, identifier, sr = __parse_definition_head(fh, result)
        if pattern is None and IndentationSetupF:
            # A 'None' pattern encodes '\else' -- not admissible here.
            error.log("Keyword '\\else' cannot be used in indentation setup.",
                      fh)

        # '__parse_definition_head()' ensures that only identifiers mentioned in
        # 'result' are accepted.
        if not IndentationSetupF:
            value = read_value_specifier(fh, identifier, 1)
            result.specify(identifier, pattern, value, sr)
        else:
            result.specify(identifier, pattern, sr)

        if not check(fh, ";"):
            error.log("Missing ';' after '%s' specification." % identifier, fh)

    return result
Ejemplo n.º 47
0
def __parse_base_mode_list(fh, new_mode):
    """Read the comma separated list of base modes of 'new_mode'.

    Fills 'new_mode.derived_from_list'. Warns about a trailing comma;
    reports an error when the separating ',' between two modes is missing.
    """
    new_mode.derived_from_list = []
    trailing_comma_f = False
    while True:
        # '{' and '<' terminate the list; unput the consumed character.
        if check(fh, "{"):
            fh.seek(-1, 1)
            break
        elif check(fh, "<"):
            fh.seek(-1, 1)
            break

        skip_whitespace(fh)
        base_mode_name = read_identifier(fh)
        if base_mode_name == "": break

        new_mode.derived_from_list.append(base_mode_name)
        trailing_comma_f = False
        if not check(fh, ","): break
        trailing_comma_f = True

    if trailing_comma_f:
        error.warning(
            "Trailing ',' after base mode '%s'." %
            new_mode.derived_from_list[-1], fh)

    elif len(new_mode.derived_from_list) != 0:
        # A 'service' for those who follow the old convention: detect a
        # missing comma between two base mode names.
        pos = fh.tell()
        skip_whitespace(fh)
        dummy_identifier = read_identifier(fh)
        if dummy_identifier != "":
            error.log("Missing separating ',' between base modes '%s' and '%s'.\n" \
                      % (new_mode.derived_from_list[-1], dummy_identifier) + \
                      "(The comma separator is mandatory since quex 0.53.1)", fh)
        fh.seek(pos)
Ejemplo n.º 48
0
 def check_grid_specification(self, Value, sr):
     """Sanity-check a 'grid' count specification.

     A grid count of 0 is rejected as nonsense; a grid count of 1 only
     triggers a warning, since a space count of 1 computes faster.
     """
     if Value == 0:
         error.log("A grid count of 0 is nonsense. May be define a space count of 0.", sr)
     elif Value == 1:
         error.warning("Indentation grid counts of '1' are equivalent of to a space\n" + \
                       "count of '1'. The latter is faster to compute.",
                           sr)
Ejemplo n.º 49
0
    def _base_parse(self, fh, IndentationSetupF=False):
        """Parses pattern definitions of the form:

              [ \t]                                       => grid 4;
              [:intersection([:alpha:], [\X064-\X066]):]  => space 1;

        i.e. a character set on the left hand side and a count specifier
        on the right. Parsing stops at the closing '>'.

        ADAPTS:  self, through the '.specify(...)' calls.
        RETURNS: self.finalize()
        """
        # EOF is caught by the caller: parse_section(...)
        while True:
            skip_whitespace(fh)
            if check(fh, ">"):
                break

            # Left-hand side: a character-set state machine.
            pattern, identifier, sr = _parse_definition_head(fh, self.identifier_list)
            if IndentationSetupF and pattern is None:
                error.log("Keyword '\\else' cannot be used in indentation setup.", fh)

            # '_parse_definition_head()' accepts only identifiers from
            # 'self.identifier_list'.
            if self.requires_count():
                count = _read_value_specifier(fh, identifier, 1)
                self.specify(identifier, pattern, count, sr)
            else:
                self.specify(identifier, pattern, sr)

            if not check(fh, ";"):
                error.log("Missing ';' after '%s' specification." % identifier, fh)

        return self.finalize()
Ejemplo n.º 50
0
Archivo: base.py Proyecto: xxyzzzq/quex
    def adapt_source_and_drain_range(self, LexatomByteN):
        """The drain range may be restricted due to the number of bytes given
        per lexatom. If the 'LexatomByteN' is '-1' it is unrestricted which 
        may be useful for unit tests and theoretical investigations.

        DERIVED CLASS MAY HAVE TO WRITE A DEDICATED VERSION OF THIS FUNCTION
        TO MODIFY THE SOURCE RANGE '.source_set'.

        ADAPTS: '.lexatom_range' and masks '.drain_set' accordingly.
        """
        if LexatomByteN == -1:
            # Unrestricted: any lexatom value is admissible.
            self.lexatom_range = Interval_All()
            return 

        assert LexatomByteN >= 1
        lexatom_min_value = self.drain_set.minimum()
        lexatom_max_value = self.drain_set.supremum() - 1
        if LexatomByteN != -1:   # always true here; '-1' already returned above
            try:    
                value_n = 256 ** LexatomByteN
            except:
                # NOTE(review): '256 ** int' does not normally raise; this
                # guard presumably targets absurd 'LexatomByteN' -- confirm.
                error.log("Error while trying to compute 256 power the 'lexatom-size' (%i bytes)\n"   \
                          % LexatomByteN + \
                          "Adapt \"--buffer-element-size\" or \"--buffer-element-type\",\n"       + \
                          "or specify '--buffer-element-size-irrelevant' to ignore the issue.")
            lexatom_min_value = 0
            lexatom_max_value = min(lexatom_max_value, value_n - 1)

        # Cap at the platform's maximum integer (Python 2 'sys.maxint').
        lexatom_max_value = min(lexatom_max_value, sys.maxint)

        assert lexatom_max_value > lexatom_min_value

        self.lexatom_range = Interval(lexatom_min_value, lexatom_max_value + 1)
        self.drain_set.mask_interval(self.lexatom_range)
Ejemplo n.º 51
0
def parse(fh, CodeFragmentName,
          ErrorOnFailureF=True, AllowBriefTokenSenderF=True, ContinueF=True):
    """Parse a code fragment opened by '{' or a brief token sender '=>'.

    RETURNS: An object of class CodeUser containing line number, filename,
             and the code fragment.

             None, if nothing was found and 'ErrorOnFailureF' is False.
    """
    assert type(ErrorOnFailureF)        == bool
    assert type(AllowBriefTokenSenderF) == bool

    skip_whitespace(fh)

    opener = fh.read(2)
    if opener.startswith("{"):
        # Only the '{' belongs to the fragment; unput the second character.
        if len(opener) > 1: fh.seek(-1, 1)
        return __parse_normal(fh, CodeFragmentName)

    if AllowBriefTokenSenderF and opener == "=>":
        return __parse_brief_token_sender(fh, ContinueF)

    if not ErrorOnFailureF:
        fh.seek(-2, 1)  # unput both characters
        return None

    error.log("Missing code fragment after %s definition." % CodeFragmentName,
              fh)
Ejemplo n.º 52
0
def argv_is_query_option(Cl, Option, Name, PrevQueryF):
    """Determine whether setup parameter 'Name' is a query-related option
    (as opposed to a code generation option). Mixed usage is an error.

    RETURN: query flag

    The query flag equals 'PrevQueryF', except when 'PrevQueryF' was None
    (unset) and the option appeared on the command line; then the return
    value tells whether the option was a query flag or not.

    ERROR: If query flags and code generation flags appear at the same time.
    """
    query_f = Name.startswith("query_")

    if PrevQueryF is None or PrevQueryF == query_f:
        return query_f

    # If the debug exception is enabled, do not trigger an error.
    if Cl.search(SETUP_INFO["_debug_exception_f"][0]):
        return query_f

    error.log("Mixed options: query and code generation mode.\n"
              "The option(s) '%s' cannot be combined with preceeding options." \
              % str(SETUP_INFO[Name][0])[1:-1].replace("'",""))
Ejemplo n.º 53
0
 def consistency_check(self):
     """Verify that the indentation setup is complete and consistent."""
     self.count_command_map.check_defined(self.sr, E_CharacterCountType.WHITESPACE)
     self.count_command_map.check_defined(self.sr, E_CharacterCountType.BEGIN_NEWLINE)
     # A 'newline suppressor' without a 'newline' cannot suppress anything.
     if self.sm_newline_suppressor.get() is None: return
     if self.sm_newline.get() is not None:        return
     error.log("A newline 'suppressor' has been defined.\n"
               "But there is no 'newline' in indentation defintion.", 
               self.sm_newline_suppressor.sr)
Ejemplo n.º 54
0
def open_data_base_file(Filename):
    """Open unicode database file 'Filename' (binary mode).

    Reports a fatal error if the file cannot be opened.
    RETURNS: open file handle.
    """
    try:
        fh = open(unicode_db_directory + "/" + Filename, "rb")
    except IOError:
        # Narrowed from a bare 'except:' -- only the failure to open is an
        # error here; everything else (e.g. KeyboardInterrupt) propagates.
        error.log("Fatal---Unicode Database File '%s' not found!\n" % Filename + \
                  "QUEX_PATH='%s'\n" % QUEX_PATH + \
                  "Unicode Database Directory: '%s'" % unicode_db_directory)
    return fh
Ejemplo n.º 55
0
Archivo: core.py Proyecto: xxyzzzq/quex
def argv_catch_string(Cl, Option, Type):
    """Fetch the string argument that follows command line 'Option'.

    An absent argument yields "" for an OPTIONAL_STRING parameter and an
    error otherwise.
    """
    Cl.reset_cursor()
    argument = Cl.follow("##EMPTY##", Option)
    if argument == "##EMPTY##":
        # Nothing followed the option on the command line.
        if Type == SetupParTypes.OPTIONAL_STRING:
            argument = ""
        else:
            error.log("Option %s\nnot followed by anything." % str(Option)[1:-1])
    return argument
Ejemplo n.º 56
0
def __compile_regular_expression(Str, Name):
    """Compile 'Str' into a regular expression object.

    The wildcard characters '*', '?', '{', '}' are escaped first, i.e.
    taken literally. 'Name' describes the expression in the error message.
    """
    # Escape characters that shall not act as regex operators.
    tmp = Str.replace("*", "\\*")
    tmp = tmp.replace("?", "\\?")
    tmp = tmp.replace("{", "\\{")
    tmp = tmp.replace("}", "\\}")
    try:
        return re.compile(tmp)
    except re.error:
        # Narrowed from a bare 'except:': only regex compilation failures
        # are handled here.
        error.log("Invalid %s: %s" % (Name, Str))
Ejemplo n.º 57
0
def search_and_validate(CL, Option):
    """Check whether 'Option' occurs on the command line 'CL'.

    If it occurs, the whole command line is validated against the set of
    known options; unidentified options are reported as an error.

    RETURNS: True if 'Option' was found; False if not.
    """
    if CL.search(Option) == False: return False

    # Validate command line
    unidentified = CL.unidentified_options(OPTION_DB.keys())
    if len(unidentified) != 0:
        error.log("Unidentified option(s) = " +  repr(unidentified) + "\n" + \
                  get_supported_command_line_option_description())
    return True
Ejemplo n.º 58
0
 def add(self, CharSet, Identifier, Value, sr):
     """Append a count action for 'CharSet' to the internal map.

     Rejects empty character sets, validates 'grid' specifications, and
     checks that 'CharSet' does not intersect with earlier entries.
     """
     global cc_type_db
     if CharSet.is_empty():
         error.log("Empty character set found for '%s'." % Identifier, sr)
     elif Identifier == "grid":
         self.check_grid_specification(Value, sr)
     count_type = cc_type_db[Identifier]
     self.check_intersection(count_type, CharSet, sr)
     self.__map.append((CharSet, CountAction(count_type, Value, sr)))