Beispiel #1
0
    def append_character(self, character):
        '''
        Consume the character that follows a repeat operator and build the
        repeat.

        A following `?` makes the repeat lazy and is consumed here.  Any other
        character is forwarded to the parent once the repeat has been built.
        A following `+` or `*` is rejected as a compound repeat.
        '''
        char_str = self._parser_state.alphabet.expression_to_str(character)

        # reject a second repeat operator before building anything
        if character is not None and char_str in '+*':
            raise RxpyError('Compound repeat: ' + self._initial_char_str +
                            char_str)

        lazy = char_str == '?'
        initial = self._initial_char_str
        if initial == '?':
            self.build_optional(self._parent, self._latest, lazy)
        elif initial == '+':
            self.build_plus(self._parent, self._latest, lazy,
                            self._parser_state)
        elif initial == '*':
            self.build_star(self._parent, self._latest, lazy,
                            self._parser_state)
        else:
            raise RxpyError('Bad initial character for RepeatBuilder')

        # the lazy marker belongs to this repeat; anything else is the
        # parent's to handle
        if not lazy:
            return self._parent.append_character(character)
        return self._parent
Beispiel #2
0
    def append_character(self, character, escaped=False):
        '''
        Add the next character of a `(?P...` construct.

        The first character selects the mode: `<` starts a new named group,
        `=` starts a reference to an existing group.  Later characters
        accumulate the name until an unescaped `>` (new group) or `)`
        (reference) ends it.
        '''
        char_str = self._parser_state.alphabet.expression_to_str(character)

        # mode not chosen yet - this character must be the qualifier
        if self._create is None:
            if char_str == '<':
                self._create = True
            elif char_str == '=':
                self._create = False
            else:
                raise RxpyError('Unexpected qualifier after (?P - ' + char_str)
            return self

        if not escaped:
            if self._create and char_str == '>':
                if not self._name:
                    raise RxpyError('Empty name for group')
                return GroupBuilder(self._parser_state, self._parent, True,
                                    self._name)
            if not self._create and char_str == ')':
                index = self._parser_state.index_for_name_or_count(self._name)
                self._parent._sequence.append(GroupReference(index))
                return self._parent
            if char_str == '\\':
                # this is just for the name
                return SimpleEscapeBuilder(self._parser_state, self)

        if not character:
            raise RxpyError('Incomplete named group')
        self._name += char_str
        return self
Beispiel #3
0
 def append_character(self, character):
     '''
     Add the next character of a character-code escape.

     Characters are collected in a buffer while the remaining-count is
     non-zero.  When it reaches zero the buffer is read as a base-16
     integer, converted with the alphabet's `unescape`, and passed to the
     parent as an escaped character.

     Raises `RxpyError` if input ends early, or if converting or forwarding
     the buffered value fails.
     '''
     if character is None:
         raise RxpyError('Incomplete unicode escape')
     self.__buffer += character
     self.__remaining -= 1
     if self.__remaining:
         return self
     try:
         return self.__parent.append_character(
             self._parser_state.alphabet.unescape(int(self.__buffer, 16)),
             escaped=True)
     # `Exception` rather than a bare `except`, so KeyboardInterrupt and
     # SystemExit are not rewritten as a parse error
     except Exception:
         raise RxpyError('Bad unicode escape: ' + self.__buffer)
Beispiel #4
0
 def group_reference(self, next, number):
     '''
     Append the text matched by group `number` to the replacement.

     Returns False in every non-error case.  Raises `RxpyError` when the
     group has no match.

     NOTE(review): assumes `group()` returns None for a group that did not
     take part in the match, as `re` match objects do - confirm.
     '''
     match = self.__match.group(number)
     # compare with None explicitly: a group that matched the empty string
     # is falsy but is still a valid match and must not raise
     if match is None:
         raise RxpyError('No match for group ' + str(number))
     self.__replacement.append(match)
     return False  # loop internally til done
Beispiel #5
0
 def append_character(self, character):
     '''
     Choose the builder for a group from the characters that open it.

     The first character is either `?` (a qualifier follows, so stay here)
     or the start of an ordinary group's contents.  The second character
     is the qualifier itself and selects the specialised builder.
     '''
     self._count += 1
     char_str = self._parser_state.alphabet.expression_to_str(character)
     state, parent = self._parser_state, self._parent

     if self._count == 1:
         if char_str == '?':
             return self
         # ordinary group - this character already belongs to its contents
         return GroupBuilder(state, parent).append_character(character)

     if char_str == ':':
         return GroupBuilder(state, parent, binding=False)
     if char_str in ParserStateBuilder.INITIAL:
         # the flag builder needs to see this character too
         return ParserStateBuilder(state, parent).append_character(character)
     if char_str == 'P':
         return NamedGroupBuilder(state, parent)
     if char_str == '#':
         return CommentGroupBuilder(state, parent)
     if char_str == '=':
         return LookaheadBuilder(state, parent, True, True)
     if char_str == '!':
         return LookaheadBuilder(state, parent, False, True)
     if char_str == '<':
         return LookbackBuilder(state, parent)
     if char_str == '(':
         return ConditionalBuilder(state, parent)
     raise RxpyError('Unexpected qualifier after (? - ' + char_str)
Beispiel #6
0
    def append_character(self, character):
        '''
        Add the next character of a `{...}` count specification.

        Before the closing `}` characters are accumulated, with `,` and `}`
        storing the value collected so far.  After the `}` a single `?`
        marks the repeat as lazy; any other character triggers the build
        and is passed on to the parent.
        '''
        char_str = self._parser_state.alphabet.expression_to_str(character)

        if self._closed:
            if self._lazy or char_str != '?':
                self.__build()
                return self._parent.append_character(character)
            self._lazy = True
            return self

        if char_str == '}':
            if not self._acc and self._begin is None:
                # oops - not a count at all; replay both braces as literals
                self._parent.append_character(self._open, escaped=True)
                self._parent.append_character(character, escaped=True)
                return self._parent
            self.__store_value()
            self._closed = True
        elif char_str == ',':
            self.__store_value()
        elif character is None:
            raise RxpyError('Incomplete count specification')
        else:
            self._acc += character
        return self
Beispiel #7
0
 def append_character(self, character):
     '''
     Handle the character after a backslash.

     A non-zero digit starts a group reference.  `A`, `Z`, `b`/`B`,
     `d`/`D`, `w`/`W` and `s`/`S` each append the matching node to the
     parent's sequence.  Everything else is left to the superclass.
     '''
     char_str = self._parser_state.alphabet.expression_to_str(character)
     if character is None:
         raise RxpyError('Incomplete character escape')
     if char_str in digits and char_str != '0':
         return GroupReferenceBuilder(self._parser_state, self._parent,
                                      character)

     # upper-case variants are passed as True to the node constructor
     if char_str == 'A':
         node = StartOfLine(False)
     elif char_str in 'bB':
         node = WordBoundary(char_str == 'B')
     elif char_str in 'dD':
         node = Digit(char_str == 'D')
     elif char_str in 'wW':
         node = Word(char_str == 'W')
     elif char_str in 'sS':
         node = Space(char_str == 'S')
     elif char_str == 'Z':
         node = EndOfLine(False)
     else:
         return super(ComplexEscapeBuilder,
                      self).append_character(character)

     self._parent._sequence.append(node)
     return self._parent
Beispiel #8
0
 def index_for_name_or_count(self, name):
     '''
     Translate a group name or index (given as text) into a group index
     (an int).

     The text is tried as an integer first; if that fails it is looked up
     as a name.  Raises `RxpyError` for an unknown index or name.
     '''
     try:
         index = int(name)
         if index in self.__index_to_name:
             return index
         raise RxpyError('Unknown index ' + str(name))
     except ValueError:
         # not a number - treat it as a name
         if name in self.__name_to_index:
             return self.__name_to_index[name]
         raise RxpyError('Unknown name ' + str(name))
Beispiel #9
0
        def append(character=character):
            '''
            Helper function to avoid repetition below - adds character.

            The default argument captures the `character` of the enclosing
            scope at definition time; callers may pass another value.
            Depending on `self._range` this either completes a range that
            starts at the queued character, or flushes the queued character
            into the charset and queues the new one.
            '''
            def unpack(character):
                '''
                Generate a `CharSet` or a character pair.

                When the alphabet does not report a charset, the character
                is paired with itself.
                '''
                (is_charset, value) = \
                    self._parser_state.alphabet.expression_to_charset(
                        character, self._parser_state.flags)
                if not is_charset:
                    value = (character, character)
                return value

            if self._range:
                # a range is open: the queued character is its start
                if self._queue is None:
                    raise RxpyError('Incomplete range')
                else:
                    (alo, ahi) = unpack(self._queue)
                    (blo, bhi) = unpack(character)
                    # one interval for each component of the unpacked pairs
                    self._charset.append_interval((alo, blo))
                    self._charset.append_interval((ahi, bhi))
                    # range is complete - clear the pending state
                    self._queue = None
                    self._range = False
            else:
                # no range: flush whatever was queued, then queue this one
                if self._queue:
                    (lo, hi) = unpack(self._queue)
                    self._charset.append_interval((lo, lo))
                    self._charset.append_interval((hi, hi))
                self._queue = character
Beispiel #10
0
 def parse(self, text):
     '''
     Feed `text` through the chain of builders and return the result.

     Each call to `append_character` returns the builder that receives the
     next character.  `None` is appended after the last character.  If
     control has not come back to this builder by then, `RxpyError` is
     raised.
     '''
     current = self
     for character in text:
         current = current.append_character(character)
     current = current.append_character(None)
     if self != current:
         raise RxpyError('Incomplete expression')
     sequence = self._sequence
     return sequence.join(Match(), self._parser_state)
Beispiel #11
0
 def append_character(self, character):
     '''
     Handle the character after a backslash in a replacement.

     `g` starts a group reference; anything else is left to the
     superclass.  A missing character raises `RxpyError`.
     '''
     if not character:
         raise RxpyError('Incomplete character escape')
     if character != 'g':
         return super(ReplacementEscapeBuilder,
                      self).append_character(character)
     return ReplacementGroupReferenceBuilder(self._parser_state,
                                             self._parent)
Beispiel #12
0
def post_process(graph, actions):
    '''
    Apply per-type actions to every node of `graph` and return `graph`.

    `actions` is an iterable of (type, function) pairs.  Each node yielded
    by `node_iterator` is passed to the function registered for its exact
    type; nodes of other types are ignored.  Registering two functions for
    one type raises `RxpyError`.
    '''
    def ignore(node):
        '''Default action for nodes with no registered function.'''
        return None

    dispatch = {}
    for (type_, function) in actions:
        if type_ in dispatch:
            raise RxpyError('Conflicting actions for ' + str(type_))
        dispatch[type_] = function

    for node in node_iterator(graph):
        action = dispatch.get(type(node), ignore)
        action(node)
    return graph
Beispiel #13
0
 def append_character(self, character, escaped=False):
     '''
     Add the next character of one branch of a conditional.

     An unescaped terminal character hands control to the conditional's
     callback.  Other characters are handled by the superclass.  End of
     input raises `RxpyError`.
     '''
     char_str = self._parser_state.alphabet.expression_to_str(character)
     if character is None:
         raise RxpyError('Incomplete conditional match')
     at_terminal = not escaped and char_str in self.__terminals
     if at_terminal:
         return self.__conditional.callback(self, character)
     return super(YesNoBuilder,
                  self).append_character(character, escaped)
Beispiel #14
0
 def append_character(self, character):
     '''
     Add the next character of a `<...>` group escape in a replacement.

     The buffer must start with `<`.  It then collects digits or
     alphanumerics (depending on `__numeric` / `__name`) until `>` closes
     it, at which point the decoded reference is appended to the parent.
     '''
     # this is so complex because the tests for different errors are so
     # detailed
     if not character:
         raise RxpyError('Incomplete group escape')

     if len(self.__buffer) > 1 and character == '>':
         self.__parent._sequence.append(self.__decode())
         return self.__parent

     acceptable = (
         (not self.__buffer and character == '<') or
         (self.__numeric and character in digits) or
         (self.__name and character in ALPHANUMERIC))
     if not acceptable:
         raise RxpyError('Unexpected character in group escape: ' +
                         character)
     self.__buffer += character
     return self
Beispiel #15
0
 def append_character(self, character):
     '''
     Handle the character after a backslash.

     A non-zero digit starts a group reference; anything else is left to
     the superclass.  End of input raises `RxpyError`.
     '''
     char_str = self._parser_state.alphabet.expression_to_str(character)
     if character is None:
         raise RxpyError('Incomplete character escape')
     starts_reference = char_str in digits and char_str != '0'
     if not starts_reference:
         return super(IntermediateEscapeBuilder,
                      self).append_character(character)
     return GroupReferenceBuilder(self._parser_state, self._parent,
                                  character)
Beispiel #16
0
    def expression_to_charset(self, char, flags):
        '''
        Convert an input character (of expression type) to either a charset
        or a letter from the alphabet.

        The result is (True, CharSet) or (False, letter).  This
        implementation only ever returns the (False, letter) form and raises
        `RxpyError` when the IGNORECASE flag is set.
        '''
        from lepl.rxpy.parser.support import ParserState
        if not flags & ParserState.IGNORECASE:
            letter = self.expression_to_letter(char)
            return False, self.join(letter)
        raise RxpyError('Default alphabet does not handle case')
Beispiel #17
0
 def append_character(self, character, escaped=False):
     '''
     Discard the next character of a comment group.

     An unescaped `)` ends the comment and returns the parent.  An
     unescaped backslash starts an escape whose result comes back here.
     All other characters are ignored.  End of input raises `RxpyError`.
     '''
     char_str = self._parser_state.alphabet.expression_to_str(character)
     if not escaped:
         if char_str == ')':
             return self._parent
         if char_str == '\\':
             return SimpleEscapeBuilder(self._parser_state, self)
     if character is None:
         raise RxpyError('Incomplete comment')
     return self
Beispiel #18
0
 def parse_group(self, text):
     '''
     Parse a set of groups for `Scanner`.

     `text` is parsed as the contents of a new group.  If this builder
     already holds a sequence, a new alternative is started first.  A
     closing `)` is supplied automatically after `text`, and control must
     return to this builder at that point.

     Raises `RxpyError('Incomplete group')` if closing the group fails or
     leaves a different builder active.
     '''
     builder = GroupBuilder(self._parser_state, self)
     if self._sequence:
         self.__start_new_alternative()
     for character in text:
         builder = builder.append_character(character)
     try:
         builder = builder.append_character(')')
     # `Exception` rather than a bare `except`, so KeyboardInterrupt and
     # SystemExit still propagate
     except Exception:
         raise RxpyError('Incomplete group')
     # explicit check instead of `assert`, which is removed under `-O` and
     # would silently accept an unterminated group
     if builder != self:
         raise RxpyError('Incomplete group')
Beispiel #19
0
 def __store_value(self):
     '''
     Store the accumulated text as the next limit of the repeat.

     The first call sets the lower limit, which is required.  The second
     call marks the repeat as a range and sets the upper limit if any text
     was given.  A third call is an error.  The accumulator is cleared on
     success.
     '''
     text = self._acc
     if self._begin is None:
         if not text:
             raise RxpyError('Missing lower limit for repeat')
         try:
             self._begin = int(text)
         except ValueError:
             raise RxpyError('Bad lower limit for repeat: ' + text)
     elif self._range:
         raise RxpyError('Too many values in repeat')
     else:
         self._range = True
         # an empty upper limit leaves `_end` untouched
         if text:
             try:
                 self._end = int(text)
             except ValueError:
                 raise RxpyError('Bad upper limit for repeat: ' + text)
             if self._begin > self._end:
                 raise RxpyError('Inconsistent repeat range')
     self._acc = ''
Beispiel #20
0
 def append_character(self, character):
     '''
     Handle the qualifier after `(?<`.

     `=` and `!` both return a `LookaheadBuilder` whose final argument is
     False; the third argument is True for `=` and False for `!`.  For `!`
     the `_LOOKBACK` flag is also registered as a new flag.  Any other
     character raises `RxpyError`.
     '''
     char_str = self._parser_state.alphabet.expression_to_str(character)
     if char_str not in ('=', '!'):
         raise RxpyError('Unexpected qualifier after (?< - ' + char_str)
     equal = char_str == '='
     if not equal:
         self._parser_state.new_flag(ParserState._LOOKBACK)
     return LookaheadBuilder(self._parser_state, self._parent, equal,
                             False)
Beispiel #21
0
 def append_character(self, character):
     '''
     Add the next flag character of a `(?...)` flag group.

     Plain characters found in the flag table register that flag.  `_`
     introduces a two-character flag whose second character must be one of
     `lceug`.  `)` ends the group and returns the parent.  `L` is rejected
     as unsupported.
     '''
     char_str = self._parser_state.alphabet.expression_to_str(character)

     if self.__escape:
         # second half of a `_x` flag
         if char_str in 'lceug':
             self._parser_state.new_flag(self.__table['_' + char_str])
             self.__escape = False
             return self
         raise RxpyError('Unexpected characters after (? - _' + char_str)

     if char_str == '_':
         self.__escape = True
         return self
     if char_str == 'L':
         raise RxpyError('Locale based classes unsupported')
     if char_str in self.__table:
         self._parser_state.new_flag(self.__table[char_str])
         return self
     if char_str == ')':
         return self.__parent
     raise RxpyError('Unexpected character after (? - ' + char_str)
Beispiel #22
0
    def clone(self):
        '''
        Return a duplicate of this node (needed, for example, when a
        numbered repeat is replaced by explicit, repeated instances).

        The copy is made by calling this node's class with the keyword
        arguments returned by `_kargs()`.  A `TypeError` from that call is
        reported as `RxpyError`.
        '''
        cls = self.__class__
        try:
            return cls(**self._kargs())
        except TypeError as error:
            message = 'Error cloning {0}: {1}'.format(cls.__name__, error)
            raise RxpyError(message)
Beispiel #23
0
 def parse(self, text):
     '''
     Parse a regular expression.

     Characters are fed one at a time to the active builder, followed by
     `None`.  A `ParseError` is given the text and the index of the last
     character read (None if nothing was read) before being re-raised.
     `RxpyError` is raised if control has not returned to this builder at
     the end.
     '''
     current, position = self, None
     try:
         for (position, character) in enumerate(text):
             current = current.append_character(character)
         current = current.append_character(None)
     except ParseError as error:
         error.update(text, position)
         raise
     if self != current:
         raise RxpyError('Incomplete expression')
     sequence = self.to_sequence()
     return sequence.join(Match(), self._parser_state)
Beispiel #24
0
 def append_character(self, character):
     '''
     Handle the character after a backslash.

     `x`, `u` and `U` start a character code and digits start an octal
     escape.  Characters in the standard-escape table are replaced by their
     table value.  Any remaining character that is not an ASCII letter is
     passed to the parent as an escaped literal.  Remaining ASCII letters
     go to `_unexpected_character`.
     '''
     char_str = self._parser_state.alphabet.expression_to_str(character)
     if character is None:
         raise RxpyError('Incomplete character escape')

     state, parent = self._parser_state, self._parent
     if char_str in 'xuU':
         return CharacterCodeBuilder(state, parent, character)
     if char_str in digits:
         return OctalEscapeBuilder(state, parent, character)
     if char_str in self.__std_escapes:
         return parent.append_character(self.__std_escapes[char_str],
                                        escaped=True)
     if char_str in ascii_letters:
         return self._unexpected_character(character)
     # matches re.escape
     return parent.append_character(character, escaped=True)
Beispiel #25
0
 def __build(self):
     '''
     Replace the parent's latest node with the counted repeat of it.

     With the `_LOOP_UNROLL` flag set, and when the parser state agrees to
     unwind the relevant limit, the repeat is expanded into explicit
     clones: `_begin` copies, then either a star (open range) or
     `_end - _begin` optional copies (closed range).  Otherwise the work is
     left to `build_count`.

     Raises `RxpyError` if the parent has nothing to repeat.
     '''
     parent, state = self._parent, self._parser_state
     if not parent._sequence:
         raise RxpyError('Nothing to repeat')
     latest = parent._sequence.pop()

     # the upper limit decides whether to unwind, falling back to the
     # lower limit when there is no upper limit
     limit = self._begin if self._end is None else self._end
     if (state.flags & ParserState._LOOP_UNROLL) and state.unwind(limit):
         for _i in range(self._begin):
             parent._sequence.append(latest.clone())
         if not self._range:
             return
         if self._end is None:
             RepeatBuilder.build_star(parent, latest.clone(),
                                      self._lazy, state)
             return
         for _i in range(self._end - self._begin):
             RepeatBuilder.build_optional(parent, latest.clone(),
                                          self._lazy)
         return

     upper = self._end if self._range else self._begin
     self.build_count(parent, latest, self._begin, upper,
                      self._lazy, state)
Beispiel #26
0
def parse_groups(texts, engine, flags=0, alphabet=None):
    '''
    Parse a set of expressions, used to define groups for `Scanner`.

    Every text is parsed as one group of a single sequence.  If parsing
    sets new flags, everything is parsed a second time with those flags.
    If the second pass sets new flags again, `RxpyError` is raised.

    Returns (parser_state, graph).  Raises `ValueError` when `texts` is
    empty.
    '''
    from lepl.rxpy.compat.support import default_alphabet
    if not texts:
        raise ValueError('Empty set of texts for scanner')

    def build(state):
        '''Parse all texts as groups of a fresh sequence builder.'''
        builder = SequenceBuilder(state)
        for text in texts:
            builder.parse_group(text)
        return builder

    alphabet = default_alphabet(alphabet, texts[0])
    parser_state = ParserState(flags=flags,
                               alphabet=alphabet,
                               refuse=engine.REFUSE,
                               require=engine.REQUIRE)
    sequence = build(parser_state)
    if parser_state.has_new_flags:
        # flags changed during the first pass - parse again with them set
        parser_state = parser_state.clone_with_new_flags(texts[0])
        sequence = build(parser_state)
        if parser_state.has_new_flags:
            raise RxpyError('Inconsistent flags')
    return parser_state, sequence.to_sequence().join(Match(), parser_state)
Beispiel #27
0
def parse(text, parser_state, class_, mutable_flags=True):
    '''
    Parse `text` with a builder of type `class_`.

    If the expression sets flags (and `mutable_flags` allows it), the text
    is parsed a second time with a parser state cloned with the new flags.
    An `RxpyError` from the first pass is suppressed only when that second
    pass will happen.  If new flags are still pending after
    post-processing, `RxpyError` is raised.

    Returns (parser_state, graph).
    '''
    graph = None
    try:
        graph = class_(parser_state).parse(text)
    except RxpyError:
        # suppress error if we will parse again
        will_reparse = mutable_flags and parser_state.has_new_flags
        if not will_reparse:
            raise

    if mutable_flags and parser_state.has_new_flags:
        parser_state = parser_state.clone_with_new_flags(text)
        graph = class_(parser_state).parse(text)

    alphabet = parser_state.alphabet
    alphabet.validate_expression(text, parser_state.flags)

    actions = resolve_group_names(parser_state)
    actions.append(set_lookahead_properties())
    graph = post_process(graph, actions)

    if not parser_state.has_new_flags:
        return parser_state, graph
    raise RxpyError('Inconsistent flags')
Beispiel #28
0
def require_engine(engine):
    '''
    Check that an engine was supplied.

    Returns None for a truthy `engine`; raises `RxpyError` otherwise.
    '''
    if engine:
        return
    raise RxpyError('Engine must be given for RXPY '
                    '(use an engine-specific re module).')
Beispiel #29
0
    def append_character(self, character, escaped=False):
        '''
        Add the next character of a `[...]` character set.

        One character is held back in `self._queue` so that a following `-`
        can turn it into the start of a range.  A leading `^` inverts the
        set.  Escaped `d`/`D`, `w`/`W` and `s`/`S` add character classes.
        An unescaped `]` finishes the set, appends the simplified charset to
        the parent's sequence and returns the parent.  In all other
        non-error cases this builder (or an escape builder) is returned.
        '''
        def append(character=character):
            '''
            Helper function to avoid repetition below - adds character.

            The default argument captures the outer `character`; a
            different value may be passed explicitly.
            '''
            def unpack(character):
                '''
                Generate a `CharSet` or a character pair.

                When the alphabet does not report a charset, the character
                is paired with itself.
                '''
                (is_charset, value) = \
                    self._parser_state.alphabet.expression_to_charset(
                        character, self._parser_state.flags)
                if not is_charset:
                    value = (character, character)
                return value

            if self._range:
                # a range is open: the queued character is its start
                if self._queue is None:
                    raise RxpyError('Incomplete range')
                else:
                    (alo, ahi) = unpack(self._queue)
                    (blo, bhi) = unpack(character)
                    # one interval for each component of the unpacked pairs
                    self._charset.append_interval((alo, blo))
                    self._charset.append_interval((ahi, bhi))
                    self._queue = None
                    self._range = False
            else:
                # no range: flush whatever was queued, then queue this one
                if self._queue:
                    (lo, hi) = unpack(self._queue)
                    self._charset.append_interval((lo, lo))
                    self._charset.append_interval((hi, hi))
                self._queue = character

        char_str = self._parser_state.alphabet.expression_to_str(character)
        # `_invert` is None only until the first character has been seen
        if self._invert is None and char_str == '^':
            self._invert = True
        elif not escaped and char_str == '\\':
            # the escape builder reports back to this builder
            return SimpleEscapeBuilder(self._parser_state, self)
        elif escaped and char_str in 'dD':
            self._charset.append_class(self._parser_state.alphabet.digit,
                                       character, char_str == 'D')
        elif escaped and char_str in 'wW':
            self._charset.append_class(self._parser_state.alphabet.word,
                                       character, char_str == 'W')
        elif escaped and char_str in 'sS':
            self._charset.append_class(self._parser_state.alphabet.space,
                                       character, char_str == 'S')
        # not charset allows first character to be unescaped - or ]
        elif character is not None and \
                ((not self._charset and not self._queue)
                 or escaped or char_str not in "-]"):
            append()
        elif char_str == '-':
            if self._range:
                # repeated - is range to -?
                append()
            else:
                self._range = True
        elif char_str == ']':
            if self._queue:
                if self._range:
                    self._range = False
                    # convert open range to '-'
                    append('-')
                # passing None flushes the queued character into the charset
                append(None)
            if self._invert:
                self._charset.invert()
            self._parent._sequence.append(self._charset.simplify())
            return self._parent
        else:
            raise RxpyError('Syntax error in character set')

        # after first character this must be known
        if self._invert is None:
            self._invert = False

        return self
Beispiel #30
0
 def decode(buffer, alphabet):
     '''
     Convert the octal sequence to a character.

     `buffer` is read as a base-8 integer and converted with
     `alphabet.unescape`.  Any ordinary failure in either step is reported
     as `RxpyError`.
     '''
     try:
         return alphabet.unescape(int(buffer, 8))
     # `Exception` rather than a bare `except`, so KeyboardInterrupt and
     # SystemExit are not rewritten as a parse error
     except Exception:
         raise RxpyError('Bad octal escape: ' + buffer)