Esempio n. 1
0
class SequenceBuilder(Builder):
    '''
    Parse a sequence (this is the main entry point for parsing, but users
    will normally call `parse_pattern`).

    Input is consumed one character at a time via `append_character`, which
    returns the builder that must receive the next character.  Alternatives
    closed by `|` are accumulated in `_alternatives`; `_sequence` holds the
    alternative currently under construction.
    '''

    def __init__(self, parser_state):
        '''Create an empty builder that uses `parser_state`.'''
        super(SequenceBuilder, self).__init__(parser_state)
        self._alternatives = Alternatives()
        self._sequence = Sequence()

    def parse(self, text):
        '''
        Parse a regular expression.

        Each character of `text` is passed to the current builder, followed
        by `None` to mark the end of input.  A `ParseError` raised on the way
        is given the text and the index most recently reached (`None` for
        empty text) through its `update` method and is then re-raised.

        Raises `RxpyError` if a builder other than this one is still active
        once the input is exhausted.

        Returns the result of joining the completed sequence with `Match()`.
        '''
        builder, index = self, None
        try:
            for (index, character) in enumerate(text):
                builder = builder.append_character(character)
            # None tells the active builder that there is no more input.
            builder = builder.append_character(None)
        except ParseError as e:
            e.update(text, index)
            raise
        if self != builder:
            raise RxpyError('Incomplete expression')
        return self.to_sequence().join(Match(), self._parser_state)

    def parse_group(self, text):
        '''
        Parse a set of groups for `Scanner`.

        `text` is fed to a `GroupBuilder` that is handed this builder, and a
        closing `)` is then appended on the caller's behalf.  If the current
        sequence is non-empty it is first closed as an alternative.

        Raises `RxpyError` if appending the `)` fails or does not hand
        control back to this builder.
        '''
        builder = GroupBuilder(self._parser_state, self)
        if self._sequence:
            self.__start_new_alternative()
        for character in text:
            builder = builder.append_character(character)
        try:
            builder = builder.append_character(')')
        except Exception:
            # Only ordinary errors are translated; KeyboardInterrupt and
            # SystemExit are allowed to propagate untouched.
            raise RxpyError('Incomplete group')
        # Explicit check rather than `assert`, which is removed under -O.
        if not (builder == self):
            raise RxpyError('Incomplete group')

    def append_character(self, character, escaped=False):
        '''
        Add the next character.

        `character` is the next item of the expression, or `None` at the end
        of input.  When `escaped` is true the character is never treated as
        an operator and is added as a literal.

        Returns the builder that should receive the following character:
        a new sub-builder for `\\`, `{`, `(`, `[` and the repeat operators,
        otherwise this builder.
        '''
        state = self._parser_state
        char_str = state.alphabet.expression_to_str(character)
        if not escaped and char_str == '\\':
            return ComplexEscapeBuilder(state, self)
        elif not escaped and char_str == '{':
            return CountBuilder(state, self, character)
        elif not escaped and char_str == '(':
            return GroupEscapeBuilder(state, self)
        elif not escaped and char_str == '[':
            return CharacterBuilder(state, self)
        elif not escaped and char_str == '.':
            self._sequence.append(Dot(state.flags & ParserState.DOT_ALL))
        elif not escaped and char_str == '^':
            self._sequence.append(
                StartOfLine(state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '$':
            self._sequence.append(
                EndOfLine(state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '|':
            self.__start_new_alternative()
        elif character is not None and self._sequence and (
                # Tuple membership, not substring search: an empty or
                # multi-character string must not count as an operator.
                not escaped and char_str in ('+', '?', '*')):
            # The operator applies to the node most recently added.
            return RepeatBuilder(state, self, self._sequence.pop(), character)
        elif character is not None and (
                escaped or state.significant(character)):
            (is_pair, value) = \
                state.alphabet.expression_to_charset(character, state.flags)
            if is_pair:
                # Two single-value ranges, one per element of the pair
                # (presumably the two cases of a letter - TODO confirm).
                self._sequence.append(Character([(value[0], value[0]),
                                                 (value[1], value[1])],
                                                state.alphabet))
            else:
                self._sequence.append(String(value))
        return self

    def __start_new_alternative(self):
        '''Store the current sequence as an alternative and start afresh.'''
        self._alternatives.append(self._sequence)
        self._sequence = Sequence()

    def to_sequence(self):
        '''
        Retrieve contents as a sequence.

        With no stored alternatives the current sequence is returned as is.
        Otherwise the current sequence is closed as the final alternative
        (so this call changes the builder's state) and a new `Sequence`
        wrapping the alternatives is returned.
        '''
        if not self._alternatives:
            return self._sequence
        else:
            self.__start_new_alternative()
            return Sequence([self._alternatives])

    def __bool__(self):
        '''A builder is true when its current sequence is true.'''
        return bool(self._sequence)
Esempio n. 2
0
class SequenceBuilder(Builder):
    '''
    Parse a sequence (this is the main entry point for parsing, but users
    will normally call `parse_pattern`).

    The expression is supplied character by character to `append_character`,
    whose return value is the builder to use for the next character.
    `_sequence` is the alternative being assembled; alternatives already
    terminated by `|` are kept in `_alternatives`.
    '''
    def __init__(self, parser_state):
        '''Create an empty builder that uses `parser_state`.'''
        super(SequenceBuilder, self).__init__(parser_state)
        self._alternatives = Alternatives()
        self._sequence = Sequence()

    def parse(self, text):
        '''
        Parse a regular expression.

        Every character of `text` goes to the active builder in turn, and
        `None` is sent last to signal the end of input.  If a `ParseError`
        occurs, its `update` method receives the text and the index reached
        so far (`None` when `text` is empty) before it is re-raised.

        Raises `RxpyError` when, at the end of input, the active builder is
        not this one.

        Returns the completed sequence joined with `Match()`.
        '''
        builder, index = self, None
        try:
            for (index, character) in enumerate(text):
                builder = builder.append_character(character)
            # A final None marks the end of the expression.
            builder = builder.append_character(None)
        except ParseError as e:
            e.update(text, index)
            raise
        if self != builder:
            raise RxpyError('Incomplete expression')
        return self.to_sequence().join(Match(), self._parser_state)

    def parse_group(self, text):
        '''
        Parse a set of groups for `Scanner`.

        A `GroupBuilder` given this builder consumes `text`; a `)` is then
        appended to close the group.  A non-empty current sequence is closed
        as an alternative beforehand.

        Raises `RxpyError` if the closing `)` raises an error or if control
        does not come back to this builder afterwards.
        '''
        builder = GroupBuilder(self._parser_state, self)
        if self._sequence:
            self.__start_new_alternative()
        for character in text:
            builder = builder.append_character(character)
        try:
            builder = builder.append_character(')')
        except Exception:
            # Deliberately not a bare except, so KeyboardInterrupt and
            # SystemExit are not rewritten as parse failures.
            raise RxpyError('Incomplete group')
        # Checked explicitly because `assert` disappears under -O.
        if not (builder == self):
            raise RxpyError('Incomplete group')

    def append_character(self, character, escaped=False):
        '''
        Add the next character.

        `character` is the next item of the expression (`None` at the end of
        input).  If `escaped` is true no operator handling takes place and
        the character is added literally.

        Returns the builder for the next character: a fresh sub-builder for
        `\\`, `{`, `(`, `[` and for `+`, `?`, `*` following a non-empty
        sequence; this builder in every other case.
        '''
        char_str = self._parser_state.alphabet.expression_to_str(character)
        if not escaped and char_str == '\\':
            return ComplexEscapeBuilder(self._parser_state, self)
        elif not escaped and char_str == '{':
            return CountBuilder(self._parser_state, self, character)
        elif not escaped and char_str == '(':
            return GroupEscapeBuilder(self._parser_state, self)
        elif not escaped and char_str == '[':
            return CharacterBuilder(self._parser_state, self)
        elif not escaped and char_str == '.':
            self._sequence.append(
                Dot(self._parser_state.flags & ParserState.DOT_ALL))
        elif not escaped and char_str == '^':
            self._sequence.append(
                StartOfLine(self._parser_state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '$':
            self._sequence.append(
                EndOfLine(self._parser_state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '|':
            self.__start_new_alternative()
        elif character is not None and self._sequence and (
                # Compare against whole operators; a substring test on
                # '+?*' would also accept '' and multi-character strings.
                not escaped and char_str in ('+', '?', '*')):
            # Repetition wraps the node that was appended last.
            return RepeatBuilder(self._parser_state, self,
                                 self._sequence.pop(), character)
        elif character is not None and (
                escaped or self._parser_state.significant(character)):
            (is_pair, value) = \
                self._parser_state.alphabet.expression_to_charset(
                    character, self._parser_state.flags)
            if is_pair:
                # Each element of the pair becomes its own one-value range
                # (presumably upper/lower case variants - TODO confirm).
                self._sequence.append(
                    Character([(value[0], value[0]), (value[1], value[1])],
                              self._parser_state.alphabet))
            else:
                self._sequence.append(String(value))
        return self

    def __start_new_alternative(self):
        '''Move the current sequence into the alternatives and reset it.'''
        self._alternatives.append(self._sequence)
        self._sequence = Sequence()

    def to_sequence(self):
        '''
        Retrieve contents as a sequence.

        If no alternatives have been stored, the current sequence itself is
        returned.  Otherwise the current sequence is first stored as the
        last alternative (mutating the builder) and a new `Sequence` that
        contains the alternatives is returned.
        '''
        if not self._alternatives:
            return self._sequence
        else:
            self.__start_new_alternative()
            return Sequence([self._alternatives])

    def __bool__(self):
        '''Truth value of the builder is that of its current sequence.'''
        return bool(self._sequence)