Esempio n. 1
0
    def test_alternatives(self):
        self.assert_graphs(
            build(Alternatives()), """digraph {
 0 [label="NoMatch"]
 1 [label="Match"]
 0 -> 1
}""")
        self.assert_graphs(
            build(Alternatives([Sequence([n(1), n(2), n(3)])])), """digraph {
 0 [label="1"]
 1 [label="2"]
 2 [label="3"]
 3 [label="Match"]
 0 -> 1
 1 -> 2
 2 -> 3
}""")
        self.assert_graphs(
            build(
                Alternatives([
                    Sequence([n(1), n(2), n(3)]),
                    Sequence([n(4), n(5)]),
                    Sequence()
                ])), """digraph {
 0 [label="...|..."]
 1 [label="1"]
 2 [label="4"]
 3 [label="Match"]
 4 [label="5"]
 5 [label="2"]
 6 [label="3"]
 0 -> 1
 0 -> 2
 0 -> 3
 2 -> 4
 4 -> 3
 1 -> 5
 5 -> 6
 6 -> 3
}""")
Esempio n. 2
0
    def callback(self, yes_no, terminal):
        '''
        Callback used by `yesNoBuilder` to accumulate data.

        The first call stores `yes_no` as the "yes" branch.  If that branch
        was terminated by '|' a new `YesNoBuilder` is returned to collect
        the second branch.  Otherwise (and on the following call) the
        conditional is assembled, appended to the parent's sequence, and the
        parent is returned.
        '''
        if self.__yes is None:
            # first callback - remember the 'yes' branch; there is no 'no'
            # branch (yet)
            self.__yes = yes_no
            yes_no = None
            if terminal == '|':
                # a second alternative follows - collect it up to ')'
                return YesNoBuilder(self, self._parser_state, self.__parent,
                                    ')')

        # final callback - build yes and no (if present)
        yes = self.__yes.to_sequence()
        if yes_no:
            no = yes_no.to_sequence()
        else:
            no = Sequence()

        # the label shows which of the branches are non-empty
        label = ''
        if yes:
            label += '...'
        if no:
            label += '|...'
        if not label:
            label = '|'

        def split(label):
            return Conditional(self.__name, label)

        alternatives = Alternatives([no, yes], label=label, split=split)
        self.__parent._sequence.append(alternatives)
        return self.__parent
Esempio n. 3
0
 def __init__(self, parser_state):
     '''Create an empty builder that uses `parser_state`.'''
     super(SequenceBuilder, self).__init__(parser_state)
     # start with an empty set of alternatives and an empty sequence
     self._alternatives, self._sequence = Alternatives(), Sequence()
Esempio n. 4
0
class SequenceBuilder(Builder):
    '''
    Parse a sequence (this is the main entry point for parsing, but users
    will normally call `parse_pattern`).

    Characters are supplied one at a time via `append_character`, which
    returns the builder that should receive the next character (this
    builder, or a sub-builder for escapes, counts, groups, character sets
    and repeats).  Nodes are accumulated in `_sequence`; each '|' moves the
    current sequence into `_alternatives` and starts a new one.
    '''

    def __init__(self, parser_state):
        '''Create an empty builder that uses `parser_state`.'''
        super(SequenceBuilder, self).__init__(parser_state)
        # sequences closed off so far by '|'
        self._alternatives = Alternatives()
        # the sequence currently being assembled
        self._sequence = Sequence()

    def parse(self, text):
        '''
        Parse a regular expression.

        Each character of `text` is passed to the current builder, followed
        by a final `None`.  A `ParseError` raised on the way is updated with
        the text and the index of the last character read, then re-raised.

        Raises `RxpyError` if, after all input, control has not returned to
        this builder (ie a sub-builder is still waiting for input).

        Returns the collected sequence joined to a final `Match` node.
        '''
        builder, index = self, None
        try:
            for (index, character) in enumerate(text):
                builder = builder.append_character(character)
            # None is passed once all the characters have been consumed
            builder = builder.append_character(None)
        except ParseError as e:
            e.update(text, index)
            raise
        if self != builder:
            raise RxpyError('Incomplete expression')
        return self.to_sequence().join(Match(), self._parser_state)

    def parse_group(self, text):
        '''
        Parse a set of groups for `Scanner`.

        `text` is passed, character by character, to a `GroupBuilder` whose
        parent is this builder, and the group is then closed with ')'.  If a
        sequence has already been collected it is first stored as an
        alternative.

        Raises `RxpyError` if closing the group fails, or if closing it does
        not hand control back to this builder.
        '''
        builder = GroupBuilder(self._parser_state, self)
        if self._sequence:
            self.__start_new_alternative()
        for character in text:
            builder = builder.append_character(character)
        try:
            builder = builder.append_character(')')
        except Exception as error:
            # only ordinary errors are translated; KeyboardInterrupt and
            # SystemExit propagate unchanged
            raise RxpyError('Incomplete group') from error
        # an explicit test (rather than an assert) so that the check is
        # still made when Python runs with -O
        if builder != self:
            raise RxpyError('Incomplete group')

    def append_character(self, character, escaped=False):
        '''
        Add the next character.

        `character` is the next item of input (`parse` passes `None` after
        the last one); `escaped` disables the special handling of
        metacharacters so the character is treated as a literal.

        Returns the builder that should receive the following character: a
        new sub-builder for an unescaped backslash, '{', '(' or '[' and for
        the repeat operators '+', '?' and '*' (when there is something to
        repeat), otherwise this builder.
        '''
        state = self._parser_state
        char_str = state.alphabet.expression_to_str(character)
        if not escaped and char_str == '\\':
            return ComplexEscapeBuilder(state, self)
        elif not escaped and char_str == '{':
            return CountBuilder(state, self, character)
        elif not escaped and char_str == '(':
            return GroupEscapeBuilder(state, self)
        elif not escaped and char_str == '[':
            return CharacterBuilder(state, self)
        elif not escaped and char_str == '.':
            self._sequence.append(Dot(state.flags & ParserState.DOT_ALL))
        elif not escaped and char_str == '^':
            self._sequence.append(
                StartOfLine(state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '$':
            self._sequence.append(
                EndOfLine(state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '|':
            self.__start_new_alternative()
        elif character is not None and self._sequence and (
                not escaped and char_str in '+?*'):
            # the repeat applies to the most recent node, which is removed
            # from the sequence and handed to the repeat builder
            return RepeatBuilder(state, self, self._sequence.pop(), character)
        elif character is not None and (
                escaped or state.significant(character)):
            (is_pair, value) = \
                state.alphabet.expression_to_charset(character, state.flags)
            if is_pair:
                # two values - match either of them via a character set
                # made of two single-value ranges
                self._sequence.append(Character([(value[0], value[0]),
                                                 (value[1], value[1])],
                                                state.alphabet))
            else:
                self._sequence.append(String(value))
        return self

    def __start_new_alternative(self):
        '''Store the current sequence as an alternative and start afresh.'''
        self._alternatives.append(self._sequence)
        self._sequence = Sequence()

    def to_sequence(self):
        '''
        Retrieve contents as a sequence.

        With no stored alternatives the current sequence is returned as it
        is.  Otherwise the current sequence is stored as the last
        alternative (so this call modifies the builder) and a new sequence
        containing just the alternatives is returned.
        '''
        if not self._alternatives:
            return self._sequence
        else:
            self.__start_new_alternative()
            return Sequence([self._alternatives])

    def __bool__(self):
        '''The truth of the current sequence (alternatives are ignored).'''
        return bool(self._sequence)
Esempio n. 5
0
 def __init__(self, parser_state):
     '''Initialise the builder with `parser_state` and no content.'''
     super(SequenceBuilder, self).__init__(parser_state)
     # both containers start out empty
     self._alternatives, self._sequence = Alternatives(), Sequence()
Esempio n. 6
0
class SequenceBuilder(Builder):
    '''
    Parse a sequence (this is the main entry point for parsing, but users
    will normally call `parse_pattern`).

    Input arrives one character at a time through `append_character`; the
    value returned is the builder to use for the next character.  Nodes are
    collected in `_sequence` and, whenever '|' is seen, the current sequence
    is moved to `_alternatives` and a new, empty sequence is started.
    '''
    def __init__(self, parser_state):
        '''Create an empty builder that uses `parser_state`.'''
        super(SequenceBuilder, self).__init__(parser_state)
        # sequences already terminated by '|'
        self._alternatives = Alternatives()
        # the sequence under construction
        self._sequence = Sequence()

    def parse(self, text):
        '''
        Parse a regular expression.

        Every character in `text` is given to the current builder and then
        `None` is given once, after the last character.  Any `ParseError` is
        updated with the text and the index of the last character read
        before being re-raised.

        Raises `RxpyError` when the builder left at the end is not this one.

        Returns the result of `to_sequence` joined to a `Match` node.
        '''
        builder, index = self, None
        try:
            for (index, character) in enumerate(text):
                builder = builder.append_character(character)
            # a final None follows the last character
            builder = builder.append_character(None)
        except ParseError as e:
            e.update(text, index)
            raise
        if self != builder:
            raise RxpyError('Incomplete expression')
        return self.to_sequence().join(Match(), self._parser_state)

    def parse_group(self, text):
        '''
        Parse a set of groups for `Scanner`.

        A `GroupBuilder` (with this builder as parent) receives `text` one
        character at a time and is then sent ')' to close the group.  Any
        sequence collected earlier is stored as an alternative first.

        Raises `RxpyError` if sending ')' fails, or if the builder returned
        afterwards is not this one.
        '''
        builder = GroupBuilder(self._parser_state, self)
        if self._sequence:
            self.__start_new_alternative()
        for character in text:
            builder = builder.append_character(character)
        try:
            builder = builder.append_character(')')
        except Exception as error:
            # KeyboardInterrupt and SystemExit are deliberately not caught
            raise RxpyError('Incomplete group') from error
        # checked explicitly because an assert would vanish under -O
        if builder != self:
            raise RxpyError('Incomplete group')

    def append_character(self, character, escaped=False):
        '''
        Add the next character.

        `character` is the next input item (`parse` sends `None` at the
        end).  When `escaped` is true no metacharacter is recognised and the
        character is handled as a literal.

        Returns a new sub-builder for an unescaped backslash, '{', '(' or
        '[', and for '+', '?' or '*' when the current sequence is not empty;
        in every other case this builder is returned.
        '''
        state = self._parser_state
        char_str = state.alphabet.expression_to_str(character)
        if not escaped and char_str == '\\':
            return ComplexEscapeBuilder(state, self)
        elif not escaped and char_str == '{':
            return CountBuilder(state, self, character)
        elif not escaped and char_str == '(':
            return GroupEscapeBuilder(state, self)
        elif not escaped and char_str == '[':
            return CharacterBuilder(state, self)
        elif not escaped and char_str == '.':
            self._sequence.append(
                Dot(state.flags & ParserState.DOT_ALL))
        elif not escaped and char_str == '^':
            self._sequence.append(
                StartOfLine(state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '$':
            self._sequence.append(
                EndOfLine(state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '|':
            self.__start_new_alternative()
        elif character is not None and self._sequence and (not escaped and
                                                           char_str in '+?*'):
            # the node just added is taken back out of the sequence so that
            # the repeat builder can wrap it
            return RepeatBuilder(state, self,
                                 self._sequence.pop(), character)
        elif character is not None and (
                escaped or state.significant(character)):
            (is_pair, value) = \
                state.alphabet.expression_to_charset(character, state.flags)
            if is_pair:
                # a pair is matched with a character set that contains the
                # two values as single-value ranges
                self._sequence.append(
                    Character([(value[0], value[0]), (value[1], value[1])],
                              state.alphabet))
            else:
                self._sequence.append(String(value))
        return self

    def __start_new_alternative(self):
        '''Move the current sequence to the alternatives and reset it.'''
        self._alternatives.append(self._sequence)
        self._sequence = Sequence()

    def to_sequence(self):
        '''
        Retrieve contents as a sequence.

        If no alternatives have been stored the current sequence itself is
        returned.  If there are alternatives, the current sequence is added
        to them (changing the state of this builder) and the result is a new
        sequence whose only member is the set of alternatives.
        '''
        if not self._alternatives:
            return self._sequence
        else:
            self.__start_new_alternative()
            return Sequence([self._alternatives])

    def __bool__(self):
        '''True if the current sequence is true; alternatives don't count.'''
        return bool(self._sequence)