def test_alternatives(self):
    '''Check the graphs built from empty, single and multiple alternatives.'''
    # No alternatives at all: the expected graph is NoMatch followed by Match.
    empty = build(Alternatives())
    self.assert_graphs(
        empty,
        """digraph { 0 [label="NoMatch"] 1 [label="Match"] 0 -> 1 }""")

    # A single alternative: the expected graph is a plain chain with no
    # split node.
    single = build(Alternatives([Sequence([n(1), n(2), n(3)])]))
    self.assert_graphs(
        single,
        """digraph { 0 [label="1"] 1 [label="2"] 2 [label="3"] 3 [label="Match"] 0 -> 1 1 -> 2 2 -> 3 }""")

    # Three alternatives (the last one empty): the expected graph has a
    # '...|...' split node whose empty branch goes straight to Match.
    branches = [
        Sequence([n(1), n(2), n(3)]),
        Sequence([n(4), n(5)]),
        Sequence(),
    ]
    several = build(Alternatives(branches))
    self.assert_graphs(
        several,
        """digraph { 0 [label="...|..."] 1 [label="1"] 2 [label="4"] 3 [label="Match"] 4 [label="5"] 5 [label="2"] 6 [label="3"] 0 -> 1 0 -> 2 0 -> 3 2 -> 4 4 -> 3 1 -> 5 5 -> 6 6 -> 3 }""")
def callback(self, yes_no, terminal):
    '''
    Receive a finished branch from `YesNoBuilder`.

    On the first call `yes_no` is stored as the 'yes' branch; if that branch
    was terminated by '|' a new `YesNoBuilder` is returned to collect the
    'no' branch.  Otherwise (or on the second call) the conditional is
    assembled, appended to the parent's current sequence, and the parent
    is returned.
    '''
    if self.__yes is None:
        # First callback: what we were handed is the 'yes' branch.
        self.__yes = yes_no
        yes_no = None
        if terminal == '|':
            # A second alternative follows - collect it up to ')'.
            return YesNoBuilder(self, self._parser_state, self.__parent, ')')

    # Final callback: build both branches ('no' may be absent).
    yes = self.__yes.to_sequence()
    if yes_no:
        no = yes_no.to_sequence()
    else:
        no = Sequence()

    # The label shows which of the branches are non-empty.
    label = ''
    if yes:
        label += '...'
    if no:
        label += '|...'
    if not label:
        label = '|'

    def split(label):
        return Conditional(self.__name, label)

    conditional = Alternatives([no, yes], label=label, split=split)
    self.__parent._sequence.append(conditional)
    return self.__parent
def __init__(self, parser_state):
    '''
    Create a builder with no alternatives collected so far and an empty
    current sequence.  `parser_state` is passed on to the base class.
    '''
    super(SequenceBuilder, self).__init__(parser_state)
    # Completed alternatives (pushed here by __start_new_alternative).
    self._alternatives = Alternatives()
    # The alternative currently being accumulated.
    self._sequence = Sequence()
class SequenceBuilder(Builder):
    '''
    Parse a sequence (this is the main entry point for parsing, but users
    will normally call `parse_pattern`).
    '''

    def __init__(self, parser_state):
        '''Start with no completed alternatives and an empty sequence.'''
        super(SequenceBuilder, self).__init__(parser_state)
        # Alternatives completed so far (filled when '|' is seen).
        self._alternatives = Alternatives()
        # The alternative currently being built.
        self._sequence = Sequence()

    def parse(self, text):
        '''
        Parse a regular expression.

        Each character of `text` is passed to the current builder, followed
        by a final `None`.  A `ParseError` is updated with the text and the
        index of the last character read before being re-raised.  Raises
        `RxpyError` if control has not returned to this builder at the end.
        '''
        builder, index = self, None
        try:
            for (index, character) in enumerate(text):
                builder = builder.append_character(character)
            builder = builder.append_character(None)
        except ParseError as e:
            e.update(text, index)
            raise
        if self != builder:
            raise RxpyError('Incomplete expression')
        return self.to_sequence().join(Match(), self._parser_state)

    def parse_group(self, text):
        '''
        Parse a set of groups for `Scanner`.

        `text` is parsed by a `GroupBuilder` as a new alternative and then
        closed with ')'.  Raises `RxpyError` if the closing ')' fails or
        does not return control to this builder.
        '''
        builder = GroupBuilder(self._parser_state, self)
        if self._sequence:
            self.__start_new_alternative()
        for character in text:
            builder = builder.append_character(character)
        try:
            builder = builder.append_character(')')
        except Exception:
            # Was a bare `except:`; KeyboardInterrupt / SystemExit must not
            # be reported as a malformed group.
            raise RxpyError('Incomplete group')
        # Explicit check rather than `assert`, which is removed under -O
        # and would let an unterminated group pass silently.
        if not builder == self:
            raise RxpyError('Incomplete group')

    def append_character(self, character, escaped=False):
        '''
        Add the next character.

        Returns the builder that should receive the following character:
        a specialised builder for constructs that need more input, or
        `self` once the character has been handled here.  `escaped`
        disables the special meaning of every metacharacter.
        '''
        char_str = self._parser_state.alphabet.expression_to_str(character)
        if not escaped and char_str == '\\':
            return ComplexEscapeBuilder(self._parser_state, self)
        elif not escaped and char_str == '{':
            return CountBuilder(self._parser_state, self, character)
        elif not escaped and char_str == '(':
            return GroupEscapeBuilder(self._parser_state, self)
        elif not escaped and char_str == '[':
            return CharacterBuilder(self._parser_state, self)
        elif not escaped and char_str == '.':
            self._sequence.append(
                Dot(self._parser_state.flags & ParserState.DOT_ALL))
        elif not escaped and char_str == '^':
            self._sequence.append(
                StartOfLine(self._parser_state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '$':
            self._sequence.append(
                EndOfLine(self._parser_state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '|':
            self.__start_new_alternative()
        elif character is not None and self._sequence and (
                not escaped and char_str in '+?*'):
            # A repeat applies to the item most recently added; with an
            # empty sequence the test above fails and we fall through.
            return RepeatBuilder(self._parser_state, self,
                                 self._sequence.pop(), character)
        elif character is not None and (
                escaped or self._parser_state.significant(character)):
            (is_pair, value) = \
                self._parser_state.alphabet.expression_to_charset(
                    character, self._parser_state.flags)
            if is_pair:
                # Two values: match either one as a single-element range.
                self._sequence.append(
                    Character([(value[0], value[0]), (value[1], value[1])],
                              self._parser_state.alphabet))
            else:
                self._sequence.append(String(value))
        return self

    def __start_new_alternative(self):
        '''Store the current sequence as an alternative; start a new one.'''
        self._alternatives.append(self._sequence)
        self._sequence = Sequence()

    def to_sequence(self):
        '''
        Retrieve contents as a sequence.

        With no alternatives the current sequence is returned directly.
        Otherwise the current sequence is first stored as the final
        alternative (so this call modifies the builder) and the
        alternatives are returned wrapped in a new `Sequence`.
        '''
        if not self._alternatives:
            return self._sequence
        else:
            self.__start_new_alternative()
            return Sequence([self._alternatives])

    def __bool__(self):
        '''True when the current sequence is non-empty.'''
        return bool(self._sequence)
class SequenceBuilder(Builder):
    '''
    Parse a sequence (this is the main entry point for parsing, but users
    will normally call `parse_pattern`).
    '''

    def __init__(self, parser_state):
        '''Create a builder holding an empty sequence and no alternatives.'''
        super(SequenceBuilder, self).__init__(parser_state)
        # Alternatives already closed off by '|'.
        self._alternatives = Alternatives()
        # Sequence that incoming characters are currently appended to.
        self._sequence = Sequence()

    def parse(self, text):
        '''
        Parse a regular expression.

        Feeds every character of `text`, then `None`, to whichever builder
        is current.  On `ParseError` the exception is updated with `text`
        and the last index reached, then re-raised.  Raises `RxpyError` if
        a different builder is still current after the final `None`.
        '''
        builder, index = self, None
        try:
            for (index, character) in enumerate(text):
                builder = builder.append_character(character)
            builder = builder.append_character(None)
        except ParseError as e:
            e.update(text, index)
            raise
        if self != builder:
            raise RxpyError('Incomplete expression')
        return self.to_sequence().join(Match(), self._parser_state)

    def parse_group(self, text):
        '''
        Parse a set of groups for `Scanner`.

        A `GroupBuilder` consumes `text` and is then closed with ')'.  If a
        sequence is already in progress it is first stored as an
        alternative.  Raises `RxpyError` when closing the group fails or
        leaves a builder other than this one in control.
        '''
        builder = GroupBuilder(self._parser_state, self)
        if self._sequence:
            self.__start_new_alternative()
        for character in text:
            builder = builder.append_character(character)
        try:
            builder = builder.append_character(')')
        except Exception:
            # Previously a bare `except:`, which also converted
            # KeyboardInterrupt / SystemExit into RxpyError.
            raise RxpyError('Incomplete group')
        # Previously an `assert`, which disappears under `python -O`.
        if not builder == self:
            raise RxpyError('Incomplete group')

    def append_character(self, character, escaped=False):
        '''
        Add the next character.

        Unescaped metacharacters either hand over to a dedicated builder
        (which is returned) or add a node to the current sequence.  Other
        characters are added as literals when escaped or when the parser
        state reports them significant.  Returns the builder to use for
        the next character.
        '''
        char_str = self._parser_state.alphabet.expression_to_str(character)
        if not escaped and char_str == '\\':
            return ComplexEscapeBuilder(self._parser_state, self)
        elif not escaped and char_str == '{':
            return CountBuilder(self._parser_state, self, character)
        elif not escaped and char_str == '(':
            return GroupEscapeBuilder(self._parser_state, self)
        elif not escaped and char_str == '[':
            return CharacterBuilder(self._parser_state, self)
        elif not escaped and char_str == '.':
            self._sequence.append(
                Dot(self._parser_state.flags & ParserState.DOT_ALL))
        elif not escaped and char_str == '^':
            self._sequence.append(
                StartOfLine(self._parser_state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '$':
            self._sequence.append(
                EndOfLine(self._parser_state.flags & ParserState.MULTILINE))
        elif not escaped and char_str == '|':
            self.__start_new_alternative()
        elif character is not None and self._sequence and (
                not escaped and char_str in '+?*'):
            # The repeat wraps the last item, which is removed from the
            # sequence and handed to the RepeatBuilder.
            return RepeatBuilder(self._parser_state, self,
                                 self._sequence.pop(), character)
        elif character is not None and (
                escaped or self._parser_state.significant(character)):
            (is_pair, value) = \
                self._parser_state.alphabet.expression_to_charset(
                    character, self._parser_state.flags)
            if is_pair:
                # A pair becomes a character class of two one-value ranges.
                self._sequence.append(
                    Character([(value[0], value[0]), (value[1], value[1])],
                              self._parser_state.alphabet))
            else:
                self._sequence.append(String(value))
        return self

    def __start_new_alternative(self):
        '''Move the current sequence into the alternatives and reset it.'''
        self._alternatives.append(self._sequence)
        self._sequence = Sequence()

    def to_sequence(self):
        '''
        Retrieve contents as a sequence.

        Returns the current sequence when there are no alternatives.
        Otherwise the current sequence is appended as the last alternative
        (a side effect on this builder) and a `Sequence` containing the
        alternatives is returned.
        '''
        if not self._alternatives:
            return self._sequence
        else:
            self.__start_new_alternative()
            return Sequence([self._alternatives])

    def __bool__(self):
        '''Truth value of the current sequence.'''
        return bool(self._sequence)