def append_character(self, character, escaped=False):
    '''
    Consume one character of a `(?P<name>...)` / `(?P=name)` construct.

    The first character after `(?P` selects the mode: `<` starts the
    definition of a named group, `=` starts a reference to one.  The
    following characters are accumulated as the name until an unescaped
    `>` (definition) or `)` (reference) is seen.

    Returns the builder that should receive the next character.

    Raises `RxpyException` on an unknown qualifier, an empty group name,
    or when the input ends (`character` is None) before the construct is
    complete.
    '''
    if self._create is None:
        # end of input must be reported as a parse error; without this
        # guard the message concatenation below fails with TypeError
        if not character:
            raise RxpyException('Incomplete named group')
        if character == '<':
            self._create = True
        elif character == '=':
            self._create = False
        else:
            raise RxpyException(
                'Unexpected qualifier after (?P - ' + character)
    else:
        if self._create and not escaped and character == '>':
            if not self._name:
                raise RxpyException('Empty name for group')
            # name complete - the group body is handled by GroupBuilder
            return GroupBuilder(self._state, self._parent, True, self._name)
        elif not self._create and not escaped and character == ')':
            self._parent._sequence.append(
                GroupReference(self._state.index_for_name_or_count(self._name)))
            return self._parent
        elif not escaped and character == '\\':
            # this is just for the name
            return SimpleEscapeBuilder(self._state, self)
        elif character:
            self._name += character
        else:
            raise RxpyException('Incomplete named group')
    return self
def append_character(self, character):
    '''
    Accumulate one hexadecimal digit of a character-code escape.

    Digits are buffered until the expected number has been read; the
    buffer is then converted (base 16) to a character via the alphabet
    and passed to the parent builder as an escaped character.

    Returns this builder while digits are still expected, otherwise
    whatever the parent's `append_character` returns.

    Raises `RxpyException` if the input ends early or the buffered text
    cannot be converted to a character.
    '''
    if not character:
        raise RxpyException('Incomplete unicode escape')
    self.__buffer += character
    self.__remaining -= 1
    if self.__remaining:
        return self
    # Only the conversion is guarded: errors raised by the parent while it
    # processes the decoded character must surface unchanged rather than
    # being relabelled as a bad escape.
    try:
        decoded = self._state.alphabet.code_to_char(int(self.__buffer, 16))
    except Exception:
        raise RxpyException('Bad unicode escape: ' + self.__buffer)
    return self.__parent.append_character(decoded, escaped=True)
def __init__(self, flags=0, alphabet=None, hint_alphabet=None,
             require=0, refuse=0):
    '''
    Resolve a consistent (flags, alphabet) pair and initialise the state.

    `flags` - initial flags set by user (bits as int)

    `alphabet` - optional alphabet (if given, checked against flags; if not
    given inferred from flags and hint)

    `hint_alphabet` - used to help auto-detect ASCII and Unicode in 2.6

    `require` - flags required by the alphabet (OR-ed into `flags`)

    `refuse` - flags refused by the alphabet (the overlap with `flags` is
    passed to `refuse_flags`)

    Raises `RxpyException` if the alphabet is neither `Ascii` nor `Unicode`
    while an ASCII/UNICODE flag is set, or if both of those flags end up
    set together.
    '''

    # the original arguments are kept alongside the resolved values below
    self.__new_flags = 0
    self.__initial_alphabet = alphabet
    self.__hint_alphabet = hint_alphabet
    self.__require = require
    self.__refuse = refuse

    flags = flags | require
    # default, if nothing specified, is unicode
    if alphabet is None and not (
            flags & (ParserState.ASCII | ParserState.UNICODE)):
        alphabet = hint_alphabet if hint_alphabet else Unicode()
    # else, if alphabet given, set flag
    elif alphabet:
        if isinstance(alphabet, Ascii):
            flags |= ParserState.ASCII
        elif isinstance(alphabet, Unicode):
            flags |= ParserState.UNICODE
        elif flags & (ParserState.ASCII | ParserState.UNICODE):
            raise RxpyException(
                'The alphabet is inconsistent with the parser flags')
    # if alphabet missing, set from flag
    else:
        if flags & ParserState.ASCII:
            alphabet = Ascii()
        if flags & ParserState.UNICODE:
            alphabet = Unicode()
    # check contradictions
    if (flags & ParserState.ASCII) and (flags & ParserState.UNICODE):
        raise RxpyException('Cannot specify Unicode and ASCII together')
    refuse_flags(flags & refuse)

    self.__alphabet = alphabet
    self.__flags = flags
    self.groups = GroupState()
    self.__comment = False # used to track comments with extended syntax
    self.__unwind_credit = 10
def append_character(self, character):
    '''
    Handle the character following a backslash in a full pattern.

    A non-zero digit starts a group reference.  `A`, `b`/`B`, `d`/`D`,
    `w`/`W`, `s`/`S` and `Z` append the matching node to the parent's
    sequence (the upper-case form of each pair passes True to the node
    constructor).  Anything else is delegated to the superclass.

    Raises `RxpyException` if the input ends after the backslash.
    '''
    if not character:
        raise RxpyException('Incomplete character escape')
    if character in digits and character != '0':
        return GroupReferenceBuilder(self._state, self._parent, character)

    if character == 'A':
        node = StartOfLine(False)
    elif character in 'bB':
        node = WordBoundary(character == 'B')
    elif character in 'dD':
        node = Digit(character == 'D')
    elif character in 'wW':
        node = Word(character == 'W')
    elif character in 'sS':
        node = Space(character == 'S')
    elif character == 'Z':
        node = EndOfLine(False)
    else:
        # not a class or anchor escape - let the simpler builder decide
        return super(ComplexEscapeBuilder, self).append_character(character)

    self._parent._sequence.append(node)
    return self._parent
def index_for_name_or_count(self, name):
    '''
    Resolve a textual group reference to its integer group index.

    `name` is first tried as an integer index; text that `int` rejects
    is then looked up as a group name.  An index or name that is not
    known raises `RxpyException`.
    '''
    try:
        number = int(name)
        if number in self.__index_to_name:
            return number
        raise RxpyException('Unknown index ' + str(name))
    except ValueError:
        # not numeric, so treat as a name
        if name in self.__name_to_index:
            return self.__name_to_index[name]
        raise RxpyException('Unknown name ' + str(name))
def append_character(self, character):
    '''
    Consume one character of a `{...}` repeat specification.

    Before the closing brace, characters are accumulated and stored as
    limits on `,` and `}`.  An immediately closed `{}` is not a count:
    both braces are handed to the parent as escaped characters.  After
    the closing brace a single `?` marks the repeat lazy; any other
    character builds the repeat and is then passed on to the parent.

    Raises `RxpyException` if the input ends inside the specification.
    '''
    if self._closed:
        if character == '?' and not self._lazy:
            self._lazy = True
            return self
        self.__build()
        return self._parent.append_character(character)

    if character == '}':
        nothing_read = not self._acc and self._begin is None
        if nothing_read:
            # "{}" - pass the two braces through as literal text
            for brace in '{}':
                self._parent.append_character(brace, escaped=True)
            return self._parent
        self.__store_value()
        self._closed = True
    elif character == ',':
        self.__store_value()
    elif character:
        self._acc += character
    else:
        raise RxpyException('Incomplete count specification')
    return self
def unescape(self, code):
    '''
    Convert a numeric escape code to a character.

    Codes below 512 are reduced modulo 256 before conversion (for
    compatibility with python); larger codes raise `RxpyException`.
    '''
    if code >= 512:
        raise RxpyException('Unexpected character code for ASCII: ' + str(code))
    return self.code_to_char(code % 256)
def append_character(self, character, escaped=False):
    '''
    Consume one character of a conditional's yes/no alternative.

    An unescaped terminal character is reported to the owning
    conditional through its `callback`; everything else is handled by
    the superclass.

    Raises `RxpyException` if the input ends inside the conditional.
    '''
    if character is None:
        raise RxpyException('Incomplete conditional match')
    if escaped or character not in self.__terminals:
        return super(YesNoBuilder, self).append_character(character, escaped)
    return self.__conditional.callback(self, character)
def append_character(self, character):
    '''
    Handle the character following a backslash.

    A non-zero digit starts a group reference; any other character is
    delegated to the superclass.

    Raises `RxpyException` if the input ends after the backslash.
    '''
    if not character:
        raise RxpyException('Incomplete character escape')
    starts_reference = character in digits and character != '0'
    if starts_reference:
        return GroupReferenceBuilder(self._state, self._parent, character)
    return super(IntermediateEscapeBuilder, self).append_character(character)
def append_character(self, character):
    '''
    Decide what kind of group follows an opening parenthesis.

    The first character seen is either `?` (a qualifier follows) or the
    start of an ordinary group, in which case a `GroupBuilder` is
    created and given the character.  The second character (after `(?`)
    selects the specialised builder for the construct.

    Returns the builder that should receive the next character.

    Raises `RxpyException` for an unknown qualifier, or when the input
    ends (`character` is None) directly after `(?`.
    '''
    self._count += 1
    if self._count == 1:
        if character == '?':
            return self
        builder = GroupBuilder(self._state, self._parent)
        return builder.append_character(character)

    # End of input: without this guard None reaches the membership test
    # and the message concatenation below, failing with TypeError rather
    # than a parse error.
    if not character:
        raise RxpyException('Incomplete group')
    if character == ':':
        return GroupBuilder(self._state, self._parent, binding=False)
    elif character in ParserStateBuilder.INITIAL:
        return ParserStateBuilder(
            self._state, self._parent).append_character(character)
    elif character == 'P':
        return NamedGroupBuilder(self._state, self._parent)
    elif character == '#':
        return CommentGroupBuilder(self._state, self._parent)
    elif character == '=':
        return LookaheadBuilder(self._state, self._parent, True, True)
    elif character == '!':
        return LookaheadBuilder(self._state, self._parent, False, True)
    elif character == '<':
        return LookbackBuilder(self._state, self._parent)
    elif character == '(':
        return ConditionalBuilder(self._state, self._parent)
    else:
        raise RxpyException(
            'Unexpected qualifier after (? - ' + character)
def append_character(self, character):
    '''
    Select the lookback variant from the character following `(?<`.

    `=` and `!` both create a `LookaheadBuilder` whose final argument is
    False; the third argument is True for `=` and False for `!`.

    Raises `RxpyException` for any other character, or when the input
    ends (`character` is None) directly after `(?<`.
    '''
    # end of input must be a parse error, not a TypeError from building
    # the message below
    if not character:
        raise RxpyException('Incomplete lookback')
    if character == '=':
        return LookaheadBuilder(self._state, self._parent, True, False)
    elif character == '!':
        return LookaheadBuilder(self._state, self._parent, False, False)
    else:
        raise RxpyException(
            'Unexpected qualifier after (?< - ' + character)
def parse(self, text):
    '''
    Feed `text` through the builder chain and return the joined sequence.

    Each character is given to the current builder, which returns the
    builder for the next one; a final None signals end of input.  If
    control has not returned to this builder by then, `RxpyException`
    is raised.
    '''
    current = self
    for char in text:
        current = current.append_character(char)
    # None marks the end of the text
    current = current.append_character(None)
    if self != current:
        raise RxpyException('Incomplete expression')
    terminal = Match()
    return self._sequence.join(terminal, self._state)
def unpack(self, char, flags):
    '''
    Return either (True, CharSet) or (False, char)

    This implementation always returns the (False, char) form, with the
    character coerced and joined by this alphabet, and raises
    `RxpyException` if `flags` includes `ParserState.IGNORECASE`.
    '''
    from rxpy.parser.support import ParserState
    wants_ignorecase = flags & ParserState.IGNORECASE
    if wants_ignorecase:
        raise RxpyException('Default alphabet does not handle case')
    coerced = self.coerce(char)
    return (False, self.join(coerced))
def append_character(self, character):
    '''
    Handle the character following a backslash in a replacement string.

    `g` starts a group reference; any other character is delegated to
    the superclass.

    Raises `RxpyException` if the input ends after the backslash.
    '''
    if not character:
        raise RxpyException('Incomplete character escape')
    if character != 'g':
        return super(ReplacementEscapeBuilder, self).append_character(character)
    return ReplacementGroupReferenceBuilder(self._state, self._parent)
def append_character(self, character, escaped=False):
    '''
    Consume (and discard) one character of a `(?#...)` comment.

    An unescaped `)` ends the comment and returns the parent; an
    unescaped backslash hands over to a `SimpleEscapeBuilder` that
    reports back to this builder.  Every other character is ignored.

    Raises `RxpyException` if the input ends inside the comment.
    '''
    if not character:
        raise RxpyException('Incomplete comment')
    if not escaped:
        if character == ')':
            return self._parent
        if character == '\\':
            return SimpleEscapeBuilder(self._state, self)
    return self
def post_process(graph, actions):
    '''
    Apply per-type actions to every node of `graph`.

    `actions` is an iterable of (type, function) pairs.  Each node
    yielded by `node_iterator(graph)` is passed to the function
    registered for its exact type; nodes of other types are left alone.
    The same `graph` object is returned.

    Raises `RxpyException` if a type appears more than once in `actions`.
    '''
    def ignore(node):
        return None

    by_type = {}
    for (node_type, action) in actions:
        if node_type in by_type:
            raise RxpyException('Conflicting actions for ' + str(node_type))
        by_type[node_type] = action

    for node in node_iterator(graph):
        by_type.get(type(node), ignore)(node)
    return graph
def append_character(self, character):
    '''
    Consume one flag character of a `(?flags)` construct.

    A flag is either a single character found in the flag table or `_`
    followed by one of `l`, `c`, `e`, `u`, `g` (looked up in the table
    with the `_` prefix).  Each flag is registered with
    `self._state.new_flag`.  An unescaped `)` ends the construct and
    returns the parent builder.

    Raises `RxpyException` for `L` (locale), for an unknown flag, or
    when the input ends (`character` is None) before the closing `)`.
    '''
    # End of input: without this guard None reaches the string membership
    # test or the message concatenation and fails with TypeError.
    if not character:
        raise RxpyException('Incomplete flags')

    if self.__escape:
        # the previous character was '_'
        if character in 'lceug':
            self._state.new_flag(self.__table['_' + character])
            self.__escape = False
            return self
        raise RxpyException('Unexpected characters after (? - _' + character)

    if character == '_':
        self.__escape = True
        return self
    if character == 'L':
        raise RxpyException('Locale based classes unsupported')
    if character in self.__table:
        self._state.new_flag(self.__table[character])
        return self
    if character == ')':
        return self.__parent
    raise RxpyException('Unexpected character after (? - ' + character)
def append_character(self, character):
    '''
    Consume one character of a `\\g<...>` group reference in a replacement.

    The reference must open with `<`; digits or alphanumerics are then
    buffered (subject to `self.__numeric` / `self.__name`) until `>`,
    at which point the decoded reference is appended to the parent's
    sequence and the parent is returned.

    Raises `RxpyException` on an unexpected character or when the input
    ends before the closing `>`.
    '''
    # this is so complex because the tests for different errors are so
    # detailed - the order of the checks below matters
    if not self.__buffer and character == '<':
        self.__buffer += character
        return self
    if len(self.__buffer) > 1 and character == '>':
        self.__parent._sequence.append(self.__decode())
        return self.__parent
    if not character:
        raise RxpyException('Incomplete group escape')
    acceptable = ((self.__numeric and character in digits) or
                  (self.__name and character in ALPHANUMERIC))
    if acceptable:
        self.__buffer += character
        return self
    raise RxpyException('Unexpected character in group escape: ' + character)
def append_character(self, character):
    '''
    Complete a `?`, `+` or `*` repeat using the character that follows it.

    A following `?` makes the repeat lazy and is consumed here; any
    other character is forwarded to the parent once the repeat has been
    built.

    Raises `RxpyException` if the repeat is directly followed by `+` or
    `*`, or if the stored initial character is not a repeat operator.
    '''
    operator = self._initial_character
    lazy = character == '?'
    if character and character in '+*':
        raise RxpyException('Compound repeat: ' + operator + character)

    if operator == '?':
        self.build_optional(self._parent, self._latest, lazy)
    elif operator == '+':
        self.build_plus(self._parent, self._latest, lazy, self._state)
    elif operator == '*':
        self.build_star(self._parent, self._latest, lazy, self._state)
    else:
        raise RxpyException('Bad initial character for RepeatBuilder')

    # a lazy marker is consumed; anything else still needs handling
    return self._parent if lazy else self._parent.append_character(character)
def clone(self):
    '''
    Return a new instance of this node's class, constructed from the
    keyword arguments supplied by `self._kargs()` (needed, for example,
    when a numbered repeat is replaced by explicit repeated instances).

    A `TypeError` during construction is re-raised as `RxpyException`
    naming the class.
    '''
    try:
        kargs = self._kargs()
        return self.__class__(**kargs)
    except TypeError as e:
        message = 'Error cloning {0}: {1}'.format(self.__class__.__name__, e)
        raise RxpyException(message)
def parse_groups(texts, engine, flags=0, alphabet=None):
    '''
    Parse set of expressions, used to define groups for `Scanner`.

    A single `ParserState` (constrained by the engine's `REFUSE` and
    `REQUIRE` flags) is shared by all expressions; each text is added
    with `SequenceBuilder.parse_group`.  Returns `(state, graph)`.

    Raises `RxpyException` if parsing left new flags on the state.
    '''
    refused = engine.REFUSE
    required = engine.REQUIRE
    state = ParserState(flags=flags, alphabet=alphabet,
                        refuse=refused, require=required)
    builder = SequenceBuilder(state)
    for text in texts:
        builder.parse_group(text)
    if not state.has_new_flags:
        return (state, builder.to_sequence().join(Match(), state))
    raise RxpyException('Inconsistent flags')
def parse_group(self, text):
    '''
    Parse a set of groups for `Scanner`.

    `text` is parsed as the body of a new group; if this sequence
    already has content a new alternative is started first.  After the
    text, a closing `)` is supplied and control must return to this
    builder.

    Raises `RxpyException('Incomplete group')` if supplying the closing
    parenthesis fails or does not return control to this builder.
    '''
    builder = GroupBuilder(self._state, self)
    if self._sequence:
        self.__start_new_alternative()
    for character in text:
        builder = builder.append_character(character)
    try:
        builder = builder.append_character(')')
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not reported as a parse error
        raise RxpyException('Incomplete group')
    # explicit test (not `assert`) so the check survives python -O
    if not builder == self:
        raise RxpyException('Incomplete group')
def __store_value(self):
    '''
    Store the accumulated text as the next limit of a `{m,n}` repeat.

    The first call sets the lower limit, which is mandatory.  The second
    call marks the repeat as a range and, if any text was accumulated,
    sets the upper limit.  The accumulator is cleared afterwards.

    Raises `RxpyException` for a missing or non-integer limit, for a
    third value, or when the lower limit exceeds the upper limit.
    '''
    text = self._acc
    if self._begin is None:
        if not text:
            raise RxpyException('Missing lower limit for repeat')
        try:
            self._begin = int(text)
        except ValueError:
            raise RxpyException('Bad lower limit for repeat: ' + text)
    elif self._range:
        raise RxpyException('Too many values in repeat')
    else:
        self._range = True
        if text:
            try:
                self._end = int(text)
            except ValueError:
                raise RxpyException('Bad upper limit for repeat: ' + text)
            if self._begin > self._end:
                raise RxpyException('Inconsistent repeat range')
    self._acc = ''
def append_character(self, character):
    '''
    Handle the character following a backslash (basic escapes only).

    `x`, `u` and `U` start a character-code escape and digits start an
    octal escape.  A standard escape is replaced by its mapped value,
    and any other non-letter is taken literally; both are passed to the
    parent as escaped characters.  Remaining ASCII letters are handed to
    `_unexpected_character`.

    Raises `RxpyException` if the input ends after the backslash.
    '''
    if not character:
        raise RxpyException('Incomplete character escape')
    if character in 'xuU':
        return CharacterCodeBuilder(self._state, self._parent, character)
    if character in digits:
        return OctalEscapeBuilder(self._state, self._parent, character)

    if character in self.__std_escapes:
        literal = self.__std_escapes[character]
    elif character not in ascii_letters:
        # matches re.escape
        literal = character
    else:
        return self._unexpected_character(character)
    return self._parent.append_character(literal, escaped=True)
def parse(self, text):
    '''
    Parse a regular expression.

    Each character of `text` is given to the current builder, which
    returns the builder for the next one; a final None signals end of
    input.  Returns the completed sequence joined to a `Match` node.

    A `ParseException` raised while parsing is updated with the text and
    the index of the character most recently handed to a builder (0 if
    the text is empty) and then re-raised.

    Raises `RxpyException` if control has not returned to this builder
    at the end of the text.
    '''
    builder = self
    # Pre-set so the handler below can always report a position: with an
    # empty text the loop never binds `index`, and the end-of-input call
    # may still raise.
    index = 0
    try:
        for (index, character) in enumerate(text):
            builder = builder.append_character(character)
        builder = builder.append_character(None)
    except ParseException as e:
        e.update(text, index)
        raise
    if self != builder:
        raise RxpyException('Incomplete expression')
    return self.to_sequence().join(Match(), self._state)
def append(character=character):
    '''
    Add `character` to the character set being built.

    `self`, `unpack` and the default for `character` come from the
    enclosing scope.  Outside a range, any queued character is flushed
    to the set and `character` becomes the new queued value.  Inside a
    range, the queued character and `character` are each unpacked into
    a pair, and the two pairs supply the end points of the two intervals
    added (presumably the pairs are case variants - confirm against
    `unpack`).

    Raises `RxpyException` if a range is pending with nothing queued.
    '''
    if not self._range:
        if self._queue:
            (lo, hi) = unpack(self._queue)
            self._charset.append_interval((lo, lo))
            self._charset.append_interval((hi, hi))
        self._queue = character
        return

    if self._queue is None:
        raise RxpyException('Incomplete range')
    (start_lo, start_hi) = unpack(self._queue)
    (stop_lo, stop_hi) = unpack(character)
    self._charset.append_interval((start_lo, stop_lo))
    self._charset.append_interval((start_hi, stop_hi))
    self._queue = None
    self._range = False
def parse(text, state, class_, mutable_flags=True):
    '''
    Parse the text using the given builder.

    If the expression sets flags (and `mutable_flags` allows it) the
    text is parsed a second time with a state cloned to include them;
    an error from the first parse is suppressed in that case.  If new
    flags are still present afterwards `RxpyException` is raised.

    Returns `(state, graph)`, where `graph` has been post-processed to
    resolve group names.
    '''
    try:
        graph = class_(state).parse(text)
    except RxpyException:
        # only swallow the error when a second parse is going to happen
        if not mutable_flags or not state.has_new_flags:
            raise
    if mutable_flags and state.has_new_flags:
        state = state.clone_with_new_flags()
        graph = class_(state).parse(text)
    actions = resolve_group_names(state)
    graph = post_process(graph, actions)
    if state.has_new_flags:
        raise RxpyException('Inconsistent flags')
    return (state, graph)
def __build(self):
    '''
    Replace the parent's most recent node with the counted repeat.

    With the `_LOOP_UNROLL` flag set, and if `self._state.unwind`
    accepts the size (the upper limit, or the lower limit when no upper
    limit is set), the repeat is expanded into explicit clones:
    `_begin` copies, followed for a range by either a star (no upper
    limit) or `_end - _begin` optional copies.  Otherwise the node is
    handed to `build_count`.

    Raises `RxpyException` if the parent has nothing to repeat.
    '''
    nodes = self._parent._sequence
    if not nodes:
        raise RxpyException('Nothing to repeat')
    latest = nodes.pop()

    unroll = False
    if self._state.flags & ParserState._LOOP_UNROLL:
        size = self._begin if self._end is None else self._end
        if self._state.unwind(size):
            unroll = True

    if not unroll:
        upper = self._end if self._range else self._begin
        self.build_count(self._parent, latest, self._begin, upper,
                         self._lazy, self._state)
        return

    for _i in range(self._begin):
        self._parent._sequence.append(latest.clone())
    if self._range:
        if self._end is None:
            RepeatBuilder.build_star(
                self._parent, latest.clone(), self._lazy, self._state)
        else:
            for _i in range(self._end - self._begin):
                RepeatBuilder.build_optional(
                    self._parent, latest.clone(), self._lazy)
def require_engine(engine):
    '''
    Raise `RxpyException` if no engine was supplied (`engine` is falsy);
    otherwise do nothing.
    '''
    if engine:
        return
    raise RxpyException(
        'Engine must be given for RXPY (use an engine-specific re module).')
def decode(buffer, alphabet):
    '''
    Convert the octal digits in `buffer` to a character.

    The text is parsed base 8 and the resulting code is passed to
    `alphabet.unescape`.

    Raises `RxpyException` if the text is not valid octal or the
    alphabet rejects the code.
    '''
    try:
        return alphabet.unescape(int(buffer, 8))
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not turned into a parse error
        raise RxpyException('Bad octal escape: ' + buffer)