def __init__(self, re, _pointer=None, flags=DEFAULT_FLAGS, **options):
    """ Compiles a regular expression. Once compiled, it can be used
    repeatedly to search, split or replace text in a string.

    :param re: Bytestring expression to compile
    :param _pointer: Internal use. An existing compiled-regex handle to
                     adopt instead of compiling; only consulted when
                     ``re`` is empty.
    :param flags: Bitmask of flags
    :param options: Config options to pass (size_limit, dfa_size_limit)
    :raises TypeError: If ``re`` is not a bytestring.
    """
    if not isinstance(re, bytes):
        raise TypeError("'rure.lib.Rure' must be instantiated with a "
                        "bytestring as first argument.")

    # Both native objects are released by the garbage collector.
    self._err = ffi.gc(_lib.rure_error_new(), _lib.rure_error_free)
    self._opts = ffi.gc(_lib.rure_options_new(), _lib.rure_options_free)
    self.options = options
    if 'size_limit' in options:
        _lib.rure_options_size_limit(self._opts, options['size_limit'])
    if 'dfa_size_limit' in options:
        _lib.rure_options_dfa_size_limit(self._opts,
                                         options['dfa_size_limit'])

    # An empty pattern (b'') is a valid expression. Only skip compilation
    # when the caller actually supplied a handle to adopt; previously
    # ``Rure(b'')`` fell through to ``ffi.gc(None, ...)``.
    if re or _pointer is None:
        s = checked_call(_lib.rure_compile,
                         self._err,
                         re,
                         len(re),
                         flags,
                         self._opts)
    else:
        s = _pointer
    self._ptr = ffi.gc(s, _lib.rure_free)

    # Unnamed groups get an empty name; ``rename=True`` lets namedtuple
    # substitute positional names for anything that is not a valid field.
    self.capture_cls = namedtuple(
        'Captures',
        [i.decode('utf8') if i else u'' for i in self.capture_names()],
        rename=True)
def __init__(self, *res, **options):
    """ Compiles a set of regular expressions. Once compiled, the set can
    be used repeatedly.

    :param res: Bytestring expressions to compile, one per positional
                argument
    :param options: Config options to pass (flags bitmask, size_limit,
                    dfa_size_limit)
    :raises TypeError: If any expression is not a bytestring.
    """
    # ``flags`` travels with the keyword options but is not stored with them.
    flags = options.pop('flags', DEFAULT_FLAGS)
    if any(not isinstance(pattern, bytes) for pattern in res):
        raise TypeError("'rure.lib.RureSet' must be instantiated with a "
                        "list of bytestrings as first argument.")

    self._err = ffi.gc(_lib.rure_error_new(), _lib.rure_error_free)
    self._opts = ffi.gc(_lib.rure_options_new(), _lib.rure_options_free)
    self.options = options

    # Each supported limit maps onto the ``rure_options_<name>`` setter.
    for limit_name in ('size_limit', 'dfa_size_limit'):
        if limit_name in options:
            apply_limit = getattr(_lib, 'rure_options_' + limit_name)
            apply_limit(self._opts, options[limit_name])

    # Parallel lists: one native buffer and one length per pattern. The
    # buffers stay referenced from ``buffers`` for the duration of the call.
    buffers = [ffi.new("uint8_t []", pattern) for pattern in res]
    lengths = [len(pattern) for pattern in res]

    pattern_array = ffi.new("uint8_t *[]", buffers)
    length_array = ffi.new("size_t []", lengths)
    compiled = checked_call(_lib.rure_compile_set,
                            self._err,
                            pattern_array,
                            length_array,
                            len(buffers),
                            flags,
                            self._opts)
    self._ptr = ffi.gc(compiled, _lib.rure_set_free)
def captures(self, haystack, start=0):
    """Return the capture groups of the leftmost-first match in haystack.

    Group 0 always corresponds to the entire match; a group that did not
    take part in the match is reported as None. When nothing matches,
    None is returned.

    Only use captures when submatches are needed; otherwise find is the
    faster way to locate the overall match.
    """
    haystack_len = len(haystack)
    caps = ffi.gc(_lib.rure_captures_new(self._ptr),
                  _lib.rure_captures_free)
    span = ffi.new('rure_match *')

    matched = _lib.rure_find_captures(self._ptr,
                                      haystack,
                                      haystack_len,
                                      start,
                                      caps)
    if not matched:
        return None

    groups = []
    for index in range(_lib.rure_captures_len(caps)):
        # rure_captures_at fills ``span`` only when the group participated.
        if _lib.rure_captures_at(caps, index, span):
            groups.append(RureMatch(span.start, span.end))
        else:
            groups.append(None)
    return self.capture_cls(*groups)
def captures_iter(self, haystack, start=0):
    """Yield the capture groups of each successive match in haystack.

    Operationally the same as find_iter, except that every yielded item
    carries information about submatches: one entry per group, with None
    for groups that did not take part in the match.

    NOTE(review): ``start`` is accepted but is not passed on to the
    underlying iterator in this implementation.
    """
    haystack_len = len(haystack)
    caps = ffi.gc(_lib.rure_captures_new(self._ptr),
                  _lib.rure_captures_free)
    match_iter = ffi.gc(_lib.rure_iter_new(self._ptr),
                        _lib.rure_iter_free)
    span = ffi.new('rure_match *')

    while True:
        advanced = _lib.rure_iter_next_captures(match_iter,
                                                haystack,
                                                haystack_len,
                                                caps)
        if not advanced:
            break

        groups = []
        for index in range(_lib.rure_captures_len(caps)):
            # ``span`` is reused; it is only read after a successful fill.
            if _lib.rure_captures_at(caps, index, span):
                groups.append(RureMatch(span.start, span.end))
            else:
                groups.append(None)
        yield self.capture_cls(*groups)
def capture_names(self):
    """ Iterate over the names of all possible captures.

    An unnamed capture is reported as None; the first element (capture 0,
    the whole matched region) is always unnamed.
    """
    names_iter = ffi.gc(_lib.rure_iter_capture_names_new(self._ptr),
                        _lib.rure_iter_capture_names_free)
    name_slot = ffi.new('char **')
    while True:
        if not _lib.rure_iter_capture_names_next(names_iter, name_slot):
            break
        # An empty name marks an unnamed group.
        yield ffi.string(name_slot[0]) or None
def find_iter(self, haystack, start=0):
    """Yield a RureMatch(start, end) for each successive match in haystack.

    Only the location of the overall match is reported; use captures_iter
    when submatch information is needed.

    NOTE(review): ``start`` is accepted but is not passed on to the
    underlying iterator in this implementation.
    """
    haystack_len = len(haystack)
    match_iter = ffi.gc(_lib.rure_iter_new(self._ptr),
                        _lib.rure_iter_free)
    span = ffi.new('rure_match *')

    while True:
        if not _lib.rure_iter_next(match_iter, haystack, haystack_len, span):
            break
        yield RureMatch(span.start, span.end)