Exemple #1
0
    def __init__(self, re, _pointer=None, flags=DEFAULT_FLAGS, **options):
        """ Compiles a regular expression. Once compiled, it can be used
        repeatedly to search, split or replace text in a string.

        :param re:        Bytestring expression to compile
        :param _pointer:  Existing regex pointer to wrap instead of compiling;
                          only used when ``re`` is empty
        :param flags:     Bitmask of flags
        :param options:   Config options to pass (size_limit, dfa_size_limit)
        :raises TypeError: if ``re`` is not a bytestring
        """
        if not isinstance(re, bytes):
            raise TypeError("'rure.lib.Rure' must be instantiated with a "
                            "bytestring as first argument.")

        # Both native objects are released through ffi.gc finalizers.
        self._err = ffi.gc(_lib.rure_error_new(), _lib.rure_error_free)
        self._opts = ffi.gc(_lib.rure_options_new(), _lib.rure_options_free)

        self.options = options
        if 'size_limit' in options:
            _lib.rure_options_size_limit(self._opts, options['size_limit'])
        if 'dfa_size_limit' in options:
            _lib.rure_options_dfa_size_limit(self._opts,
                                             options['dfa_size_limit'])

        # Compile whenever there is a pattern, and also when the pattern is
        # empty but no pre-built pointer was supplied: otherwise an empty
        # pattern would hand ``None`` to ffi.gc below.  A non-empty pattern
        # still takes precedence over ``_pointer``, as before.
        if re or _pointer is None:
            s = checked_call(_lib.rure_compile, self._err, re, len(re), flags,
                             self._opts)
        else:
            s = _pointer
        self._ptr = ffi.gc(s, _lib.rure_free)

        # One namedtuple field per capture group; unnamed groups get an empty
        # name, which ``rename=True`` replaces with a positional ``_N`` name.
        self.capture_cls = namedtuple(
            'Captures',
            [i.decode('utf8') if i else u'' for i in self.capture_names()],
            rename=True)
Exemple #2
0
    def __init__(self, *res, **options):
        """ Compiles several regular expressions together into one set
        through a single ``rure_compile_set`` call.

        :param res:      Bytestring expressions to compile, one per
                         positional argument
        :param options:  Config options to pass (flags bitmask,
                                                 size_limit,
                                                 dfa_size_limit)
        :raises TypeError: if any expression is not a bytestring
        """

        # ``flags`` is removed from the options before they are stored.
        flags = options.pop('flags', DEFAULT_FLAGS)
        for pattern in res:
            if not isinstance(pattern, bytes):
                raise TypeError("'rure.lib.RureSet' must be instantiated "
                                "with a "
                                "list of bytestrings as first argument.")

        self._err = ffi.gc(_lib.rure_error_new(), _lib.rure_error_free)
        self._opts = ffi.gc(_lib.rure_options_new(), _lib.rure_options_free)
        self.options = options
        if 'size_limit' in options:
            _lib.rure_options_size_limit(self._opts, options['size_limit'])
        if 'dfa_size_limit' in options:
            _lib.rure_options_dfa_size_limit(self._opts,
                                             options['dfa_size_limit'])

        # Keep the per-pattern buffers in a list so they stay referenced
        # while the pointer array built from them is in use.
        buffers = [ffi.new("uint8_t []", pattern) for pattern in res]
        lengths = [len(pattern) for pattern in res]

        pattern_array = ffi.new("uint8_t *[]", buffers)
        length_array = ffi.new("size_t []", lengths)
        compiled = checked_call(_lib.rure_compile_set, self._err,
                                pattern_array, length_array, len(buffers),
                                flags, self._opts)
        self._ptr = ffi.gc(compiled, _lib.rure_set_free)
Exemple #3
0
    def captures(self, haystack, start=0):
        """Return the capture groups of the leftmost-first match in
        ``haystack``, searching from offset ``start``.

        The result is an instance of ``self.capture_cls`` holding one entry
        per capture group: a ``RureMatch(start, end)`` for groups that
        took part in the match and ``None`` for those that did not.
        Group 0 is the entire match. ``None`` is returned when nothing
        matches.

        Only use this when submatches are needed; ``find`` is faster for
        locating the overall match.
        """
        hlen = len(haystack)
        caps = ffi.gc(_lib.rure_captures_new(self._ptr),
                      _lib.rure_captures_free)
        # One rure_match is reused as the output slot for every group.
        span = ffi.new('rure_match *')

        found = _lib.rure_find_captures(self._ptr, haystack, hlen, start,
                                        caps)
        if not found:
            return None

        build = self.capture_cls
        groups = []
        for index in range(0, _lib.rure_captures_len(caps)):
            if _lib.rure_captures_at(caps, index, span):
                groups.append(RureMatch(span.start, span.end))
            else:
                groups.append(None)
        return build(*groups)
Exemple #4
0
 def captures_iter(self, haystack, start=0):
     """Generate the capture groups of each successive match in
     ``haystack``.

     Every yielded item is a ``self.capture_cls`` instance with one entry
     per group: ``RureMatch(start, end)`` when the group matched, ``None``
     otherwise. Operationally this is find_iter with submatch
     information.

     NOTE: ``start`` is accepted but is not used by this method; it is
     not passed to the underlying iterator.
     """
     hlen = len(haystack)
     caps = ffi.gc(_lib.rure_captures_new(self._ptr),
                   _lib.rure_captures_free)
     matcher = ffi.gc(_lib.rure_iter_new(self._ptr),
                      _lib.rure_iter_free)
     # One rure_match is reused as the output slot for every group.
     span = ffi.new('rure_match *')

     while _lib.rure_iter_next_captures(matcher, haystack, hlen, caps):
         build = self.capture_cls
         groups = []
         for index in range(0, _lib.rure_captures_len(caps)):
             if _lib.rure_captures_at(caps, index, span):
                 groups.append(RureMatch(span.start, span.end))
             else:
                 groups.append(None)
         yield build(*groups)
Exemple #5
0
 def capture_names(self):
     """ Generate the name of every capture group, in iteration order.

     Names are yielded as returned by ``ffi.string``; an empty name is
     reported as ``None``, which marks an unnamed capture (the first
     element, capture 0 for the whole match, is always unnamed).
     """
     names_iter = ffi.gc(_lib.rure_iter_capture_names_new(self._ptr),
                         _lib.rure_iter_capture_names_free)
     # Output slot that the native iterator fills on each step.
     name_slot = ffi.new('char **')
     while _lib.rure_iter_capture_names_next(names_iter, name_slot):
         yield ffi.string(name_slot[0]) or None
Exemple #6
0
    def find_iter(self, haystack, start=0):
        """Generate a ``RureMatch(start, end)`` for each successive match
        that the native iterator reports in ``haystack``.

        No submatch information is produced; use captures_iter when the
        capture groups are needed.

        NOTE: ``start`` is accepted but is not used by this method; it is
        not passed to the underlying iterator.
        """
        hlen = len(haystack)
        matcher = ffi.gc(_lib.rure_iter_new(self._ptr), _lib.rure_iter_free)

        # One rure_match is reused as the output slot for every step.
        span = ffi.new('rure_match *')
        while True:
            if not _lib.rure_iter_next(matcher, haystack, hlen, span):
                break
            yield RureMatch(span.start, span.end)