Example #1
 def _iter(self, key, lo, hi, prefix, reverse, max, include):
     """Setup a woeful chain of iterators that yields index entries.
     """
     txn = self.store._txn_context.get()
     it = iterators.BasicIterator(txn, self.prefix)
     return iterators.from_args(it, key, lo, hi, prefix, reverse, max,
                                include, None)
Example #2
 def _iter(self, key, lo, hi, prefix, reverse, max_, include, max_phys):
     # Delegate to the strategy's raw iterator, wrapping it with the shared
     # range/limit handling in iterators.from_args().
     it = self.strategy.iter(self.store._txn_context.get())
     return iterators.from_args(it, key, lo, hi, prefix, reverse, max_,
                                include, max_phys)
Example #3
    def batch(self,
              lo=None,
              hi=None,
              prefix=None,
              max_recs=None,
              max_bytes=None,
              max_keylen=None,
              preserve=True,
              max_phys=None,
              grouper=None):
        """
        Search the key range *lo..hi* for individual records, combining them
        into batches.

        Returns `(found, made, last_key)` indicating the number of records
        combined, the number of batches produced, and the last key visited
        before `max_phys` was exceeded.

        Batch size is controlled via `max_recs` and `max_bytes`; at least one
        must not be ``None``. Larger sizes may cause pathological behaviour in
        the storage engine (for example, space inefficiency). Since batches are
        fully decompressed before any member may be accessed via
        :py:meth:`get() <Collection.get>` or :py:meth:`iteritems()
        <Collection.iteritems>`, larger sizes may slow decompression, waste IO
        bandwidth, and temporarily use more RAM.

            `lo`:
                Lowest search key.

            `hi`:
                Highest search key.

            `max_recs`:
                Maximum number of records contained by any single batch. When
                this count is reached, the current batch is saved and a new one
                is created.

            `max_bytes`:
                Maximum size in bytes of the batch record's value after
                compression, or ``None`` for no maximum size. When not
                ``None``, values are recompressed after each member is
                appended, in order to test if `max_bytes` has been reached. This
                is inefficient, but provides the best guarantee of final record
                size. Single records are skipped if they exceed this size when
                compressed individually.

            `preserve`:
                If ``True``, then existing batch records in the database are
                left untouched. When one is found within `lo..hi`, the
                currently building batch is finished and the found batch is
                skipped over.

                If ``False``, found batches are exploded and their members
                contribute to the currently building batch.

            `max_phys`:
                Maximum number of physical keys to visit in any particular
                call. A collection may be incrementally batched by repeatedly
                invoking :py:meth:`Collection.batch` with `max_phys` set, and
                `lo` set to `last_key` of the previous run, until `found` is
                ``0``. This allows batching to complete over several
                transactions without blocking other users.

            `grouper`:
                Specifies a grouping function used to avoid compressing
                unrelated records into the same batch. The function is passed
                each record's decoded value; a new batch is started whenever
                the function's return value changes.

        """
        assert max_bytes or max_recs, 'max_bytes and/or max_recs is required.'

        txn = self.store._txn_context.get()
        it = self.ITERATOR_CLASS(txn, self.prefix, self.compressor)
        groupval = object()  # Sentinel; compares unequal to any grouper() result.
        items = []

        for r in iterators.from_args(it, None, lo, hi, prefix, False, None,
                                     True, max_phys):
            if preserve and len(r.keys) > 1:
                # Existing batch found: flush the batch currently being built
                # and leave the found batch untouched.
                self._write_batch(txn, items, self.compressor)
            else:
                # Absorb the record into the batch being built, removing the
                # original key.
                txn.delete(keylib.packs(r.key, self.prefix))
                items.append((r.key, r.data))
                if max_bytes:
                    # Recompress after each append to test the size limit.
                    _, encoded = self._prepare_batch(items)
                    if len(encoded) > max_bytes:
                        items.pop()
                        self._write_batch(txn, items, self.compressor)
                        items.append((r.key, r.data))
                done = max_recs and len(items) == max_recs
                if (not done) and grouper:
                    # Start a new batch whenever the grouper value changes.
                    val = grouper(self.encoder.unpack(r.key, r.data))
                    done = val != groupval
                    groupval = val
                if done:
                    self._write_batch(txn, items, self.compressor)
        # Flush any remaining records.
        self._write_batch(txn, items, self.compressor)
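The `max_phys` parameter described above lends itself to a small driver loop. Below is a minimal sketch, assuming a hypothetical already-open `coll` collection and that transaction handling happens elsewhere; only `batch()`'s keyword arguments and its `(found, made, last_key)` return value are taken from the docstring in Example #3.

    lo = None
    while True:
        # Process at most 1000 physical keys per call, so other users of the
        # store are not blocked for long.
        found, made, last_key = coll.batch(lo=lo, max_recs=64, max_phys=1000)
        if not found:
            break
        # Resume the next run from the last key visited by the previous one.
        lo = last_key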
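A grouper can keep unrelated records out of the same batch. Another minimal sketch, again assuming a hypothetical `coll` collection whose record values are dicts with a `'day'` field; only the grouper contract (called with each record's value, new batch whenever the return value changes) comes from the docstring.

    def by_day(value):
        # Records sharing a day end up compressed into the same batch.
        return value['day']

    found, made, last_key = coll.batch(max_recs=500, grouper=by_day)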