Example 1
class PickleCache(object):

    # Tests may modify this to add additional types
    _CACHEABLE_TYPES = (type, PersistentPy)
    _SWEEPABLE_TYPES = (PersistentPy,)

    total_estimated_size = 0
    cache_size_bytes = 0

    # Set by functions that sweep the entire ring (via _sweeping_ring)
    # Serves as a pseudo-lock
    _is_sweeping_ring = False

    def __init__(self, jar, target_size=0, cache_size_bytes=0):
        # TODO: forward-port Dieter's bytes stuff
        self.jar = jar
        # We expect the jars to be able to have a pointer to
        # us; this is a reference cycle, but certain
        # aspects of invalidation and accessing depend on it.
        # The actual Connection objects we're used with do set this
        # automatically, but many test objects don't.
        # TODO: track this on the persistent objects themselves?
        try:
            jar._cache = self
        except AttributeError:
            # Some ZODB tests pass in an object that cannot have a _cache
            pass
        self.cache_size = target_size
        self.drain_resistance = 0
        self.non_ghost_count = 0
        self.persistent_classes = {}
        self.data = _WeakValueDictionary()
        self.ring = Ring(self.data.cleanup_hook)
        self.cache_size_bytes = cache_size_bytes

    # IPickleCache API
    def __len__(self):
        """ See IPickleCache.
        """
        return (len(self.persistent_classes) +
                len(self.data))

    def __getitem__(self, oid):
        """ See IPickleCache.
        """
        value = self.data.get(oid, self)
        if value is not self:
            return value
        return self.persistent_classes[oid]

    def __setitem__(self, oid, value):
        """ See IPickleCache.
        """
        # The order of checks matters for C compatibility;
        # the ZODB tests depend on this

        # The C impl requires either a type or a Persistent subclass
        if not isinstance(value, self._CACHEABLE_TYPES):
            raise TypeError("Cache values must be persistent objects.")

        value_oid = value._p_oid
        if not isinstance(oid, OID_TYPE) or not isinstance(value_oid, OID_TYPE):
            raise TypeError('OID must be %s: key=%s _p_oid=%s' % (OID_TYPE, oid, value_oid))

        if value_oid != oid:
            raise ValueError("Cache key does not match oid")

        if oid in self.persistent_classes or oid in self.data:
            # Have to be careful here, a GC might have just run
            # and cleaned up the object
            existing_data = self.get(oid)
            if existing_data is not None and existing_data is not value:
                # Raise the same type of exception as the C impl with the same
                # message.
                raise ValueError('A different object already has the same oid')
        # Match the C impl: it requires a jar. Let this raise AttributeError
        # if no jar is found.
        jar = value._p_jar
        if jar is None:
            raise ValueError("Cached object jar missing")
        # It also requires that it cannot be cached more than one place
        existing_cache = getattr(jar, '_cache', None) # type: PickleCache
        if (existing_cache is not None
                and existing_cache is not self
                and oid in existing_cache.data):
            raise ValueError("Cache values may only be in one cache.")

        if isinstance(value, type): # ZODB.persistentclass.PersistentMetaClass
            self.persistent_classes[oid] = value
        else:
            self.data[oid] = value
            if _OGA(value, '_p_state') != GHOST and value not in self.ring:
                self.ring.add(value)
                self.non_ghost_count += 1
            elif self.data.cleanup_hook:
                # Ensure we begin monitoring for ``value`` to
                # be deallocated.
                self.ring.ring_node_for(value)
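
    # Illustration of the check order above (invented example): for
    # ``cache[b'\0' * 8] = obj``, a TypeError is raised if ``obj`` is
    # neither a type nor a Persistent instance; a ValueError if
    # ``obj._p_oid`` is not ``b'\0' * 8``, if ``obj._p_jar`` is None,
    # or if ``obj`` already lives in a different jar's cache.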

    def __delitem__(self, oid):
        """ See IPickleCache.
        """
        if not isinstance(oid, OID_TYPE):
            raise TypeError('OID must be %s: %s' % (OID_TYPE, oid))
        if oid in self.persistent_classes:
            del self.persistent_classes[oid]
        else:
            pobj = self.data.pop(oid)
            self.ring.delete(pobj)

    def get(self, oid, default=None):
        """ See IPickleCache.
        """
        value = self.data.get(oid, self)
        if value is not self:
            return value
        return self.persistent_classes.get(oid, default)

    def mru(self, oid):
        """ See IPickleCache.
        """
        if self._is_sweeping_ring:
            # Accesses during sweeping, such as with an
            # overridden _p_deactivate, don't mutate the ring
            # because that could leave it inconsistent
            return False # marker return for tests

        value = self.data[oid]

        was_in_ring = value in self.ring
        if not was_in_ring:
            if _OGA(value, '_p_state') != GHOST:
                self.ring.add(value)
                self.non_ghost_count += 1
        else:
            self.ring.move_to_head(value)
        return None

    def ringlen(self):
        """ See IPickleCache.
        """
        return len(self.ring)

    def items(self):
        """ See IPickleCache.
        """
        return self.data.items()

    def lru_items(self):
        """ See IPickleCache.
        """
        return [
            (obj._p_oid, obj)
            for obj in self.ring
        ]

    def klass_items(self):
        """ See IPickleCache.
        """
        return self.persistent_classes.items()

    def incrgc(self, ignored=None):
        """ See IPickleCache.
        """
        target = self.cache_size
        if self.drain_resistance >= 1:
            size = self.non_ghost_count
            target2 = size - 1 - (size // self.drain_resistance)
            if target2 < target:
                target = target2
        # return value for testing
        return self._sweep(target, self.cache_size_bytes)
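
    # Worked example of the drain_resistance arithmetic (numbers
    # invented): with cache_size = 400, non_ghost_count = 500, and
    # drain_resistance = 2, target2 = 500 - 1 - (500 // 2) = 249;
    # since 249 < 400, the sweep drains down to 249 non-ghost objects
    # instead of stopping at 400.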

    def full_sweep(self, target=None):
        """ See IPickleCache.
        """
        # return value for testing
        return self._sweep(0)

    minimize = full_sweep

    def new_ghost(self, oid, obj):
        """ See IPickleCache.
        """
        if obj._p_oid is not None:
            raise ValueError('Object already has oid')
        if obj._p_jar is not None:
            raise ValueError('Object already has jar')
        if oid in self.persistent_classes or oid in self.data:
            raise KeyError('Duplicate OID: %s' % oid)
        obj._p_oid = oid
        obj._p_jar = self.jar
        if not isinstance(obj, type):
            if obj._p_state != GHOST:
                # The C implementation sets this stuff directly,
                # but we delegate to the class. However, we must be
                # careful to avoid broken _p_invalidate and _p_deactivate
                # that don't call the super class. See ZODB's
                # testConnection.doctest_proper_ghost_initialization_with_empty__p_deactivate
                obj._p_invalidate_deactivate_helper(False)
        self[oid] = obj

    def reify(self, to_reify):
        """ See IPickleCache.
        """
        if isinstance(to_reify, OID_TYPE): #bytes
            to_reify = [to_reify]
        for oid in to_reify:
            value = self[oid]
            if value._p_state == GHOST:
                value._p_activate()
                self.non_ghost_count += 1
                self.mru(oid)

    def invalidate(self, to_invalidate):
        """ See IPickleCache.
        """
        if isinstance(to_invalidate, OID_TYPE):
            self._invalidate(to_invalidate)
        else:
            for oid in to_invalidate:
                self._invalidate(oid)

    def debug_info(self):
        result = []
        for oid, klass in self.persistent_classes.items():
            result.append((
                oid,
                len(gc.get_referents(klass)),
                type(klass).__name__,
                klass._p_state,
            ))
        for oid, value in self.data.items():
            result.append((
                oid,
                len(gc.get_referents(value)),
                type(value).__name__,
                value._p_state,
            ))
        return result

    def update_object_size_estimation(self, oid, new_size):
        """ See IPickleCache.
        """
        value = self.data.get(oid)

        if value is not None:
            # Recall that while the argument is given in bytes,
            # we have to work with 64-byte chunks (plus one)
            # to match the C implementation. Hence the convoluted
            # arithmetic
            new_size_in_24 = _estimated_size_in_24_bits(new_size)
            p_est_size_in_24 = value._Persistent__size
            new_est_size_in_bytes = (new_size_in_24 - p_est_size_in_24) * 64

            self.total_estimated_size += new_est_size_in_bytes
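
    # Worked example (numbers invented, assuming the helper rounds up
    # to 64-byte chunks as described above): if the previous estimate
    # was 4 chunks and the new pickle is 1000 bytes, then
    # new_size_in_24 == (1000 // 64) + 1 == 16, so total_estimated_size
    # grows by (16 - 4) * 64 == 768 bytes.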

    cache_drain_resistance = property(
        lambda self: self.drain_resistance,
        lambda self, nv: setattr(self, 'drain_resistance', nv)
    )
    cache_non_ghost_count = property(lambda self: self.non_ghost_count)
    cache_data = property(lambda self: dict(self.items()))
    cache_klass_count = property(lambda self: len(self.persistent_classes))

    # Helpers

    # Set to true when a deactivation happens in our code. For
    # compatibility with the C implementation, we can only remove the
    # node and decrement our non-ghost count if our implementation
    # actually runs (broken subclasses can forget to call super; ZODB
    # has tests for this). This gets set to false every time we examine
    # a node and checked afterwards. The C implementation has a very
    # incestuous relationship between cPickleCache and cPersistence:
    # the pickle cache calls _p_deactivate, which is responsible for
    # both decrementing the non-ghost count and removing its node from
    # the cache ring (and, if it gets deallocated, from the pickle
    # cache's dictionary). We're trying to keep that to a minimum, but
    # there's no way around it if we want full compatibility.
    _persistent_deactivate_ran = False

    @_sweeping_ring
    def _sweep(self, target, target_size_bytes=0):
        ejected = 0
        # If we find and eject objects that may have been weak referenced,
        # we need to run a garbage collection to try to clear those references.
        # Otherwise, it's highly likely that accessing those objects through those
        # references will try to ``_p_activate()`` them, and since the jar they came
        # from is probably closed, that will lead to an error. See
        # https://github.com/zopefoundation/persistent/issues/149
        had_weak_refs = False
        ring = self.ring
        for node, value in ring.iteritems():
            if ((target or target_size_bytes) # pylint:disable=too-many-boolean-expressions
                    and (not target or self.non_ghost_count <= target)
                    and (self.total_estimated_size <= target_size_bytes
                         or not target_size_bytes)):
                break

            if value._p_state == UPTODATE:
                # The C implementation will only evict things that are specifically
                # in the up-to-date state
                self._persistent_deactivate_ran = False

                # sweeping an object out of the cache should also
                # ghost it---that's what C does. This winds up
                # calling `update_object_size_estimation`.
                # Also in C, if this was the last reference to the object,
                # it removes itself from the `data` dictionary.
                # If we're under PyPy or Jython, we need to run a GC collection
                # to make this happen...this is only noticeable though, when
                # we eject objects. Also, note that we can only take any of these
                # actions if our _p_deactivate ran, in case of buggy subclasses.
                # see _persistent_deactivate_ran.

                if not had_weak_refs:
                    had_weak_refs |= getattr(value, '__weakref__', None) is not None

                value._p_deactivate()
                if (self._persistent_deactivate_ran
                        # Test-cases sneak in non-Persistent objects, sigh, so naturally
                        # they don't cooperate (without this check a bunch of test_picklecache
                        # breaks)
                        or not isinstance(value, self._SWEEPABLE_TYPES)):
                    ring.delete_node(node)
                    ejected += 1
                    self.non_ghost_count -= 1

        if ejected and had_weak_refs:
            # Clear the iteration variables, so the objects they point to
            # are subject to GC.
            node = None
            value = None
            gc.collect()
        return ejected

    @_sweeping_ring
    def _invalidate(self, oid):
        value = self.data.get(oid)
        if value is not None and value._p_state != GHOST:
            value._p_invalidate()
            self.ring.delete(value)
            self.non_ghost_count -= 1
        elif oid in self.persistent_classes:
            persistent_class = self.persistent_classes.pop(oid)
            try:
                # ZODB.persistentclass.PersistentMetaClass objects
                # have this method and it must be called for transaction abort
                # and other forms of invalidation to work
                persistent_class._p_invalidate()
            except AttributeError:
                pass
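
A minimal end-to-end sketch of the ghost lifecycle managed above, assuming
the pure-Python ``persistent`` package is importable; the stub jar and the
``Point`` class are invented for illustration, and the no-op ``setstate``
stands in for loading state from storage:

from persistent import Persistent
from persistent.picklecache import PickleCache

class _Jar(object):
    """Stand-in for a ZODB Connection; PickleCache stores itself in _cache."""
    def setstate(self, obj):
        pass  # a real jar would load the object's state here

class Point(Persistent):
    pass

jar = _Jar()
cache = PickleCache(jar)
oid = b'\0' * 7 + b'\1'
cache.new_ghost(oid, Point())  # assigns oid/jar and registers the ghost
print(cache.ringlen())         # 0: ghosts are not in the LRU ring
cache.reify(oid)               # activate; the object joins the ring
print(cache.ringlen())         # 1
cache.full_sweep()             # deactivate everything back to ghosts
print(cache.ringlen())         # 0
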
Example 4
class LocalClientBucket(object):
    """
    A map that keeps a record of its approx. size.

    keys must be `str`` and values must be byte strings.

    This class is not threadsafe, accesses to __setitem__ and get_and_bubble_all
    must be protected by a lock.
    """

    def __init__(self, limit):
        self._dict = {}
        self._ring = Ring()
        self._hits = 0
        self._misses = 0
        self.size = 0
        self.limit = limit

    def reset_stats(self):
        self._hits = 0
        self._misses = 0

    def stats(self):
        total = self._hits + self._misses
        return {'hits': self._hits,
                'misses': self._misses,
                'ratio': self._hits/total if total else 0,
                'size': len(self._dict),
                'bytes': self.size}
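
    # For example, after 3 hits and 1 miss on a bucket holding two
    # 10-byte values under 5-byte keys, stats() returns
    # {'hits': 3, 'misses': 1, 'ratio': 0.75, 'size': 2, 'bytes': 30}.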

    def __len__(self):
        return len(self._dict)

    def __setitem__(self, key, value):
        """
        Set an item.

        If the memory limit would be exceeded, remove old items until
        that is no longer the case.
        """
        # These types are gated by LocalClient, we don't need to double
        # check.
        #assert isinstance(key, str)
        #assert isinstance(value, bytes)

        sizedelta = len(value)

        if key in self._dict:
            entry = self._dict[key]
            oldvalue = entry.value
            sizedelta -= len(oldvalue)
            entry.value = value
            self._ring.move_to_head(entry)
        else:
            sizedelta += len(key)
            entry = _RingEntry(key, value)
            self._ring.add(entry)
            self._dict[key] = entry

        while self._dict and self.size + sizedelta > self.limit:
            oldest = next(iter(self._ring))
            if oldest._p_oid is key:
                break
            self.__delitem__(oldest._p_oid)

        self.size += sizedelta
        return True
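
    # Eviction illustration (numbers invented): with limit=100, storing
    # 'a' -> b'x' * 60 accounts for len('a') + 60 == 61 bytes; a second
    # store 'b' -> b'y' * 60 would reach 122, so the loop above pops 'a'
    # from the LRU end of the ring (the identity check on ``_p_oid``
    # keeps the entry just written from evicting itself), leaving
    # size == 61.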

    def __contains__(self, key):
        return key in self._dict

    def __delitem__(self, key):
        entry = self._dict[key]
        oldvalue = entry.value
        del self._dict[key]
        self._ring.delete(entry)
        sizedelta = len(key)
        sizedelta += len(oldvalue)
        self.size -= sizedelta

    def get_and_bubble_all(self, keys):
        dct = self._dict
        rng = self._ring
        res = {}
        for key in keys:
            entry = dct.get(key)
            if entry is not None:
                self._hits += 1
                rng.move_to_head(entry)
                res[key] = entry.value
            else:
                self._misses += 1
        return res

    def get(self, key):
        # Testing only. Does not bubble.
        entry = self._dict.get(key)
        if entry is not None:
            return entry.value

    def __getitem__(self, key):
        # Testing only
        return self._dict[key].value

    def load_from_file(self, cache_file):
        now = time.time()
        unpick = Unpickler(cache_file)
        version = unpick.load()
        if version != 1: # pragma: no cover
            raise ValueError("Incorrect version of cache_file")
        count = unpick.load()
        stored = 0
        loaded_dict = unpick.load()
        if not self._dict:
            # bulk-update in C for speed
            stored = len(loaded_dict)
            self._dict.update(loaded_dict)
            for ring_entry in itervalues(loaded_dict):
                if self.size < self.limit:
                    self._ring.add(ring_entry)
                    self.size += len(ring_entry.key) + len(ring_entry.value)
                else:
                    # We're too big! Ignore these things from now on.
                    # This is unlikely.
                    del self._dict[ring_entry.key]
        else:
            new_keys = set(loaded_dict.keys()) - set(self._dict.keys())
            stored += len(new_keys)
            # Loading more data into an existing bucket.
            # Load only the *new* keys, but don't care about LRU,
            # it's all screwed up anyway at this point
            for new_key in new_keys:
                new_ring_entry = loaded_dict[new_key]
                self._dict[new_key] = new_ring_entry
                self._ring.add(new_ring_entry)

                self.size += len(new_key) + len(new_ring_entry.value)
                if self.size >= self.limit: # pragma: no cover
                    break


        then = time.time()
        log.info("Examined %d and stored %d items from %s in %s",
                 count, stored, cache_file, then - now)
        return count, stored

    def write_to_file(self, cache_file):
        now = time.time()
        # pickling the items is about 2-3x faster than marshal
        pickler = Pickler(cache_file, -1) # Highest protocol

        pickler.dump(1) # Version marker
        assert len(self._dict) == len(self._ring)
        pickler.dump(len(self._dict)) # How many pairs we write
        # We lose the order. We'll have to build it up again as we go.
        pickler.dump(self._dict)

        then = time.time()
        stats = self.stats()
        log.info("Wrote %d items to %s in %s. Total hits %s; misses %s; ratio %s",
                 stats['size'], cache_file, then - now,
                 stats['hits'], stats['misses'], stats['ratio'])
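
A short usage sketch of the bucket above; the import path for
``LocalClientBucket`` is assumed to be the surrounding module, and the
in-memory buffer stands in for a real cache file:

import io

bucket = LocalClientBucket(limit=100)
bucket['a'] = b'x' * 60                # size: len('a') + 60 == 61
bucket['b'] = b'y' * 60                # evicts 'a' to stay under the limit
print('a' in bucket, 'b' in bucket)    # False True

res = bucket.get_and_bubble_all(['b', 'missing'])
print(sorted(res))                     # ['b']: one hit, one miss recorded
print(bucket.stats()['ratio'])         # 0.5

buf = io.BytesIO()                     # round trip through the pickle format
bucket.write_to_file(buf)
buf.seek(0)
fresh = LocalClientBucket(limit=100)
count, stored = fresh.load_from_file(buf)
print(count, stored)                   # 1 1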