Example #1
    def __new__(cls, obj, recursive=False):
        self = object.__new__(cls)
        obj = aq_base(obj)
        connection = obj._p_jar
        ObjectReader.__init__(self, connection, connection._cache,
                              connection._db.classFactory)
        ObjectWriter.__init__(self, obj)
        migrated_oid_set = set()
        oid_set = {obj._p_oid}
        while oid_set:
            oid = oid_set.pop()
            obj = ObjectReader.load_oid(self, oid)
            obj._p_activate()
            klass = obj.__class__
            self.lazy = None
            if not recursive:
                _setOb = getattr(klass, '_setOb', None)
                if _setOb:
                    if isinstance(_setOb, WorkflowMethod):
                        _setOb = _setOb._m
                    import six
                    if six.get_unbound_function(_setOb) is \
                            six.get_unbound_function(OFS_Folder._setOb):
                        self.lazy = Ghost
                elif klass.__module__[:7] == 'BTrees.' \
                        and klass.__name__ != 'Length':
                    self.lazy = LazyBTree()
            self.oid_dict = {}
            self.oid_set = set()
            p, serial = self._conn._storage.load(oid, '')
            unpickler = self._get_unpickler(p)

            def find_global(*args):
                self.do_migrate = args != (klass.__module__, klass.__name__) and \
                                  not isOldBTree('%s.%s' % args)
                unpickler.find_global = self._get_class
                return self._get_class(*args)

            unpickler.find_global = find_global
            unpickler.load()  # class
            state = unpickler.load()
            if isinstance(self.lazy, LazyPersistent):
                self.oid_set.update(self.lazy.getOidList(state))
            migrated_oid_set.add(oid)
            oid_set |= self.oid_set - migrated_oid_set
            self.oid_set = None
            if self.do_migrate:
                log.debug('PickleUpdater: migrate %r (%r)', obj, klass)
                self.setGhostState(obj, self.serialize(obj))
                obj._p_changed = 1
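
# Hedged usage sketch (assumptions: this __new__ belongs to the PickleUpdater
# class named in the log message above, and obj is a persistent object loaded
# through an open ZODB connection).  Instantiating the class performs the
# migration as a side effect; the rewritten records still have to be committed:
#
#     import transaction
#     PickleUpdater(some_persistent_obj, recursive=True)
#     transaction.commit()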
Example #2
    def __new__(cls, obj, recursive=False):
        assert cls.get, "Persistent migration of pickle requires ZODB >= 3.5"
        self = object.__new__(cls)
        obj = aq_base(obj)
        connection = obj._p_jar
        ObjectReader.__init__(self, connection, connection._cache, connection._db.classFactory)
        ObjectWriter.__init__(self, obj)
        migrated_oid_set = set()
        oid_set = set((obj._p_oid,))
        while oid_set:
            oid = oid_set.pop()
            obj = self.get(oid)
            obj._p_activate()
            klass = obj.__class__
            self.lazy = None
            if not recursive:
                _setOb = getattr(klass, "_setOb", None)
                if _setOb:
                    if isinstance(_setOb, WorkflowMethod):
                        _setOb = _setOb._m
                    if _setOb.im_func is OFS_Folder._setOb.im_func:
                        self.lazy = Ghost
                elif klass.__module__[:7] == "BTrees." and klass.__name__ != "Length":
                    self.lazy = LazyBTree()
            self.oid_dict = {}
            self.oid_set = set()
            p, serial = self._conn._storage.load(oid, "")
            unpickler = self._get_unpickler(p)

            def find_global(*args):
                self.do_migrate = args != (klass.__module__, klass.__name__) and not isOldBTree("%s.%s" % args)
                unpickler.find_global = self._get_class
                return self._get_class(*args)

            unpickler.find_global = find_global
            unpickler.load()  # class
            state = unpickler.load()
            if isinstance(self.lazy, LazyPersistent):
                self.oid_set.update(self.lazy.getOidList(state))
            migrated_oid_set.add(oid)
            oid_set |= self.oid_set - migrated_oid_set
            self.oid_set = None
            if self.do_migrate:
                log.debug("PickleUpdater: migrate %r (%r)", obj, klass)
                self.setGhostState(obj, self.serialize(obj))
                obj._p_changed = 1
Example #3
  def load_persistent(self, oid, klass):
    obj = ObjectReader.load_persistent(self, oid, klass)
    if self.oid_set is not None:
      if not self.lazy:
        self.oid_set.add(oid)
      obj._p_activate()
      self.oid_dict[oid] = oid_klass = ObjectWriter.persistent_id(self, obj)
      if oid_klass != (oid, klass):
        self.do_migrate = True
    return obj
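
# Hedged reading of the check above (ObjectWriter.persistent_id is assumed to
# return an (oid, class) reference here): if the reference recomputed from the
# live sub-object no longer matches the (oid, klass) reference stored in the
# parent's pickle, the referenced class moved or changed, so do_migrate marks
# the parent record for rewriting.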
Example #4
    def __init__(self, db, cache_size=400, before=None, cache_size_bytes=0):
        """Create a new Connection."""

        self._log = logging.getLogger('ZODB.Connection')
        self._debug_info = ()

        self._db = db
        self.large_record_size = db.large_record_size

        # historical connection
        self.before = before

        # Multi-database support
        self.connections = {self._db.database_name: self}

        storage = db._mvcc_storage
        if before:
            try:
                before_instance = storage.before_instance
            except AttributeError:

                def before_instance(before):
                    return HistoricalStorageAdapter(storage.new_instance(),
                                                    before)

            storage = before_instance(before)
        else:
            storage = storage.new_instance()

        self._normal_storage = self._storage = storage
        self.new_oid = db.new_oid
        self._savepoint_storage = None

        # Do we need to join a txn manager?
        self._needs_to_join = True
        self.transaction_manager = None
        self.opened = None  # time.time() when DB.open() opened us

        self._reset_counter = global_reset_counter
        self._load_count = 0  # Number of objects unghosted
        self._store_count = 0  # Number of objects stored

        # Cache which can ghostify (forget the state of) objects not
        # recently used. Its API is roughly that of a dict, with
        # additional gc-related and invalidation-related methods.
        self._cache = PickleCache(self, cache_size, cache_size_bytes)

        # The pre-cache is used by get to avoid infinite loops when
        # objects immediately load their state when they get their
        # persistent data set.
        self._pre_cache = {}

        # List of all objects (not oids) registered as modified by the
        # persistence machinery, or by add(), or whose access caused a
        # ReadConflictError (just to be able to clean them up from the
        # cache on abort with the other modified objects). All objects
        # of this list are either in _cache or in _added.
        self._registered_objects = []  # [object]

        # ids and serials of objects for which readCurrent was called
        # in a transaction.
        self._readCurrent = {}  # {oid ->serial}

        # Dict of oid->obj added explicitly through add(). Used as a
        # preliminary cache until commit time when objects are all moved
        # to the real _cache. The objects are moved to _creating at
        # commit time.
        self._added = {}  # {oid -> object}

        # During commit this is turned into a list, which receives
        # objects added as a side-effect of storing a modified object.
        self._added_during_commit = None

        # During commit, all objects go to either _modified or _creating:

        # Dict of oid->flag of new objects (without serial), either
        # added by add() or implicitly added (discovered by the
        # serializer during commit). The flag is True for implicit
        # adding. Used during abort to remove created objects from the
        # _cache, and by persistent_id to check that a new object isn't
        # reachable from multiple databases.
        self._creating = {}  # {oid -> implicitly_added_flag}

        # List of oids of modified objects, which have to be invalidated
        # in the cache on abort and in other connections on finish.
        self._modified = []  # [oid]

        # We intend to prevent committing a transaction in which
        # ReadConflictError occurs.  _conflicts is the set of oids that
        # experienced ReadConflictError.  Any time we raise ReadConflictError,
        # the oid should be added to this set, and we should be sure that the
        # object is registered.  Because it's registered, Connection.commit()
        # will raise ReadConflictError again (because the oid is in
        # _conflicts).
        self._conflicts = {}

        # To support importFile(), implemented in the ExportImport base
        # class, we need to run _importDuringCommit() from our commit()
        # method.  If _import is not None, it is a two-tuple of arguments
        # to pass to _importDuringCommit().
        self._import = None

        self._reader = ObjectReader(self, self._cache, self._db.classFactory)
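
# Connections are not normally constructed directly; DB.open() builds and
# pools them.  A minimal self-contained sketch (MappingStorage is chosen only
# to keep the example file-less, and the attribute name 'answer' is
# illustrative):

import transaction
from ZODB import DB
from ZODB.MappingStorage import MappingStorage

db = DB(MappingStorage())
conn = db.open()            # a Connection created by the constructor above
conn.root.answer = 42       # attribute-style access via RootConvenience
transaction.commit()
conn.close()
db.close()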
Example #5
class Connection(ExportImport, object):
    """Connection to ZODB for loading and storing objects."""

    _code_timestamp = 0

    ##########################################################################
    # Connection methods, ZODB.IConnection

    def __init__(self, db, cache_size=400, before=None, cache_size_bytes=0):
        """Create a new Connection."""

        self._log = logging.getLogger('ZODB.Connection')
        self._debug_info = ()

        self._db = db
        self.large_record_size = db.large_record_size

        # historical connection
        self.before = before

        # Multi-database support
        self.connections = {self._db.database_name: self}

        storage = db._mvcc_storage
        if before:
            try:
                before_instance = storage.before_instance
            except AttributeError:

                def before_instance(before):
                    return HistoricalStorageAdapter(storage.new_instance(),
                                                    before)

            storage = before_instance(before)
        else:
            storage = storage.new_instance()

        self._normal_storage = self._storage = storage
        self.new_oid = db.new_oid
        self._savepoint_storage = None

        # Do we need to join a txn manager?
        self._needs_to_join = True
        self.transaction_manager = None
        self.opened = None  # time.time() when DB.open() opened us

        self._reset_counter = global_reset_counter
        self._load_count = 0  # Number of objects unghosted
        self._store_count = 0  # Number of objects stored

        # Cache which can ghostify (forget the state of) objects not
        # recently used. Its API is roughly that of a dict, with
        # additional gc-related and invalidation-related methods.
        self._cache = PickleCache(self, cache_size, cache_size_bytes)

        # The pre-cache is used by get to avoid infinite loops when
        # objects immediately load their state when they get their
        # persistent data set.
        self._pre_cache = {}

        # List of all objects (not oids) registered as modified by the
        # persistence machinery, or by add(), or whose access caused a
        # ReadConflictError (just to be able to clean them up from the
        # cache on abort with the other modified objects). All objects
        # of this list are either in _cache or in _added.
        self._registered_objects = []  # [object]

        # ids and serials of objects for which readCurrent was called
        # in a transaction.
        self._readCurrent = {}  # {oid ->serial}

        # Dict of oid->obj added explicitly through add(). Used as a
        # preliminary cache until commit time when objects are all moved
        # to the real _cache. The objects are moved to _creating at
        # commit time.
        self._added = {}  # {oid -> object}

        # During commit this is turned into a list, which receives
        # objects added as a side-effect of storing a modified object.
        self._added_during_commit = None

        # During commit, all objects go to either _modified or _creating:

        # Dict of oid->flag of new objects (without serial), either
        # added by add() or implicitly added (discovered by the
        # serializer during commit). The flag is True for implicit
        # adding. Used during abort to remove created objects from the
        # _cache, and by persistent_id to check that a new object isn't
        # reachable from multiple databases.
        self._creating = {}  # {oid -> implicitly_added_flag}

        # List of oids of modified objects, which have to be invalidated
        # in the cache on abort and in other connections on finish.
        self._modified = []  # [oid]

        # We intend to prevent committing a transaction in which
        # ReadConflictError occurs.  _conflicts is the set of oids that
        # experienced ReadConflictError.  Any time we raise ReadConflictError,
        # the oid should be added to this set, and we should be sure that the
        # object is registered.  Because it's registered, Connection.commit()
        # will raise ReadConflictError again (because the oid is in
        # _conflicts).
        self._conflicts = {}

        # To support importFile(), implemented in the ExportImport base
        # class, we need to run _importDuringCommit() from our commit()
        # method.  If _import is not None, it is a two-tuple of arguments
        # to pass to _importDuringCommit().
        self._import = None

        self._reader = ObjectReader(self, self._cache, self._db.classFactory)

    def add(self, obj):
        """Add a new object 'obj' to the database and assign it an oid."""
        if self.opened is None:
            raise ConnectionStateError("The database connection is closed")

        marker = object()
        oid = getattr(obj, "_p_oid", marker)
        if oid is marker:
            raise TypeError(
                "Only first-class persistent objects may be"
                " added to a Connection.", obj)
        elif obj._p_jar is None:
            self._add(obj, self.new_oid())
        elif obj._p_jar is not self:
            raise InvalidObjectReference(obj, obj._p_jar)

    def _add(self, obj, oid):
        assert obj._p_oid is None
        oid = obj._p_oid = oid
        obj._p_jar = self
        if self._added_during_commit is not None:
            self._added_during_commit.append(obj)
        self._register(obj)
        # Add to _added after calling register(), so that _added
        # can be used as a test for whether the object has been
        # registered with the transaction.
        self._added[oid] = obj

    def get(self, oid):
        """Return the persistent object with oid 'oid'."""
        if self.opened is None:
            raise ConnectionStateError("The database connection is closed")

        obj = self._cache.get(oid, None)
        if obj is not None:
            return obj
        obj = self._added.get(oid, None)
        if obj is not None:
            return obj
        obj = self._pre_cache.get(oid, None)
        if obj is not None:
            return obj

        p, _ = self._storage.load(oid)
        obj = self._reader.getGhost(p)

        # Avoid an infinite loop if obj tries to load its state before
        # it is added to the cache and its state refers to it.
        # (This will typically be the case for non-ghostifiable objects,
        # like persistent caches.)
        self._pre_cache[oid] = obj
        self._cache.new_ghost(oid, obj)
        self._pre_cache.pop(oid)
        return obj

    def cacheMinimize(self):
        """Deactivate all unmodified objects in the cache.
        """
        for connection in six.itervalues(self.connections):
            connection._cache.minimize()

    # TODO: we should test what happens when cacheGC is called mid-transaction.
    def cacheGC(self):
        """Reduce cache size to target size.
        """
        for connection in six.itervalues(self.connections):
            connection._cache.incrgc()

    __onCloseCallbacks = None

    def onCloseCallback(self, f):
        """Register a callable, f, to be called by close()."""
        if self.__onCloseCallbacks is None:
            self.__onCloseCallbacks = []
        self.__onCloseCallbacks.append(f)

    def close(self, primary=True):
        """Close the Connection."""
        if not self._needs_to_join:
            # We're currently joined to a transaction.
            raise ConnectionStateError("Cannot close a connection joined to "
                                       "a transaction")

        if self._cache is not None:
            self._cache.incrgc()  # This is a good time to do some GC

        # Call the close callbacks.
        if self.__onCloseCallbacks is not None:
            callbacks = self.__onCloseCallbacks
            self.__onCloseCallbacks = None
            for f in callbacks:
                try:
                    f()
                except:  # except what?
                    f = getattr(f, 'im_self', f)
                    self._log.exception("Close callback failed for %s", f)

        self._debug_info = ()

        if self.opened:
            self.transaction_manager.unregisterSynch(self)

        if primary:
            for connection in self.connections.values():
                if connection is not self:
                    connection.close(False)

            # Return the connection to the pool.
            if self.opened is not None:
                self._db._returnToPool(self)

                # _returnToPool() set self.opened to None.
                # However, we can't assert that here, because self may
                # have been reused (by another thread) by the time we
                # get back here.
        else:
            self.opened = None

        am = self._db._activity_monitor
        if am is not None:
            am.closedConnection(self)

    def db(self):
        """Returns a handle to the database this connection belongs to."""
        return self._db

    def isReadOnly(self):
        """Returns True if this connection is read only."""
        if self.opened is None:
            raise ConnectionStateError("The database connection is closed")
        return self._storage.isReadOnly()

    @property
    def root(self):
        """Return the database root object."""
        return RootConvenience(self.get(z64))

    def get_connection(self, database_name):
        """Return a Connection for the named database."""
        connection = self.connections.get(database_name)
        if connection is None:
            new_con = self._db.databases[database_name].open(
                transaction_manager=self.transaction_manager,
                before=self.before,
            )
            self.connections.update(new_con.connections)
            new_con.connections = self.connections
            connection = new_con
        return connection

    def _implicitlyAdding(self, oid):
        """Are we implicitly adding an object within the current transaction

        This is used in a check to avoid implicitly adding an object
        to a database in a multi-database situation.
        See serialize.ObjectWriter.persistent_id.

        """
        return (self._creating.get(oid, 0)
                or ((self._savepoint_storage is not None)
                    and self._savepoint_storage.creating.get(oid, 0)))

    def sync(self):
        """Manually update the view on the database."""
        self.transaction_manager.begin()

    def getDebugInfo(self):
        """Returns a tuple with different items for debugging the
        connection.
        """
        return self._debug_info

    def setDebugInfo(self, *args):
        """Add the given items to the debug information of this connection."""
        self._debug_info = self._debug_info + args

    def getTransferCounts(self, clear=False):
        """Returns the number of objects loaded and stored."""
        res = self._load_count, self._store_count
        if clear:
            self._load_count = 0
            self._store_count = 0
        return res

    # Connection methods
    ##########################################################################

    ##########################################################################
    # Data manager (ISavepointDataManager) methods

    def abort(self, transaction):
        """Abort a transaction and forget all changes."""

        # The order is important here.  We want to abort registered
        # objects before we process the cache.  Otherwise, we may un-add
        # objects added in savepoints.  If they've been modified since
        # the savepoint, then they won't have _p_oid or _p_jar after
        # they've been unadded. This will make the code in _abort
        # confused.
        self._abort()

        if self._savepoint_storage is not None:
            self._abort_savepoint()

        self._invalidate_creating()
        self._tpc_cleanup()

    def _abort(self):
        """Abort a transaction and forget all changes."""

        for obj in self._registered_objects:
            oid = obj._p_oid
            assert oid is not None
            if oid in self._added:
                del self._added[oid]
                if self._cache.get(oid) is not None:
                    del self._cache[oid]
                del obj._p_jar
                del obj._p_oid
                if obj._p_changed:
                    obj._p_changed = False
            else:
                # Note: If we invalidate a non-ghostifiable object
                # (i.e. a persistent class), the object will
                # immediately reread its state.  That means that the
                # following call could result in a call to
                # self.setstate, which, of course, must succeed.
                # In general, it would be better if the read could be
                # delayed until the start of the next transaction.  If
                # we read at the end of a transaction and if the
                # object was invalidated during this transaction, then
                # we'll read non-current data, which we'll discard
                # later in transaction finalization.  Unfortunately, we
                # can only delay the read if this abort corresponds to
                # a top-level-transaction abort.  We can't tell if
                # this is a top-level-transaction abort, so we have to
                # go ahead and invalidate now.  Fortunately, it's
                # pretty unlikely that the object we are invalidating
                # was invalidated by another thread, so the risk of a
                # reread is pretty low.

                self._cache.invalidate(oid)

    def _tpc_cleanup(self):
        """Performs cleanup operations to support tpc_finish and tpc_abort."""
        self._conflicts.clear()
        self._needs_to_join = True
        self._registered_objects = []
        self._creating.clear()

    def tpc_begin(self, transaction):
        """Begin commit of a transaction, starting the two-phase commit."""
        self._modified = []

        # _creating is a list of oids of new objects, which is used to
        # remove them from the cache if a transaction aborts.
        self._creating.clear()
        self._normal_storage.tpc_begin(transaction)

    def commit(self, transaction):
        """Commit changes to an object"""

        if self._savepoint_storage is not None:

            # We first checkpoint the current changes to the savepoint
            self.savepoint()

            # then commit all of the savepoint changes at once
            self._commit_savepoint(transaction)

            # No need to call _commit since savepoint did.

        else:
            self._commit(transaction)

        for oid, serial in six.iteritems(self._readCurrent):
            try:
                self._storage.checkCurrentSerialInTransaction(
                    oid, serial, transaction)
            except ConflictError:
                self._cache.invalidate(oid)
                raise

    def _commit(self, transaction):
        """Commit changes to an object"""

        if self.before is not None:
            raise ReadOnlyHistoryError()

        if self._import:
            # We are importing an export file. We always do this
            # while making a savepoint so we can copy export data
            # directly to our storage, typically a TmpStore.
            self._importDuringCommit(transaction, *self._import)
            self._import = None

        # Just in case an object is added as a side-effect of storing
        # a modified object.  If, for example, a __getstate__() method
        # calls add(), the newly added objects will show up in
        # _added_during_commit.  This sounds insane, but has actually
        # happened.

        self._added_during_commit = []

        for obj in self._registered_objects:
            oid = obj._p_oid
            assert oid
            if oid in self._conflicts:
                raise ReadConflictError(object=obj)

            if obj._p_jar is not self:
                raise InvalidObjectReference(obj, obj._p_jar)
            elif oid in self._added:
                assert obj._p_serial == z64
            elif oid in self._creating or not obj._p_changed:
                # Nothing to do.  It's been said that it's legal, e.g., for
                # an object to set _p_changed to false after it's been
                # changed and registered.
                # And new objects that are registered after any referrer are
                # already processed.
                continue

            self._store_objects(ObjectWriter(obj), transaction)

        for obj in self._added_during_commit:
            self._store_objects(ObjectWriter(obj), transaction)
        self._added_during_commit = None

    def _store_objects(self, writer, transaction):
        for obj in writer:
            oid = obj._p_oid
            serial = getattr(obj, "_p_serial", z64)

            if ((serial == z64)
                    and ((self._savepoint_storage is None) or
                         (oid not in self._savepoint_storage.creating)
                         or self._savepoint_storage.creating[oid])):

                # obj is a new object

                # Because obj was added, it is now in _creating, so it
                # can be removed from _added.  If oid wasn't in
                # adding, then we are adding it implicitly.

                implicitly_adding = self._added.pop(oid, None) is None

                self._creating[oid] = implicitly_adding

            else:
                self._modified.append(oid)

            p = writer.serialize(obj)  # This calls __getstate__ of obj
            if len(p) >= self.large_record_size:
                warnings.warn(large_object_message % (obj.__class__, len(p)))

            if isinstance(obj, Blob):
                if not IBlobStorage.providedBy(self._storage):
                    raise Unsupported("Storing Blobs in %s is not supported." %
                                      repr(self._storage))
                if obj.opened():
                    raise ValueError("Can't commit with opened blobs.")
                blobfilename = obj._uncommitted()
                if blobfilename is None:
                    assert serial is not None  # See _uncommitted
                    self._modified.pop()  # not modified
                    continue
                s = self._storage.storeBlob(oid, serial, p, blobfilename, '',
                                            transaction)
                # we invalidate the object here in order to ensure
                # that the next attribute access of its name
                # unghostifies it, which will cause its blob data
                # to be reattached "cleanly"
                obj._p_invalidate()
            else:
                s = self._storage.store(oid, serial, p, '', transaction)

            self._store_count += 1
            # Put the object in the cache before handling the
            # response, just in case the response contains the
            # serial number for a newly created object
            try:
                self._cache[oid] = obj
            except:
                # Dang, I bet it's wrapped:
                # TODO:  Deprecate, then remove, this.
                if hasattr(obj, 'aq_base'):
                    self._cache[oid] = obj.aq_base
                else:
                    raise

            self._cache.update_object_size_estimation(oid, len(p))
            obj._p_estimated_size = len(p)

            # if we write an object, we don't want to check if it was read
            # while current.  This is a convenient choke point to do this.
            self._readCurrent.pop(oid, None)
            if s:
                # savepoint
                obj._p_changed = 0  # transition from changed to up-to-date
                obj._p_serial = s

    def tpc_abort(self, transaction):
        if self._import:
            self._import = None

        if self._savepoint_storage is not None:
            self._abort_savepoint()

        self._storage.tpc_abort(transaction)

        # Note: If we invalidate a non-ghostifiable object (i.e. a
        # persistent class), the object will immediately reread its
        # state.  That means that the following call could result in a
        # call to self.setstate, which, of course, must succeed.  In
        # general, it would be better if the read could be delayed
        # until the start of the next transaction.  If we read at the
        # end of a transaction and if the object was invalidated
        # during this transaction, then we'll read non-current data,
        # which we'll discard later in transaction finalization.  We
        # could, theoretically queue this invalidation by calling
        # self.invalidate.  Unfortunately, attempts to make that
        # change resulted in mysterious test failures.  It's pretty
        # unlikely that the object we are invalidating was invalidated
        # by another thread, so the risk of a reread is pretty low.
        # It's really not worth the effort to pursue this.

        self._cache.invalidate(self._modified)
        self._invalidate_creating()
        while self._added:
            oid, obj = self._added.popitem()
            if obj._p_changed:
                obj._p_changed = False
            del obj._p_oid
            del obj._p_jar
        self._tpc_cleanup()

    def _invalidate_creating(self, creating=None):
        """Disown any objects newly saved in an uncommitted transaction."""
        if creating is None:
            creating = self._creating
            self._creating = {}

        for oid in creating:
            o = self._cache.get(oid)
            if o is not None:
                del self._cache[oid]
                if o._p_changed:
                    o._p_changed = False
                del o._p_jar
                del o._p_oid

    def tpc_vote(self, transaction):
        """Verify that a data manager can commit the transaction."""
        try:
            vote = self._storage.tpc_vote
        except AttributeError:
            return
        try:
            s = vote(transaction)
        except ReadConflictError as v:
            if v.oid:
                self._cache.invalidate(v.oid)
            raise
        if s:
            # Resolved conflicts.
            for oid in s:
                obj = self._cache.get(oid)
                if obj is not None:
                    del obj._p_changed  # transition from changed to ghost

    def tpc_finish(self, transaction):
        """Indicate confirmation that the transaction is done.
        """
        serial = self._storage.tpc_finish(transaction)
        assert type(serial) is bytes, repr(serial)
        for oid_iterator in self._modified, self._creating:
            for oid in oid_iterator:
                obj = self._cache.get(oid)
                # Ignore missing objects and don't update ghosts.
                if obj is not None and obj._p_changed is not None:
                    obj._p_changed = 0
                    obj._p_serial = serial
        self._tpc_cleanup()

    def sortKey(self):
        """Return a consistent sort key for this connection."""
        return "%s:%s" % (self._storage.sortKey(), id(self))

    # Data manager (ISavepointDataManager) methods
    ##########################################################################

    ##########################################################################
    # Transaction-manager synchronization -- ISynchronizer

    def beforeCompletion(self, txn):
        # We don't do anything before a commit starts.
        pass

    def newTransaction(self, transaction, sync=True):
        self._readCurrent.clear()

        try:
            self._storage.sync(sync)
            invalidated = self._storage.poll_invalidations()
            if invalidated is None:
                # special value: the transaction is so old that
                # we need to flush the whole cache.
                invalidated = self._cache.cache_data.copy()
            self._cache.invalidate(invalidated)
        except AttributeError:
            assert self._storage is None

        # Now is a good time to collect some garbage.
        self._cache.incrgc()

    def afterCompletion(self, transaction):
        # Note that we call newTransaction here for 2 reasons:
        # a) Applying invalidations early frees up resources
        #    early. This is especially useful if the connection isn't
        #    going to be used in a while.
        # b) Non-hygienic applications might start new transactions by
        #    finalizing previous ones without calling begin.  We pass
        #    False to avoid possibly expensive sync calls to not
        #    penalize well-behaved applications that call begin.
        self.newTransaction(transaction, False)

    # Transaction-manager synchronization -- ISynchronizer
    ##########################################################################

    ##########################################################################
    # persistent.interfaces.IPersistentDatamanager

    def oldstate(self, obj, tid):
        """Return copy of 'obj' that was written by transaction 'tid'."""
        assert obj._p_jar is self
        p = self._storage.loadSerial(obj._p_oid, tid)
        return self._reader.getState(p)

    def setstate(self, obj):
        """Turns the ghost 'obj' into a real object by loading its state from
        the database."""
        oid = obj._p_oid

        if self.opened is None:
            msg = ("Shouldn't load state for %s %s "
                   "when the connection is closed" %
                   (className(obj), oid_repr(oid)))
            try:
                raise ConnectionStateError(msg)
            except:
                self._log.exception(msg)
                raise

        try:
            p, serial = self._storage.load(oid)

            self._load_count += 1

            self._reader.setGhostState(obj, p)
            obj._p_serial = serial
            self._cache.update_object_size_estimation(oid, len(p))
            obj._p_estimated_size = len(p)

            # Blob support
            if isinstance(obj, Blob):
                obj._p_blob_uncommitted = None
                obj._p_blob_committed = self._storage.loadBlob(oid, serial)

        except ConflictError:
            raise
        except:
            self._log.exception("Couldn't load state for %s %s",
                                className(obj), oid_repr(oid))
            raise

    def register(self, obj):
        """Register obj with the current transaction manager.

        A subclass could override this method to customize the default
        policy of one transaction manager for each thread.

        obj must be an object loaded from this Connection.
        """
        assert obj._p_jar is self
        if obj._p_oid is None:
            # The actual complaint here is that an object without
            # an oid is being registered.  I can't think of any way to
            # achieve that without assignment to _p_jar.  If there is
            # a way, this will be a very confusing exception.
            raise ValueError("assigning to _p_jar is not supported")
        elif obj._p_oid in self._added:
            # It was registered before it was added to _added.
            return
        self._register(obj)

    def _register(self, obj=None):

        # The order here is important.  We need to join before
        # registering the object, because joining may take a
        # savepoint, and the savepoint should not reflect the change
        # to the object.

        if self._needs_to_join:
            self.transaction_manager.get().join(self)
            self._needs_to_join = False

        if obj is not None:
            self._registered_objects.append(obj)

    def readCurrent(self, ob):
        assert ob._p_jar is self
        assert ob._p_oid is not None and ob._p_serial is not None
        if ob._p_serial != z64:
            self._readCurrent[ob._p_oid] = ob._p_serial
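
    # Hedged usage note: application code calls readCurrent(ob) to declare
    # that a transaction depends on the currently loaded revision of ob
    # without modifying it.  commit() above then hands each recorded serial
    # to the storage's checkCurrentSerialInTransaction(), which raises a
    # conflict if another transaction wrote the object in the meantime.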

    # persistent.interfaces.IPersistentDatamanager
    ##########################################################################

    ##########################################################################
    # PROTECTED stuff (used by e.g. ZODB.DB.DB)

    def _cache_items(self):
        # find all items on the lru list
        items = self._cache.lru_items()
        # find everything: some on the lru list, some not
        everything = self._cache.cache_data
        # remove those items that are on the lru list
        for k, v in items:
            del everything[k]
        # return a list of [ghosts....not recently used.....recently used]
        return list(everything.items()) + items

    def open(self, transaction_manager=None, delegate=True):
        """Open this Connection for use by its DB.

        This method is called by the DB every time a Connection
        is opened.  Any invalidations received while the Connection
        was closed will be processed.

        If the global module function resetCaches() was called, the
        cache will be cleared.

        Parameters:
        transaction_manager: transaction manager to use.  None means
            use the default transaction manager.  The connection
            registers with it for afterCompletion() calls.
        delegate: if true, open() is also delegated to the secondary
            connections of a multi-database group.
        """

        if transaction_manager is None:
            transaction_manager = transaction.manager

        self.transaction_manager = transaction_manager

        self.opened = time.time()

        if self._reset_counter != global_reset_counter:
            # New code is in place.  Start a new cache.
            self._resetCache()

        # This newTransaction is to deal with some pathological cases:
        #
        # a) Someone opens a connection when a transaction isn't
        #    active and proceeds without calling begin on a
        #    transaction manager. We initialize the transaction for
        #    the connection, but we don't do a storage sync, since
        #    this will be done if a well-behaved application calls
        #    begin, and we don't want to penalize well-behaved
        #    transactions by syncing twice, as storage syncs might be
        #    expensive.
        # b) Lots of tests assume that connection transaction
        #    information is set on open.
        #
        # Fortunately, this is a cheap operation.  It doesn't really
        # cost much, if anything.
        self.newTransaction(None, False)

        transaction_manager.registerSynch(self)

        if self._cache is not None:
            self._cache.incrgc()  # This is a good time to do some GC

        if delegate:
            # delegate open to secondary connections
            for connection in self.connections.values():
                if connection is not self:
                    connection.open(transaction_manager, False)

    def _resetCache(self):
        """Creates a new cache, discarding the old one.

        See the docstring for the resetCaches() function.
        """
        self._reset_counter = global_reset_counter
        cache_size = self._cache.cache_size
        cache_size_bytes = self._cache.cache_size_bytes
        self._cache = cache = PickleCache(self, cache_size, cache_size_bytes)
        if getattr(self, '_reader', None) is not None:
            self._reader._cache = cache

    def _release_resources(self):
        for c in six.itervalues(self.connections):
            if c._storage is not None:
                c._storage.release()
            c._storage = c._normal_storage = None
            c._cache = PickleCache(self, 0, 0)

    ##########################################################################
    # Python protocol

    def __repr__(self):
        return '<Connection at %08x>' % (positive_id(self), )

    # Python protocol
    ##########################################################################

    ##########################################################################
    # DEPRECATION candidates

    __getitem__ = get

    def exchange(self, old, new):
        # called by a ZClasses method that isn't executed by the test suite
        oid = old._p_oid
        new._p_oid = oid
        new._p_jar = self
        new._p_changed = 1
        self._register(new)
        self._cache[oid] = new

    # DEPRECATION candidates
    ##########################################################################

    ##########################################################################
    # DEPRECATED methods

    # None at present.

    # DEPRECATED methods
    ##########################################################################

    #####################################################################
    # Savepoint support

    def savepoint(self):
        if self._savepoint_storage is None:
            tmpstore = TmpStore(self._normal_storage)
            self._savepoint_storage = tmpstore
            self._storage = self._savepoint_storage

        self._creating.clear()
        self._commit(None)
        self._storage.creating.update(self._creating)
        self._creating.clear()
        self._registered_objects = []

        state = (
            self._storage.position,
            self._storage.index.copy(),
            self._storage.creating.copy(),
        )
        result = Savepoint(self, state)
        # While the interface doesn't guarantee this, savepoints are
        # sometimes used just to "break up" very long transactions, and as
        # a pragmatic matter this is a good time to reduce the cache
        # memory burden.
        self.cacheGC()
        return result
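
    # Hedged usage sketch: savepoints are normally taken through the
    # transaction package rather than by calling this method directly
    # (variable names below are illustrative):
    #
    #     sp = transaction.savepoint()   # invokes savepoint() on joined managers
    #     ... further changes ...
    #     sp.rollback()                  # reaches _rollback_savepoint below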

    def _rollback_savepoint(self, state):
        self._abort()
        self._registered_objects = []
        src = self._storage

        # Invalidate objects created *after* the savepoint.
        self._invalidate_creating(
            (oid for oid in src.creating if oid not in state[2]))
        index = src.index
        src.reset(*state)
        self._cache.invalidate(index)

    def _commit_savepoint(self, transaction):
        """Commit all changes made in savepoints and begin 2-phase commit
        """
        src = self._savepoint_storage
        self._storage = self._normal_storage
        self._savepoint_storage = None
        try:
            self._log.debug("Committing savepoints of size %s", src.getSize())
            oids = sorted(src.index.keys())

            # Copy invalidating and creating info from temporary storage:
            self._modified.extend(oids)
            self._creating.update(src.creating)

            for oid in oids:
                data, serial = src.load(oid)
                obj = self._cache.get(oid, None)
                if obj is not None:
                    self._cache.update_object_size_estimation(
                        obj._p_oid, len(data))
                    obj._p_estimated_size = len(data)
                if isinstance(self._reader.getGhost(data), Blob):
                    blobfilename = src.loadBlob(oid, serial)
                    self._storage.storeBlob(oid, serial, data, blobfilename,
                                            '', transaction)
                    # we invalidate the object here in order to ensure
                    # that the next attribute access of its name
                    # unghostifies it, which will cause its blob data
                    # to be reattached "cleanly"
                    self._cache.invalidate(oid)
                else:
                    self._storage.store(oid, serial, data, '', transaction)

                self._readCurrent.pop(oid, None)  # same as in _store_objects()
        finally:
            src.close()

    def _abort_savepoint(self):
        """Discard all savepoint data."""
        src = self._savepoint_storage
        self._invalidate_creating(src.creating)
        self._storage = self._normal_storage
        self._savepoint_storage = None

        # Note: If we invalidate a non-ghostifiable object (i.e. a
        # persistent class), the object will immediately reread its
        # state.  That means that the following call could result in a
        # call to self.setstate, which, of course, must succeed.  In
        # general, it would be better if the read could be delayed
        # until the start of the next transaction.  If we read at the
        # end of a transaction and if the object was invalidated
        # during this transaction, then we'll read non-current data,
        # which we'll discard later in transaction finalization.  We
        # could, theoretically queue this invalidation by calling
        # self.invalidate.  Unfortunately, attempts to make that
        # change resulted in mysterious test failures.  It's pretty
        # unlikely that the object we are invalidating was invalidated
        # by another thread, so the risk of a reread is pretty low.
        # It's really not worth the effort to pursue this.

        # Note that we do this *after* resetting the storage so that, if
        # data are read, we read it from the reset storage!

        self._cache.invalidate(src.index)

        src.close()

    # Savepoint support
    #####################################################################

    def prefetch(self, *args):
        try:
            self._storage.prefetch(self._prefetch_flatten(args))
        except AttributeError:
            if not hasattr(self._storage, 'prefetch'):
                self.prefetch = lambda *a: None
            else:
                raise

    def _prefetch_flatten(self, args):
        for arg in args:
            if isinstance(arg, bytes):
                yield arg
            elif hasattr(arg, '_p_oid'):
                yield arg._p_oid
            else:
                for ob in arg:
                    if isinstance(ob, bytes):
                        yield ob
                    else:
                        yield ob._p_oid
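
# Hedged usage sketch for prefetch(): as _prefetch_flatten shows, raw oids
# (bytes), persistent objects, and iterables of either may be mixed freely
# (the names below are illustrative):
#
#     conn.prefetch(root_obj)                        # one persistent object
#     conn.prefetch(some_oid_bytes, [obj_a, obj_b])  # raw oid plus a list
#
# If the storage has no prefetch() method, the AttributeError handler above
# rebinds conn.prefetch to a no-op lambda so later calls cost nothing.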
Example #6
def NewObjectReader_load_multi_oid(self, database_name, oid):
    conn = self._conn.get_connection(database_name)
    # TODO, make connection _cache attr public
    reader = ObjectReader(conn, conn._cache, classfactory.ClassFactory)
    return reader.load_oid(oid)
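
# Hedged sketch of how a replacement like this is typically wired in; the
# monkey-patching style is an assumption based on the function's name, and
# ObjectReader here refers to ZODB.serialize.ObjectReader:

from ZODB.serialize import ObjectReader
ObjectReader.load_multi_oid = NewObjectReader_load_multi_oid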
Example #7
class Connection(ExportImport, object):
    """Connection to ZODB for loading and storing objects.

    Connections manage object state in collaboration with transaction
    managers.  They're created by calling the
    :meth:`~ZODB.DB.open` method on :py:class:`database
    <ZODB.DB>` objects.
    """

    _code_timestamp = 0

    #: Transaction manager associated with the connection when it was opened.
    transaction_manager = valuedoc.ValueDoc('current transaction manager')

    ##########################################################################
    # Connection methods, ZODB.IConnection

    def __init__(self, db, cache_size=400, before=None, cache_size_bytes=0):
        """Create a new Connection."""

        self._log = logging.getLogger('ZODB.Connection')
        self._debug_info = ()

        self._db = db
        self.large_record_size = db.large_record_size

        # historical connection
        self.before = before

        # Multi-database support
        self.connections = {self._db.database_name: self}

        storage = db._mvcc_storage
        if before:
            try:
                before_instance = storage.before_instance
            except AttributeError:
                def before_instance(before):
                    return HistoricalStorageAdapter(
                        storage.new_instance(), before)
            storage = before_instance(before)
        else:
            storage = storage.new_instance()

        self._normal_storage = self._storage = storage
        self._savepoint_storage = None

        # Do we need to join a txn manager?
        self._needs_to_join = True
        self.transaction_manager = None
        self.opened = None # time.time() when DB.open() opened us

        self._reset_counter = global_reset_counter
        self._load_count = 0   # Number of objects unghosted
        self._store_count = 0  # Number of objects stored

        # Cache which can ghostify (forget the state of) objects not
        # recently used. Its API is roughly that of a dict, with
        # additional gc-related and invalidation-related methods.
        self._cache = PickleCache(self, cache_size, cache_size_bytes)

        # The pre-cache is used by get to avoid infinite loops when
        # objects immediately load their state when they get their
        # persistent data set.
        self._pre_cache = {}

        # List of all objects (not oids) registered as modified by the
        # persistence machinery, or by add(), or whose access caused a
        # ReadConflictError (just to be able to clean them up from the
        # cache on abort with the other modified objects). All objects
        # of this list are either in _cache or in _added.
        self._registered_objects = [] # [object]

        # ids and serials of objects for which readCurrent was called
        # in a transaction.
        self._readCurrent = {} # {oid ->serial}

        # Dict of oid->obj added explicitly through add(). Used as a
        # preliminary cache until commit time when objects are all moved
        # to the real _cache. The objects are moved to _creating at
        # commit time.
        self._added = {} # {oid -> object}

        # During commit this is turned into a list, which receives
        # objects added as a side-effect of storing a modified object.
        self._added_during_commit = None

        # During commit, all objects go to either _modified or _creating:

        # Dict of oid->flag of new objects (without serial), either
        # added by add() or implicitly added (discovered by the
        # serializer during commit). The flag is True for implicit
        # adding. Used during abort to remove created objects from the
        # _cache, and by persistent_id to check that a new object isn't
        # reachable from multiple databases.
        self._creating = {} # {oid -> implicitly_added_flag}

        # List of oids of modified objects, which have to be invalidated
        # in the cache on abort and in other connections on finish.
        self._modified = [] # [oid]

        # We intend to prevent committing a transaction in which
        # ReadConflictError occurs.  _conflicts is the set of oids that
        # experienced ReadConflictError.  Any time we raise ReadConflictError,
        # the oid should be added to this set, and we should be sure that the
        # object is registered.  Because it's registered, Connection.commit()
        # will raise ReadConflictError again (because the oid is in
        # _conflicts).
        self._conflicts = {}

        # To support importFile(), implemented in the ExportImport base
        # class, we need to run _importDuringCommit() from our commit()
        # method.  If _import is not None, it is a two-tuple of arguments
        # to pass to _importDuringCommit().
        self._import = None

        self._reader = ObjectReader(self, self._cache, self._db.classFactory)

    def new_oid(self):
        return self._storage.new_oid()

    def add(self, obj):
        """Add a new object 'obj' to the database and assign it an oid."""
        if self.opened is None:
            raise ConnectionStateError("The database connection is closed")

        marker = object()
        oid = getattr(obj, "_p_oid", marker)
        if oid is marker:
            raise TypeError("Only first-class persistent objects may be"
                            " added to a Connection.", obj)
        elif obj._p_jar is None:
            self._add(obj, self.new_oid())
        elif obj._p_jar is not self:
            raise InvalidObjectReference(obj, obj._p_jar)

    def _add(self, obj, oid):
        assert obj._p_oid is None
        oid = obj._p_oid = oid
        obj._p_jar = self
        if self._added_during_commit is not None:
            self._added_during_commit.append(obj)
        self._register(obj)
        # Add to _added after calling register(), so that _added
        # can be used as a test for whether the object has been
        # registered with the transaction.
        self._added[oid] = obj

    def get(self, oid):
        """Return the persistent object with oid 'oid'."""
        if self.opened is None:
            raise ConnectionStateError("The database connection is closed")

        obj = self._cache.get(oid, None)
        if obj is not None:
            return obj
        obj = self._added.get(oid, None)
        if obj is not None:
            return obj
        obj = self._pre_cache.get(oid, None)
        if obj is not None:
            return obj

        p, _ = self._storage.load(oid)
        obj = self._reader.getGhost(p)

        # Avoid an infinite loop if obj tries to load its state before
        # it is added to the cache and its state refers to it.
        # (This will typically be the case for non-ghostifiable objects,
        # like persistent caches.)
        self._pre_cache[oid] = obj
        self._cache.new_ghost(oid, obj)
        self._pre_cache.pop(oid)
        return obj

    def cacheMinimize(self):
        """Deactivate all unmodified objects in the cache.
        """
        for connection in six.itervalues(self.connections):
            connection._cache.minimize()

    # TODO: we should test what happens when cacheGC is called mid-transaction.
    def cacheGC(self):
        """Reduce cache size to target size.
        """
        for connection in six.itervalues(self.connections):
            connection._cache.incrgc()

    __onCloseCallbacks = None
    def onCloseCallback(self, f):
        """Register a callable, f, to be called by close()."""
        if self.__onCloseCallbacks is None:
            self.__onCloseCallbacks = []
        self.__onCloseCallbacks.append(f)

    def close(self, primary=True):
        """Close the Connection."""
        if not self._needs_to_join:
            # We're currently joined to a transaction.
            raise ConnectionStateError("Cannot close a connection joined to "
                                       "a transaction")

        if self._cache is not None:
            self._cache.incrgc() # This is a good time to do some GC

        # Call the close callbacks.
        if self.__onCloseCallbacks is not None:
            callbacks = self.__onCloseCallbacks
            self.__onCloseCallbacks = None
            for f in callbacks:
                try:
                    f()
                except: # except what?
                    f = getattr(f, 'im_self', f)
                    self._log.exception("Close callback failed for %s", f)

        self._debug_info = ()

        if self.opened and self.transaction_manager is not None:
            # transaction_manager could be None if one of the __onCloseCallbacks
            # closed the DB already, e.g., ZODB.connection() does this.
            self.transaction_manager.unregisterSynch(self)


        am = self._db._activity_monitor
        if am is not None:
            am.closedConnection(self)

        # Drop transaction manager to release resources and help prevent errors
        self.transaction_manager = None

        if hasattr(self._storage, 'afterCompletion'):
            self._storage.afterCompletion()

        if primary:
            for connection in self.connections.values():
                if connection is not self:
                    connection.close(False)

            # Return the connection to the pool.
            if self.opened is not None:
                self._db._returnToPool(self)

                # _returnToPool() set self.opened to None.
                # However, we can't assert that here, because self may
                # have been reused (by another thread) by the time we
                # get back here.
        else:
            self.opened = None

        # We may have been reused by another thread at this point so
        # we can't manipulate or check the state of `self` any more.


    def db(self):
        """Returns a handle to the database this connection belongs to."""
        return self._db

    def isReadOnly(self):
        """Returns True if this connection is read only."""
        if self.opened is None:
            raise ConnectionStateError("The database connection is closed")
        return self._storage.isReadOnly()

    @property
    def root(self):
        """Return the database root object."""
        return RootConvenience(self.get(z64))
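
    # Hedged usage sketch (hypothetical helper): RootConvenience lets the
    # root mapping be used with attribute access, while calling conn.root()
    # still returns the underlying root object.
    def _example_use_root(conn, value):
        conn.root.example_key = value   # same as conn.root()['example_key'] = value
        return conn.root()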

    def get_connection(self, database_name):
        """Return a Connection for the named database."""
        connection = self.connections.get(database_name)
        if connection is None:
            new_con = self._db.databases[database_name].open(
                transaction_manager=self.transaction_manager,
                before=self.before,
                )
            self.connections.update(new_con.connections)
            new_con.connections = self.connections
            connection = new_con
        return connection
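
    # Hedged usage sketch (hypothetical helper): the secondary connection
    # returned here shares this connection's transaction manager, so a
    # single commit covers changes made through both databases.
    def _example_cross_database_write(conn, other_db_name, value):
        other = conn.get_connection(other_db_name)
        other.root()['example_key'] = value
        conn.transaction_manager.commit()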

    def _implicitlyAdding(self, oid):
        """Are we implicitly adding an object within the current transaction

        This is used in a check to avoid implicitly adding an object
        to a database in a multi-database situation.
        See serialize.ObjectWriter.persistent_id.

        """
        return (self._creating.get(oid, 0)
                or
                ((self._savepoint_storage is not None)
                 and
                 self._savepoint_storage.creating.get(oid, 0)
                 )
                )

    def sync(self):
        """Manually update the view on the database."""
        self.transaction_manager.begin()
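
    # Hedged usage sketch (hypothetical helper): begin() aborts the current
    # transaction and, for non-explicit transaction managers, polls the
    # storage for invalidations, so reads made afterwards see the latest
    # committed data.
    def _example_refresh_view(conn):
        conn.sync()
        return conn.root()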

    def getDebugInfo(self):
        """Returns a tuple with different items for debugging the
        connection.
        """
        return self._debug_info

    def setDebugInfo(self, *args):
        """Add the given items to the debug information of this connection."""
        self._debug_info = self._debug_info + args

    def getTransferCounts(self, clear=False):
        """Returns the number of objects loaded and stored."""
        res = self._load_count, self._store_count
        if clear:
            self._load_count = 0
            self._store_count = 0
        return res
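
    # Hedged usage sketch (hypothetical helper): clearing the counters first
    # makes getTransferCounts() measure a single unit of work.
    def _example_measure_transfers(conn, work):
        conn.getTransferCounts(clear=True)      # discard earlier activity
        work()
        loaded, stored = conn.getTransferCounts()
        return loaded, stored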

    # Connection methods
    ##########################################################################

    ##########################################################################
    # Data manager (ISavepointDataManager) methods

    def abort(self, transaction):
        """Abort a transaction and forget all changes."""
        # The order is important here.  We want to abort registered
        # objects before we process the cache.  Otherwise, we may un-add
        # objects added in savepoints.  If they've been modified since
        # the savepoint, then they won't have _p_oid or _p_jar after
        # they've been unadded. This will make the code in _abort
        # confused.
        self._abort()

        if self._savepoint_storage is not None:
            self._abort_savepoint()

        self._invalidate_creating()
        self._tpc_cleanup()

    def _abort(self):
        """Abort a transaction and forget all changes."""

        for obj in self._registered_objects:
            oid = obj._p_oid
            assert oid is not None
            if oid in self._added:
                del self._added[oid]
                if self._cache.get(oid) is not None:
                    del self._cache[oid]
                del obj._p_jar
                del obj._p_oid
                if obj._p_changed:
                    obj._p_changed = False
            else:
                # Note: If we invalidate a non-ghostifiable object
                # (i.e. a persistent class), the object will
                # immediately reread its state.  That means that the
                # following call could result in a call to
                # self.setstate, which, of course, must succeed.
                # In general, it would be better if the read could be
                # delayed until the start of the next transaction.  If
                # we read at the end of a transaction and if the
                # object was invalidated during this transaction, then
                # we'll read non-current data, which we'll discard
                # later in transaction finalization.  Unfortunately, we
                # can only delay the read if this abort corresponds to
                # a top-level-transaction abort.  We can't tell if
                # this is a top-level-transaction abort, so we have to
                # go ahead and invalidate now.  Fortunately, it's
                # pretty unlikely that the object we are invalidating
                # was invalidated by another thread, so the risk of a
                # reread is pretty low.

                self._cache.invalidate(oid)

    def _tpc_cleanup(self):
        """Performs cleanup operations to support tpc_finish and tpc_abort."""
        self._conflicts.clear()
        self._needs_to_join = True
        self._registered_objects = []
        self._creating.clear()

    def tpc_begin(self, transaction):
        """Begin commit of a transaction, starting the two-phase commit."""
        self._modified = []
        meta_data = TransactionMetaData(
            transaction.user,
            transaction.description,
            transaction.extension)
        transaction.set_data(self, meta_data)

        # _creating is a list of oids of new objects, which is used to
        # remove them from the cache if a transaction aborts.
        self._creating.clear()
        self._normal_storage.tpc_begin(meta_data)

    def commit(self, transaction):
        """Commit changes to an object"""
        transaction = transaction.data(self)

        if self._savepoint_storage is not None:

            # We first checkpoint the current changes to the savepoint
            self.savepoint()

            # then commit all of the savepoint changes at once
            self._commit_savepoint(transaction)

            # No need to call _commit since savepoint did.

        else:
            self._commit(transaction)

        for oid, serial in six.iteritems(self._readCurrent):
            try:
                self._storage.checkCurrentSerialInTransaction(
                    oid, serial, transaction)
            except ConflictError:
                self._cache.invalidate(oid)
                raise

    def _commit(self, transaction):
        """Commit changes to an object"""

        if self.before is not None:
            raise ReadOnlyHistoryError()

        if self._import:
            # We are importing an export file. We always do this
            # while making a savepoint so we can copy export data
            # directly to our storage, typically a TmpStore.
            self._importDuringCommit(transaction, *self._import)
            self._import = None

        # Just in case an object is added as a side-effect of storing
        # a modified object.  If, for example, a __getstate__() method
        # calls add(), the newly added objects will show up in
        # _added_during_commit.  This sounds insane, but has actually
        # happened.

        self._added_during_commit = []

        for obj in self._registered_objects:
            oid = obj._p_oid
            assert oid
            if oid in self._conflicts:
                raise ReadConflictError(object=obj)

            if obj._p_jar is not self:
                raise InvalidObjectReference(obj, obj._p_jar)
            elif oid in self._added:
                assert obj._p_serial == z64
            elif oid in self._creating or not obj._p_changed:
                # Nothing to do.  It's been said that it's legal, e.g., for
                # an object to set _p_changed to false after it's been
                # changed and registered.
                # And new objects that are registered after any referrer are
                # already processed.
                continue

            self._store_objects(ObjectWriter(obj), transaction)

        for obj in self._added_during_commit:
            self._store_objects(ObjectWriter(obj), transaction)
        self._added_during_commit = None
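
    # Hedged sketch (illustration only): objects may legitimately be add()ed
    # while a commit is already storing other objects -- for example from a
    # __getstate__ implementation -- and such late arrivals are collected in
    # _added_during_commit and stored by the loop above.
    def _example_add_during_commit(owner):
        from persistent import Persistent       # standard persistent base class
        extra = Persistent()
        owner._p_jar.add(extra)                  # picked up via _added_during_commit
        return extra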

    def _store_objects(self, writer, transaction):
        for obj in writer:
            oid = obj._p_oid
            serial = getattr(obj, "_p_serial", z64)

            if ((serial == z64)
                and
                ((self._savepoint_storage is None)
                 or (oid not in self._savepoint_storage.creating)
                 or self._savepoint_storage.creating[oid]
                 )
                ):

                # obj is a new object

                # Because obj was added, it is now in _creating, so it
                # can be removed from _added.  If oid wasn't in
                # adding, then we are adding it implicitly.

                implicitly_adding = self._added.pop(oid, None) is None

                self._creating[oid] = implicitly_adding

            else:
                self._modified.append(oid)

            p = writer.serialize(obj)  # This calls __getstate__ of obj
            if len(p) >= self.large_record_size:
                warnings.warn(large_object_message % (obj.__class__, len(p)))

            if isinstance(obj, Blob):
                if not IBlobStorage.providedBy(self._storage):
                    raise Unsupported(
                        "Storing Blobs in %s is not supported." %
                        repr(self._storage))
                if obj.opened():
                    raise ValueError("Can't commit with opened blobs.")
                blobfilename = obj._uncommitted()
                if blobfilename is None:
                    assert serial is not None # See _uncommitted
                    self._modified.pop() # not modified
                    continue
                s = self._storage.storeBlob(oid, serial, p, blobfilename,
                                            '', transaction)
                # we invalidate the object here in order to ensure
                # that the next attribute access of its name will
                # unghostify it, which will cause its blob data
                # to be reattached "cleanly"
                obj._p_invalidate()
            else:
                s = self._storage.store(oid, serial, p, '', transaction)

            self._store_count += 1
            # Put the object in the cache before handling the
            # response, just in case the response contains the
            # serial number for a newly created object
            try:
                self._cache[oid] = obj
            except:
                # Dang, I bet it's wrapped:
                # TODO:  Deprecate, then remove, this.
                if hasattr(obj, 'aq_base'):
                    self._cache[oid] = obj.aq_base
                else:
                    raise

            self._cache.update_object_size_estimation(oid, len(p))
            obj._p_estimated_size = len(p)

            # if we write an object, we don't want to check if it was read
            # while current.  This is a convenient choke point to do this.
            self._readCurrent.pop(oid, None)
            if s:
                # savepoint
                obj._p_changed = 0 # transition from changed to up-to-date
                obj._p_serial = s

    def tpc_abort(self, transaction):
        transaction = transaction.data(self)

        if self._import:
            self._import = None

        if self._savepoint_storage is not None:
            self._abort_savepoint()

        self._storage.tpc_abort(transaction)

        # Note: If we invalidate a non-ghostifiable object (i.e. a
        # persistent class), the object will immediately reread its
        # state.  That means that the following call could result in a
        # call to self.setstate, which, of course, must succeed.  In
        # general, it would be better if the read could be delayed
        # until the start of the next transaction.  If we read at the
        # end of a transaction and if the object was invalidated
        # during this transaction, then we'll read non-current data,
        # which we'll discard later in transaction finalization.  We
        # could, theoretically queue this invalidation by calling
        # self.invalidate.  Unfortunately, attempts to make that
        # change resulted in mysterious test failures.  It's pretty
        # unlikely that the object we are invalidating was invalidated
        # by another thread, so the risk of a reread is pretty low.
        # It's really not worth the effort to pursue this.

        self._cache.invalidate(self._modified)
        self._invalidate_creating()
        while self._added:
            oid, obj = self._added.popitem()
            if obj._p_changed:
                obj._p_changed = False
            del obj._p_oid
            del obj._p_jar
        self._tpc_cleanup()

    def _invalidate_creating(self, creating=None):
        """Disown any objects newly saved in an uncommitted transaction."""
        if creating is None:
            creating = self._creating
            self._creating = {}

        for oid in creating:
            o = self._cache.get(oid)
            if o is not None:
                del self._cache[oid]
                if o._p_changed:
                    o._p_changed = False
                del o._p_jar
                del o._p_oid


    def tpc_vote(self, transaction):
        """Verify that a data manager can commit the transaction."""
        try:
            vote = self._storage.tpc_vote
        except AttributeError:
            return

        transaction = transaction.data(self)

        try:
            s = vote(transaction)
        except ReadConflictError as v:
            if v.oid:
                self._cache.invalidate(v.oid)
            raise
        if s:
            # Resolved conflicts.
            for oid in s:
                obj = self._cache.get(oid)
                if obj is not None:
                    del obj._p_changed # transition from changed to ghost

    def tpc_finish(self, transaction):
        """Indicate confirmation that the transaction is done.
        """
        transaction = transaction.data(self)

        serial = self._storage.tpc_finish(transaction)
        assert type(serial) is bytes, repr(serial)
        for oid_iterator in self._modified, self._creating:
            for oid in oid_iterator:
                obj = self._cache.get(oid)
                # Ignore missing objects and don't update ghosts.
                if obj is not None and obj._p_changed is not None:
                    obj._p_changed = 0
                    obj._p_serial = serial
        self._tpc_cleanup()
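
    # Hedged usage sketch (hypothetical helper): applications never call the
    # tpc_* methods themselves; committing through the connection's
    # transaction manager drives tpc_begin(), commit(), tpc_vote() and
    # finally tpc_finish() above.
    def _example_commit(conn):
        root = conn.root()
        root['example_counter'] = root.get('example_counter', 0) + 1
        conn.transaction_manager.commit()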

    def sortKey(self):
        """Return a consistent sort key for this connection."""
        return "%s:%s" % (self._storage.sortKey(), id(self))

    # Data manager (ISavepointDataManager) methods
    ##########################################################################

    ##########################################################################
    # Transaction-manager synchronization -- ISynchronizer

    def beforeCompletion(self, txn):
        # We don't do anything before a commit starts.
        pass

    def newTransaction(self, transaction, sync=True):
        self._readCurrent.clear()

        try:
            self._storage.sync(sync)
            invalidated = self._storage.poll_invalidations()
            if invalidated is None:
                # special value: the transaction is so old that
                # we need to flush the whole cache.
                invalidated = self._cache.cache_data.copy()
            self._cache.invalidate(invalidated)
        except AttributeError:
            assert self._storage is None

    def afterCompletion(self, transaction):
        # Note that we call newTransaction here for 2 reasons:
        # a) Applying invalidations early frees up resources
        #    early. This is especially useful if the connection isn't
        #    going to be used in a while.
        # b) Non-hygienic applications might start new transactions by
        #    finalizing previous ones without calling begin.  We pass
        #    False to avoid possibly expensive sync calls to not
        #    penalize well-behaved applications that call begin.
        if hasattr(self._storage, 'afterCompletion'):
            self._storage.afterCompletion()

        if not self.explicit_transactions:
            self.newTransaction(transaction, False)

        # Now is a good time to collect some garbage.
        self._cache.incrgc()

    # Transaction-manager synchronization -- ISynchronizer
    ##########################################################################

    ##########################################################################
    # persistent.interfaces.IPersistentDatamanager

    def oldstate(self, obj, tid):
        """Return copy of 'obj' that was written by transaction 'tid'."""
        assert obj._p_jar is self
        p = self._storage.loadSerial(obj._p_oid, tid)
        return self._reader.getState(p)

    def setstate(self, obj):
        """Load the state for an (ghost) object
        """

        oid = obj._p_oid

        if self.opened is None:
            msg = ("Shouldn't load state for %s %s "
                   "when the connection is closed"
                   % (className(obj), oid_repr(oid)))
            try:
                raise ConnectionStateError(msg)
            except:
                self._log.exception(msg)
                raise

        try:
            p, serial = self._storage.load(oid)

            self._load_count += 1

            self._reader.setGhostState(obj, p)
            obj._p_serial = serial
            self._cache.update_object_size_estimation(oid, len(p))
            obj._p_estimated_size = len(p)

            # Blob support
            if isinstance(obj, Blob):
                obj._p_blob_uncommitted = None
                obj._p_blob_committed = self._storage.loadBlob(oid, serial)

        except ConflictError:
            raise
        except:
            self._log.exception("Couldn't load state for %s %s",
                                className(obj), oid_repr(oid))
            raise

    def register(self, obj):
        """Register obj with the current transaction manager.

        A subclass could override this method to customize the default
        policy of one transaction manager for each thread.

        obj must be an object loaded from this Connection.
        """
        assert obj._p_jar is self
        if obj._p_oid is None:
            # The actual complaint here is that an object without
            # an oid is being registered.  I can't think of any way to
            # achieve that without assignment to _p_jar.  If there is
            # a way, this will be a very confusing exception.
            raise ValueError("assigning to _p_jar is not supported")
        elif obj._p_oid in self._added:
            # It was registered before it was added to _added.
            return
        self._register(obj)

    def _register(self, obj=None):

        # The order here is important.  We need to join before
        # registering the object, because joining may take a
        # savepoint, and the savepoint should not reflect the change
        # to the object.

        if self._needs_to_join:
            self.transaction_manager.get().join(self)
            self._needs_to_join = False

        if obj is not None:
            self._registered_objects.append(obj)

    def readCurrent(self, ob):
        assert ob._p_jar is self
        assert ob._p_oid is not None and ob._p_serial is not None
        if ob._p_serial != z64:
            self._readCurrent[ob._p_oid] = ob._p_serial
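
    # Hedged usage sketch (hypothetical helper): readCurrent() supports the
    # "read one object to decide how to write another" pattern.  If `source`
    # is modified concurrently, the commit-time
    # checkCurrentSerialInTransaction() check fails instead of silently
    # committing a decision based on stale data.
    def _example_read_then_write(source, target):
        conn = source._p_jar
        conn.readCurrent(source)
        target.value = source.value             # hypothetical attributes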

    # persistent.interfaces.IPersistentDatamanager
    ##########################################################################

    ##########################################################################
    # PROTECTED stuff (used by e.g. ZODB.DB.DB)

    def _cache_items(self):
        # find all items on the lru list
        items = self._cache.lru_items()
        # find everything, some on the lru list, some not
        everything = self._cache.cache_data
        # remove those items that are on the lru list
        for k,v in items:
            del everything[k]
        # return a list of [ghosts....not recently used.....recently used]
        return list(everything.items()) + items

    def open(self, transaction_manager=None, delegate=True):
        """Register odb, the DB that this Connection uses.

        This method is called by the DB every time a Connection
        is opened.  Any invalidations received while the Connection
        was closed will be processed.

        If the global module function resetCaches() was called, the
        cache will be cleared.

        Parameters:
        odb: database that owns the Connection
        transaction_manager: transaction manager to use.  None means
            use the default transaction manager.
        register for afterCompletion() calls.
        """

        if transaction_manager is None:
            transaction_manager = transaction.manager

        self.transaction_manager = transaction_manager

        self.explicit_transactions = getattr(transaction_manager,
                                             'explicit', False)

        self.opened = time.time()

        if self._reset_counter != global_reset_counter:
            # New code is in place.  Start a new cache.
            self._resetCache()

        if not self.explicit_transactions:
            # This newTransaction is to deal with some pathological cases:
            #
            # a) Someone opens a connection when a transaction isn't
            #    active and proceeds without calling begin on a
            #    transaction manager. We initialize the transaction for
            #    the connection, but we don't do a storage sync, since
            #    this will be done if a well-behaved application calls
            #    begin, and we don't want to penalize well-behaved
            #    transactions by syncing twice, as storage syncs might be
            #    expensive.
            # b) Lots of tests assume that connection transaction
            #    information is set on open.
            #
            # Fortunately, this is a cheap operation.  It doesn't
            # really cost much, if anything.  Well, except for
            # RelStorage, in which case it adds a server round
            # trip.
            self.newTransaction(None, False)

        transaction_manager.registerSynch(self)

        if self._cache is not None:
            self._cache.incrgc() # This is a good time to do some GC

        if delegate:
            # delegate open to secondary connections
            for connection in self.connections.values():
                if connection is not self:
                    connection.open(transaction_manager, False)

    def _resetCache(self):
        """Creates a new cache, discarding the old one.

        See the docstring for the resetCaches() function.
        """
        self._reset_counter = global_reset_counter
        cache_size = self._cache.cache_size
        cache_size_bytes = self._cache.cache_size_bytes
        self._cache = cache = PickleCache(self, cache_size, cache_size_bytes)
        if getattr(self, '_reader', None) is not None:
            self._reader._cache = cache

    def _release_resources(self):
        for c in six.itervalues(self.connections):
            if c._storage is not None:
                c._storage.release()
            c._storage = c._normal_storage = None
            c._cache = PickleCache(self, 0, 0)
            c.transaction_manager = None

    ##########################################################################
    # Python protocol

    def __repr__(self):
        return '<Connection at %08x>' % (positive_id(self),)

    # Python protocol
    ##########################################################################

    ##########################################################################
    # DEPRECATION candidates

    __getitem__ = get

    def exchange(self, old, new):
        # called by a ZClasses method that isn't executed by the test suite
        oid = old._p_oid
        new._p_oid = oid
        new._p_jar = self
        new._p_changed = 1
        self._register(new)
        self._cache[oid] = new

    # DEPRECATION candidates
    ##########################################################################

    ##########################################################################
    # DEPRECATED methods

    # None at present.

    # DEPRECATED methods
    ##########################################################################

    #####################################################################
    # Savepoint support

    def savepoint(self):
        if self._savepoint_storage is None:
            tmpstore = TmpStore(self._normal_storage)
            self._savepoint_storage = tmpstore
            self._storage = self._savepoint_storage

        self._creating.clear()
        self._commit(None)
        self._storage.creating.update(self._creating)
        self._creating.clear()
        self._registered_objects = []

        state = (self._storage.position,
                 self._storage.index.copy(),
                 self._storage.creating.copy(),
                 )
        result = Savepoint(self, state)
        # While the interface doesn't guarantee this, savepoints are
        # sometimes used just to "break up" very long transactions, and as
        # a pragmatic matter this is a good time to reduce the cache
        # memory burden.
        self.cacheGC()
        return result
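
    # Hedged usage sketch (hypothetical helper): applications normally create
    # savepoints through the transaction manager rather than by calling this
    # method directly; rolling back discards everything written after the
    # savepoint without aborting the whole transaction.
    def _example_savepoint_rollback(conn, values):
        root = conn.root()
        sp = conn.transaction_manager.savepoint()
        try:
            for i, value in enumerate(values):
                root['item-%d' % i] = value
        except Exception:
            sp.rollback()
            raise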

    def _rollback_savepoint(self, state):
        self._abort()
        self._registered_objects = []
        src = self._storage

        # Invalidate objects created *after* the savepoint.
        self._invalidate_creating((oid for oid in src.creating
                                   if oid not in state[2]))
        index = src.index
        src.reset(*state)
        self._cache.invalidate(index)

    def _commit_savepoint(self, transaction):
        """Commit all changes made in savepoints and begin 2-phase commit
        """
        src = self._savepoint_storage
        self._storage = self._normal_storage
        self._savepoint_storage = None
        try:
            self._log.debug("Committing savepoints of size %s", src.getSize())
            oids = sorted(src.index.keys())

            # Copy invalidating and creating info from temporary storage:
            self._modified.extend(oids)
            self._creating.update(src.creating)

            for oid in oids:
                data, serial = src.load(oid)
                obj = self._cache.get(oid, None)
                if obj is not None:
                    self._cache.update_object_size_estimation(
                        obj._p_oid, len(data))
                    obj._p_estimated_size = len(data)
                if isinstance(self._reader.getGhost(data), Blob):
                    blobfilename = src.loadBlob(oid, serial)
                    self._storage.storeBlob(
                        oid, serial, data, blobfilename,
                        '', transaction)
                    # we invalidate the object here in order to ensure
                    # that the next attribute access of its name will
                    # unghostify it, which will cause its blob data
                    # to be reattached "cleanly"
                    self._cache.invalidate(oid)
                else:
                    self._storage.store(oid, serial, data, '', transaction)

                self._readCurrent.pop(oid, None) # same as in _store_objects()
        finally:
            src.close()

    def _abort_savepoint(self):
        """Discard all savepoint data."""
        src = self._savepoint_storage
        self._invalidate_creating(src.creating)
        self._storage = self._normal_storage
        self._savepoint_storage = None

        # Note: If we invalidate a non-ghostifiable object (i.e. a
        # persistent class), the object will immediately reread its
        # state.  That means that the following call could result in a
        # call to self.setstate, which, of course, must succeed.  In
        # general, it would be better if the read could be delayed
        # until the start of the next transaction.  If we read at the
        # end of a transaction and if the object was invalidated
        # during this transaction, then we'll read non-current data,
        # which we'll discard later in transaction finalization.  We
        # could, theoretically queue this invalidation by calling
        # self.invalidate.  Unfortunately, attempts to make that
        # change resulted in mysterious test failures.  It's pretty
        # unlikely that the object we are invalidating was invalidated
        # by another thread, so the risk of a reread is pretty low.
        # It's really not worth the effort to pursue this.

        # Note that we do this *after* resetting the storage so that, if
        # data are read, they are read from the reset storage!

        self._cache.invalidate(src.index)

        src.close()

    # Savepoint support
    #####################################################################

    def prefetch(self, *args):
        try:
            self._storage.prefetch(self._prefetch_flatten(args))
        except AttributeError:
            if not hasattr(self._storage, 'prefetch'):
                self.prefetch = lambda *a: None
            else:
                raise

    def _prefetch_flatten(self, args):
        for arg in args:
            if isinstance(arg, bytes):
                yield arg
            elif hasattr(arg, '_p_oid'):
                yield arg._p_oid
            else:
                for ob in arg:
                    if isinstance(ob, bytes):
                        yield ob
                    else:
                        yield ob._p_oid
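
    # Hedged usage sketch (hypothetical helper): prefetch() is purely a hint.
    # It accepts oids (bytes), persistent objects, or iterables of either,
    # and silently becomes a no-op when the storage has no prefetch support.
    def _example_prefetch(conn, objects):
        objects = list(objects)         # persistent objects and/or raw oids
        conn.prefetch(objects)          # hint only; loading still happens lazily
        return objects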
Example #8
0
    def __init__(self, db, cache_size=400, before=None, cache_size_bytes=0):
        """Create a new Connection."""

        self._log = logging.getLogger('ZODB.Connection')
        self._debug_info = ()

        self._db = db
        self.large_record_size = db.large_record_size

        # historical connection
        self.before = before

        # Multi-database support
        self.connections = {self._db.database_name: self}

        storage = db._mvcc_storage
        if before:
            try:
                before_instance = storage.before_instance
            except AttributeError:
                def before_instance(before):
                    return HistoricalStorageAdapter(
                        storage.new_instance(), before)
            storage = before_instance(before)
        else:
            storage = storage.new_instance()

        self._normal_storage = self._storage = storage
        self._savepoint_storage = None

        # Do we need to join a txn manager?
        self._needs_to_join = True
        self.transaction_manager = None
        self.opened = None # time.time() when DB.open() opened us

        self._reset_counter = global_reset_counter
        self._load_count = 0   # Number of objects unghosted
        self._store_count = 0  # Number of objects stored

        # Cache which can ghostify (forget the state of) objects not
        # recently used. Its API is roughly that of a dict, with
        # additional gc-related and invalidation-related methods.
        self._cache = PickleCache(self, cache_size, cache_size_bytes)

        # The pre-cache is used by get to avoid infinite loops when
        # objects immediately load their state when they get their
        # persistent data set.
        self._pre_cache = {}

        # List of all objects (not oids) registered as modified by the
        # persistence machinery, or by add(), or whose access caused a
        # ReadConflictError (just to be able to clean them up from the
        # cache on abort with the other modified objects). All objects
        # of this list are either in _cache or in _added.
        self._registered_objects = [] # [object]

        # ids and serials of objects for which readCurrent was called
        # in a transaction.
        self._readCurrent = {} # {oid ->serial}

        # Dict of oid->obj added explicitly through add(). Used as a
        # preliminary cache until commit time when objects are all moved
        # to the real _cache. The objects are moved to _creating at
        # commit time.
        self._added = {} # {oid -> object}

        # During commit this is turned into a list, which receives
        # objects added as a side-effect of storing a modified object.
        self._added_during_commit = None

        # During commit, all objects go to either _modified or _creating:

        # Dict of oid->flag of new objects (without serial), either
        # added by add() or implicitly added (discovered by the
        # serializer during commit). The flag is True for implicit
        # adding. Used during abort to remove created objects from the
        # _cache, and by persistent_id to check that a new object isn't
        # reachable from multiple databases.
        self._creating = {} # {oid -> implicitly_added_flag}

        # List of oids of modified objects, which have to be invalidated
        # in the cache on abort and in other connections on finish.
        self._modified = [] # [oid]

        # We intend to prevent committing a transaction in which
        # ReadConflictError occurs.  _conflicts is the set of oids that
        # experienced ReadConflictError.  Any time we raise ReadConflictError,
        # the oid should be added to this set, and we should be sure that the
        # object is registered.  Because it's registered, Connection.commit()
        # will raise ReadConflictError again (because the oid is in
        # _conflicts).
        self._conflicts = {}

        # To support importFile(), implemented in the ExportImport base
        # class, we need to run _importDuringCommit() from our commit()
        # method.  If _import is not None, it is a two-tuple of arguments
        # to pass to _importDuringCommit().
        self._import = None

        self._reader = ObjectReader(self, self._cache, self._db.classFactory)
Example #9
0
    def __init__(self, db, version='', cache_size=400):
        """Create a new Connection."""

        self._db = db
        self._normal_storage = self._storage = db._storage
        self.new_oid = db._storage.new_oid
        self._savepoint_storage = None

        self.transaction_manager = self._synch = self._mvcc = None


        self._log = logging.getLogger("ZODB.Connection")
        self._debug_info = ()
        self._opened = None # time.time() when DB.open() opened us

        self._version = version
        self._cache = cache = PickleCache(self, cache_size)

        # The pre-cache is used by get to avoid infinite loops when
        # objects immediately load their state when they get their
        # persistent data set.
        self._pre_cache = {}
        
        if version:
            # Caches for versions end up empty if the version
            # is not used for a while. Non-version caches
            # keep their content indefinitely.
            # Unclear:  Why do we want version caches to behave this way?

            self._cache.cache_drain_resistance = 100

        self._committed = []
        self._added = {}
        self._added_during_commit = None
        self._reset_counter = global_reset_counter
        self._load_count = 0   # Number of objects unghosted
        self._store_count = 0  # Number of objects stored
        self._creating = {}

        # List of oids of modified objects (to be invalidated on an abort).
        self._modified = []

        # List of all objects (not oids) registered as modified by the
        # persistence machinery.
        self._registered_objects = []

        # Do we need to join a txn manager?
        self._needs_to_join = True

        # _invalidated queues invalidate messages delivered from the DB
        # _inv_lock prevents one thread from modifying the set while
        # another is processing invalidations.  All the invalidations
        # from a single transaction should be applied atomically, so
        # the lock must be held when reading _invalidated.

        # It sucks that we have to hold the lock to read _invalidated.
        # Normally, _invalidated is written by calling dict.update, which
        # will execute atomically by virtue of the GIL.  But some storage
        # might generate oids where hash or compare invokes Python code.  In
        # that case, the GIL can't save us.
        # Note:  since that was written, it was officially declared that the
        # type of an oid is str.  TODO:  remove the related now-unnecessary
        # critical sections (if any -- this needs careful thought).

        self._inv_lock = threading.Lock()
        self._invalidated = d = {}

        # We intend to prevent committing a transaction in which
        # ReadConflictError occurs.  _conflicts is the set of oids that
        # experienced ReadConflictError.  Any time we raise ReadConflictError,
        # the oid should be added to this set, and we should be sure that the
        # object is registered.  Because it's registered, Connection.commit()
        # will raise ReadConflictError again (because the oid is in
        # _conflicts).
        self._conflicts = {}

        # If MVCC is enabled, then _mvcc is True and _txn_time stores
        # the upper bound on transactions visible to this connection.
        # That is, all object revisions must be written before _txn_time.
        # If it is None, then the current revisions are acceptable.
        # If the connection is in a version, mvcc will be disabled, because
        # loadBefore() only returns non-version data.
        self._txn_time = None

        # To support importFile(), implemented in the ExportImport base
        # class, we need to run _importDuringCommit() from our commit()
        # method.  If _import is not None, it is a two-tuple of arguments
        # to pass to _importDuringCommit().
        self._import = None


        self._reader = ObjectReader(self, self._cache, self._db.classFactory)

        # Multi-database support
        self.connections = {self._db.database_name: self}
Example #10
0
class Connection(ExportImport, object):
    """Connection to ZODB for loading and storing objects."""

    implements(IConnection,
               ISavepointDataManager,
               IPersistentDataManager,
               ISynchronizer)


    _code_timestamp = 0

    ##########################################################################
    # Connection methods, ZODB.IConnection

    def __init__(self, db, version='', cache_size=400):
        """Create a new Connection."""

        self._db = db
        self._normal_storage = self._storage = db._storage
        self.new_oid = db._storage.new_oid
        self._savepoint_storage = None

        self.transaction_manager = self._synch = self._mvcc = None


        self._log = logging.getLogger("ZODB.Connection")
        self._debug_info = ()
        self._opened = None # time.time() when DB.open() opened us

        self._version = version
        self._cache = cache = PickleCache(self, cache_size)

        # The pre-cache is used by get to avoid infinite loops when
        # objects immediately load their state when they get their
        # persistent data set.
        self._pre_cache = {}
        
        if version:
            # Caches for versions end up empty if the version
            # is not used for a while. Non-version caches
            # keep their content indefinitely.
            # Unclear:  Why do we want version caches to behave this way?

            self._cache.cache_drain_resistance = 100

        self._committed = []
        self._added = {}
        self._added_during_commit = None
        self._reset_counter = global_reset_counter
        self._load_count = 0   # Number of objects unghosted
        self._store_count = 0  # Number of objects stored
        self._creating = {}

        # List of oids of modified objects (to be invalidated on an abort).
        self._modified = []

        # List of all objects (not oids) registered as modified by the
        # persistence machinery.
        self._registered_objects = []

        # Do we need to join a txn manager?
        self._needs_to_join = True

        # _invalidated queues invalidate messages delivered from the DB
        # _inv_lock prevents one thread from modifying the set while
        # another is processing invalidations.  All the invalidations
        # from a single transaction should be applied atomically, so
        # the lock must be held when reading _invalidated.

        # It sucks that we have to hold the lock to read _invalidated.
        # Normally, _invalidated is written by calling dict.update, which
        # will execute atomically by virtue of the GIL.  But some storage
        # might generate oids where hash or compare invokes Python code.  In
        # that case, the GIL can't save us.
        # Note:  since that was written, it was officially declared that the
        # type of an oid is str.  TODO:  remove the related now-unnecessary
        # critical sections (if any -- this needs careful thought).

        self._inv_lock = threading.Lock()
        self._invalidated = d = {}

        # We intend to prevent committing a transaction in which
        # ReadConflictError occurs.  _conflicts is the set of oids that
        # experienced ReadConflictError.  Any time we raise ReadConflictError,
        # the oid should be added to this set, and we should be sure that the
        # object is registered.  Because it's registered, Connection.commit()
        # will raise ReadConflictError again (because the oid is in
        # _conflicts).
        self._conflicts = {}

        # If MVCC is enabled, then _mvcc is True and _txn_time stores
        # the upper bound on transactions visible to this connection.
        # That is, all object revisions must be written before _txn_time.
        # If it is None, then the current revisions are acceptable.
        # If the connection is in a version, mvcc will be disabled, because
        # loadBefore() only returns non-version data.
        self._txn_time = None

        # To support importFile(), implemented in the ExportImport base
        # class, we need to run _importDuringCommit() from our commit()
        # method.  If _import is not None, it is a two-tuple of arguments
        # to pass to _importDuringCommit().
        self._import = None


        self._reader = ObjectReader(self, self._cache, self._db.classFactory)

        # Multi-database support
        self.connections = {self._db.database_name: self}


    def add(self, obj):
        """Add a new object 'obj' to the database and assign it an oid."""
        if self._opened is None:
            raise ConnectionStateError("The database connection is closed")

        marker = object()
        oid = getattr(obj, "_p_oid", marker)
        if oid is marker:
            raise TypeError("Only first-class persistent objects may be"
                            " added to a Connection.", obj)
        elif obj._p_jar is None:
            assert obj._p_oid is None
            oid = obj._p_oid = self._storage.new_oid()
            obj._p_jar = self
            if self._added_during_commit is not None:
                self._added_during_commit.append(obj)
            self._register(obj)
            # Add to _added after calling register(), so that _added
            # can be used as a test for whether the object has been
            # registered with the transaction.
            self._added[oid] = obj
        elif obj._p_jar is not self:
            raise InvalidObjectReference(obj, obj._p_jar)

    def get(self, oid):
        """Return the persistent object with oid 'oid'."""
        if self._opened is None:
            raise ConnectionStateError("The database connection is closed")

        obj = self._pre_cache.get(oid, None)
        if obj is not None:
            return obj
        obj = self._cache.get(oid, None)
        if obj is not None:
            return obj
        obj = self._added.get(oid, None)
        if obj is not None:
            return obj

        p, serial = self._storage.load(oid, self._version)
        obj = self._reader.getGhost(p)

        # Avoid an infinite loop if obj tries to load its state before
        # it is added to the cache and its state refers to it.
        self._pre_cache[oid] = obj
        obj._p_oid = oid
        obj._p_jar = self
        obj._p_changed = None
        obj._p_serial = serial
        self._pre_cache.pop(oid)
        self._cache[oid] = obj
        return obj

    def cacheMinimize(self):
        """Deactivate all unmodified objects in the cache."""
        self._cache.minimize()

    # TODO: we should test what happens when cacheGC is called mid-transaction.
    def cacheGC(self):
        """Reduce cache size to target size."""
        self._cache.incrgc()

    __onCloseCallbacks = None
    def onCloseCallback(self, f):
        """Register a callable, f, to be called by close()."""
        if self.__onCloseCallbacks is None:
            self.__onCloseCallbacks = []
        self.__onCloseCallbacks.append(f)

    def close(self, primary=True):
        """Close the Connection."""
        if not self._needs_to_join:
            # We're currently joined to a transaction.
            raise ConnectionStateError("Cannot close a connection joined to "
                                       "a transaction")

        if self._cache is not None:
            self._cache.incrgc() # This is a good time to do some GC

        # Call the close callbacks.
        if self.__onCloseCallbacks is not None:
            for f in self.__onCloseCallbacks:
                try:
                    f()
                except: # except what?
                    f = getattr(f, 'im_self', f)
                    self._log.error("Close callback failed for %s", f,
                                    exc_info=sys.exc_info())
            self.__onCloseCallbacks = None

        self._debug_info = ()

        if self._synch:
            self.transaction_manager.unregisterSynch(self)
            self._synch = None

        if primary:
            for connection in self.connections.values():
                if connection is not self:
                    connection.close(False)

            # Return the connection to the pool.
            if self._opened is not None:
                self._db._returnToPool(self)

                # _returnToPool() set self._opened to None.
                # However, we can't assert that here, because self may
                # have been reused (by another thread) by the time we
                # get back here.
        else:
            self._opened = None

    def db(self):
        """Returns a handle to the database this connection belongs to."""
        return self._db

    def isReadOnly(self):
        """Returns True if the storage for this connection is read only."""
        if self._opened is None:
            raise ConnectionStateError("The database connection is closed")
        return self._storage.isReadOnly()

    def invalidate(self, tid, oids):
        """Notify the Connection that transaction 'tid' invalidated oids."""
        self._inv_lock.acquire()
        try:
            if self._txn_time is None:
                self._txn_time = tid
            self._invalidated.update(oids)
        finally:
            self._inv_lock.release()

    def root(self):
        """Return the database root object."""
        return self.get(z64)

    def getVersion(self):
        """Returns the version this connection is attached to."""
        if self._storage is None:
            raise ConnectionStateError("The database connection is closed")
        return self._version

    def get_connection(self, database_name):
        """Return a Connection for the named database."""
        connection = self.connections.get(database_name)
        if connection is None:
            new_con = self._db.databases[database_name].open(
                transaction_manager=self.transaction_manager,
                mvcc=self._mvcc, version=self._version, synch=self._synch,
                )
            self.connections.update(new_con.connections)
            new_con.connections = self.connections
            connection = new_con
        return connection

    def _implicitlyAdding(self, oid):
        """Are we implicitly adding an object within the current transaction

        This is used in a check to avoid implicitly adding an object
        to a database in a multi-database situation.
        See serialize.ObjectWriter.persistent_id.

        """
        return (self._creating.get(oid, 0)
                or
                ((self._savepoint_storage is not None)
                 and
                 self._savepoint_storage.creating.get(oid, 0)
                 )
                )

    def sync(self):
        """Manually update the view on the database."""
        self.transaction_manager.abort()
        self._storage_sync()

    def getDebugInfo(self):
        """Returns a tuple with different items for debugging the
        connection.
        """
        return self._debug_info

    def setDebugInfo(self, *args):
        """Add the given items to the debug information of this connection."""
        self._debug_info = self._debug_info + args

    def getTransferCounts(self, clear=False):
        """Returns the number of objects loaded and stored."""
        res = self._load_count, self._store_count
        if clear:
            self._load_count = 0
            self._store_count = 0
        return res

    # Connection methods
    ##########################################################################

    ##########################################################################
    # Data manager (ISavepointDataManager) methods

    def abort(self, transaction):
        """Abort a transaction and forget all changes."""

        # The order is important here.  We want to abort registered
        # objects before we process the cache.  Otherwise, we may un-add
        # objects added in savepoints.  If they've been modified since
        # the savepoint, then they won't have _p_oid or _p_jar after
        # they've been unadded. This will make the code in _abort
        # confused.

        self._abort()

        if self._savepoint_storage is not None:
            self._abort_savepoint()

        self._tpc_cleanup()

    def _abort(self):
        """Abort a transaction and forget all changes."""

        for obj in self._registered_objects:
            oid = obj._p_oid
            assert oid is not None
            if oid in self._added:
                del self._added[oid]
                del obj._p_jar
                del obj._p_oid
            else:

                # Note: If we invalidate a non-ghostifiable object
                # (i.e. a persistent class), the object will
                # immediately reread its state.  That means that the
                # following call could result in a call to
                # self.setstate, which, of course, must succeed.
                # In general, it would be better if the read could be
                # delayed until the start of the next transaction.  If
                # we read at the end of a transaction and if the
                # object was invalidated during this transaction, then
                # we'll read non-current data, which we'll discard
                # later in transaction finalization.  Unfortunately, we
                # can only delay the read if this abort corresponds to
                # a top-level-transaction abort.  We can't tell if
                # this is a top-level-transaction abort, so we have to
                # go ahead and invalidate now.  Fortunately, it's
                # pretty unlikely that the object we are invalidating
                # was invalidated by another thread, so the risk of a
                # reread is pretty low.

                self._cache.invalidate(oid)

    def _tpc_cleanup(self):
        """Performs cleanup operations to support tpc_finish and tpc_abort."""
        self._conflicts.clear()
        if not self._synch:
            self._flush_invalidations()
        self._needs_to_join = True
        self._registered_objects = []
        self._creating.clear()

    # Process pending invalidations.
    def _flush_invalidations(self):
        self._inv_lock.acquire()
        try:
            # Non-ghostifiable objects may need to read when they are
            # invalidated, so we'll quickly just replace the
            # invalidating dict with a new one.  We'll then process
            # the invalidations after freeing the lock *and* after
            # resetting the time.  This means that invalidations will
            # happen after the start of the transactions.  They are
            # subject to conflict errors and to reading old data.

            # TODO: There is a potential problem lurking for persistent
            # classes.  Suppose we have an invalidation of a persistent
            # class and of an instance.  If the instance is
            # invalidated first and if the invalidation logic uses
            # data read from the class, then the invalidation could
            # be performed with stale data.  Or, suppose that there
            # are instances of the class that are freed as a result of
            # invalidating some object.  Perhaps code in their __del__
            # uses class data.  Really, the only way to properly fix
            # this is to, in fact, make classes ghostifiable.  Then
            # we'd have to reimplement attribute lookup to check the
            # class state and, if necessary, activate the class.  It's
            # much worse than that though, because we'd also need to
            # deal with slots.  When a class is ghostified, we'd need
            # to replace all of the slot operations with versions that
            # reloaded the object when called. It's hard to say which
            # is better or worse.  For now, it seems the risk of
            # using a class while objects are being invalidated seems
            # small enough to be acceptable.

            invalidated = self._invalidated
            self._invalidated = {}
            self._txn_time = None
        finally:
            self._inv_lock.release()

        self._cache.invalidate(invalidated)

        # Now is a good time to collect some garbage.
        self._cache.incrgc()

    def tpc_begin(self, transaction):
        """Begin commit of a transaction, starting the two-phase commit."""
        self._modified = []

        # _creating is a list of oids of new objects, which is used to
        # remove them from the cache if a transaction aborts.
        self._creating.clear()
        self._normal_storage.tpc_begin(transaction)

    def commit(self, transaction):
        """Commit changes to an object"""

        if self._savepoint_storage is not None:

            # We first checkpoint the current changes to the savepoint
            self.savepoint()

            # then commit all of the savepoint changes at once
            self._commit_savepoint(transaction)

            # No need to call _commit since savepoint did.

        else:
            self._commit(transaction)

    def _commit(self, transaction):
        """Commit changes to an object"""

        if self._import:
            # We are importing an export file. We always do this
            # while making a savepoint so we can copy export data
            # directly to our storage, typically a TmpStore.
            self._importDuringCommit(transaction, *self._import)
            self._import = None

        # Just in case an object is added as a side-effect of storing
        # a modified object.  If, for example, a __getstate__() method
        # calls add(), the newly added objects will show up in
        # _added_during_commit.  This sounds insane, but has actually
        # happened.

        self._added_during_commit = []

        for obj in self._registered_objects:
            oid = obj._p_oid
            assert oid
            if oid in self._conflicts:
                raise ReadConflictError(object=obj)

            if obj._p_jar is not self:
                raise InvalidObjectReference(obj, obj._p_jar)
            elif oid in self._added:
                assert obj._p_serial == z64
            elif obj._p_changed:
                if oid in self._invalidated:
                    resolve = getattr(obj, "_p_resolveConflict", None)
                    if resolve is None:
                        raise ConflictError(object=obj)
                self._modified.append(oid)
            else:
                # Nothing to do.  It's been said that it's legal, e.g., for
                # an object to set _p_changed to false after it's been
                # changed and registered.
                continue

            self._store_objects(ObjectWriter(obj), transaction)

        for obj in self._added_during_commit:
            self._store_objects(ObjectWriter(obj), transaction)
        self._added_during_commit = None

    def _store_objects(self, writer, transaction):
        for obj in writer:
            oid = obj._p_oid
            serial = getattr(obj, "_p_serial", z64)

            if serial == z64:
                # obj is a new object

                # Because obj was added, it is now in _creating, so it
                # can be removed from _added.  If oid wasn't in
                # _added, then we are adding it implicitly.

                implicitly_adding = self._added.pop(oid, None) is None

                self._creating[oid] = implicitly_adding

            else:
                if (oid in self._invalidated
                    and not hasattr(obj, '_p_resolveConflict')):
                    raise ConflictError(object=obj)
                self._modified.append(oid)
            p = writer.serialize(obj)  # This calls __getstate__ of obj
            s = self._storage.store(oid, serial, p, self._version, transaction)
            self._store_count += 1
            # Put the object in the cache before handling the
            # response, just in case the response contains the
            # serial number for a newly created object
            try:
                self._cache[oid] = obj
            except:
                # Dang, I bet it's wrapped:
                # TODO:  Deprecate, then remove, this.
                if hasattr(obj, 'aq_base'):
                    self._cache[oid] = obj.aq_base
                else:
                    raise

            self._handle_serial(s, oid)

    def _handle_serial(self, store_return, oid=None, change=1):
        """Handle the returns from store() and tpc_vote() calls."""

        # These calls can return different types depending on whether
        # ZEO is used.  ZEO uses asynchronous returns that may be
        # returned in batches by the ClientStorage.  ZEO1 can also
        # return an exception object and expect that the Connection
        # will raise the exception.

        # When commit_sub() executes a store, there is no need to
        # update the _p_changed flag, because the subtransaction
        # tpc_vote() calls already did this.  The change=1 argument
        # exists to allow commit_sub() to avoid setting the flag
        # again.

        # When conflict resolution occurs, the object state held by
        # the connection does not match what is written to the
        # database.  Invalidate the object here to guarantee that
        # the new state is read the next time the object is used.

        if not store_return:
            return
        if isinstance(store_return, str):
            assert oid is not None
            self._handle_one_serial(oid, store_return, change)
        else:
            for oid, serial in store_return:
                self._handle_one_serial(oid, serial, change)

    def _handle_one_serial(self, oid, serial, change):
        if not isinstance(serial, str):
            raise serial
        obj = self._cache.get(oid, None)
        if obj is None:
            return
        if serial == ResolvedSerial:
            del obj._p_changed # transition from changed to ghost
        else:
            if change:
                obj._p_changed = 0 # transition from changed to up-to-date
            obj._p_serial = serial

    def tpc_abort(self, transaction):
        if self._import:
            self._import = None

        if self._savepoint_storage is not None:
            self._abort_savepoint()

        self._storage.tpc_abort(transaction)

        # Note: If we invalidate a non-ghostifiable object (i.e. a
        # persistent class), the object will immediately reread its
        # state.  That means that the following call could result in a
        # call to self.setstate, which, of course, must succeed.  In
        # general, it would be better if the read could be delayed
        # until the start of the next transaction.  If we read at the
        # end of a transaction and if the object was invalidated
        # during this transaction, then we'll read non-current data,
        # which we'll discard later in transaction finalization.  We
        # could, theoretically queue this invalidation by calling
        # self.invalidate.  Unfortunately, attempts to make that
        # change resulted in mysterious test failures.  It's pretty
        # unlikely that the object we are invalidating was invalidated
        # by another thread, so the risk of a reread is pretty low.
        # It's really not worth the effort to pursue this.

        self._cache.invalidate(self._modified)
        self._invalidate_creating()
        while self._added:
            oid, obj = self._added.popitem()
            del obj._p_oid
            del obj._p_jar
        self._tpc_cleanup()

    def _invalidate_creating(self, creating=None):
        """Disown any objects newly saved in an uncommitted transaction."""
        if creating is None:
            creating = self._creating
            self._creating = {}

        for oid in creating:
            o = self._cache.get(oid)
            if o is not None:
                del self._cache[oid]
                del o._p_jar
                del o._p_oid

    def tpc_vote(self, transaction):
        """Verify that a data manager can commit the transaction."""
        try:
            vote = self._storage.tpc_vote
        except AttributeError:
            return
        s = vote(transaction)
        self._handle_serial(s)

    def tpc_finish(self, transaction):
        """Indicate confirmation that the transaction is done."""

        def callback(tid):
            d = dict.fromkeys(self._modified)
            self._db.invalidate(tid, d, self)
#       It's important that the storage calls the passed function
#       while it still has its lock.  We don't want another thread
#       to be able to read any updated data until we've had a chance
#       to send an invalidation message to all of the other
#       connections!
        self._storage.tpc_finish(transaction, callback)
        self._tpc_cleanup()

    def sortKey(self):
        """Return a consistent sort key for this connection."""
        return "%s:%s" % (self._storage.sortKey(), id(self))

    # Data manager (ISavepointDataManager) methods
    ##########################################################################

    ##########################################################################
    # Transaction-manager synchronization -- ISynchronizer

    def beforeCompletion(self, txn):
        # We don't do anything before a commit starts.
        pass

    # Call the underlying storage's sync() method (if any), and process
    # pending invalidations regardless.  Of course this should only be
    # called at transaction boundaries.
    def _storage_sync(self, *ignored):
        sync = getattr(self._storage, 'sync', 0)
        if sync:
            sync()
        self._flush_invalidations()

    afterCompletion = _storage_sync
    newTransaction = _storage_sync

    # Transaction-manager synchronization -- ISynchronizer
    ##########################################################################

    ##########################################################################
    # persistent.interfaces.IPersistentDatamanager

    def oldstate(self, obj, tid):
        """Return copy of 'obj' that was written by transaction 'tid'."""
        assert obj._p_jar is self
        p = self._storage.loadSerial(obj._p_oid, tid)
        return self._reader.getState(p)

    def setstate(self, obj):
        """Turns the ghost 'obj' into a real object by loading it's from the
        database."""
        oid = obj._p_oid

        if self._opened is None:
            msg = ("Shouldn't load state for %s "
                   "when the connection is closed" % oid_repr(oid))
            self._log.error(msg)
            raise ConnectionStateError(msg)

        try:
            self._setstate(obj)
        except ConflictError:
            raise
        except:
            self._log.error("Couldn't load state for %s", oid_repr(oid),
                            exc_info=sys.exc_info())
            raise

    def _setstate(self, obj):
        # Helper for setstate(), which provides logging of failures.

        # The control flow is complicated here to avoid loading an
        # object revision that we are sure we aren't going to use.  As
        # a result, invalidation tests occur before and after the
        # load.  We can only be sure about invalidations after the
        # load.

        # If an object has been invalidated, there are several cases
        # to consider:
        # 1. Check _p_independent()
        # 2. Try MVCC
        # 3. Raise ConflictError.

        # Does anything actually use _p_independent()?  It would simplify
        # the code if we could drop support for it.

        # There is a harmless data race with self._invalidated.  A
        # dict update could go on in another thread, but we don't care
        # because we have to check again after the load anyway.

        if (obj._p_oid in self._invalidated and
                not myhasattr(obj, "_p_independent")):
            # If the object has _p_independent(), we will handle it below.
            self._load_before_or_conflict(obj)
            return

        p, serial = self._storage.load(obj._p_oid, self._version)
        self._load_count += 1

        self._inv_lock.acquire()
        try:
            invalid = obj._p_oid in self._invalidated
        finally:
            self._inv_lock.release()

        if invalid:
            if myhasattr(obj, "_p_independent"):
                # This call will raise a ReadConflictError if something
                # goes wrong
                self._handle_independent(obj)
            else:
                self._load_before_or_conflict(obj)
                return

        self._reader.setGhostState(obj, p)
        obj._p_serial = serial

    def _load_before_or_conflict(self, obj):
        """Load non-current state for obj or raise ReadConflictError."""
        if not (self._mvcc and self._setstate_noncurrent(obj)):
            self._register(obj)
            self._conflicts[obj._p_oid] = True
            raise ReadConflictError(object=obj)

    def _setstate_noncurrent(self, obj):
        """Set state using non-current data.

        Return True if state was available, False if not.
        """
        try:
            # Load data that was current before the commit at txn_time.
            t = self._storage.loadBefore(obj._p_oid, self._txn_time)
        except KeyError:
            return False
        if t is None:
            return False
        data, start, end = t
        # The non-current transaction must have been written before
        # txn_time.  It must be current at txn_time, but could have
        # been modified at txn_time.

        assert start < self._txn_time, (u64(start), u64(self._txn_time))
        assert end is not None
        assert self._txn_time <= end, (u64(self._txn_time), u64(end))
        self._reader.setGhostState(obj, data)
        obj._p_serial = start
        return True

    def _handle_independent(self, obj):
        # Helper method for setstate() handles possibly independent objects
        # Call _p_independent(), if it returns True, setstate() wins.
        # Otherwise, raise a ConflictError.

        if obj._p_independent():
            self._inv_lock.acquire()
            try:
                try:
                    del self._invalidated[obj._p_oid]
                except KeyError:
                    pass
            finally:
                self._inv_lock.release()
        else:
            self._conflicts[obj._p_oid] = 1
            self._register(obj)
            raise ReadConflictError(object=obj)

    def register(self, obj):
        """Register obj with the current transaction manager.

        A subclass could override this method to customize the default
        policy of one transaction manager for each thread.

        obj must be an object loaded from this Connection.
        """
        assert obj._p_jar is self
        if obj._p_oid is None:
            # The actual complaint here is that an object without
            # an oid is being registered.  I can't think of any way to
            # achieve that without assignment to _p_jar.  If there is
            # a way, this will be a very confusing exception.
            raise ValueError("assigning to _p_jar is not supported")
        elif obj._p_oid in self._added:
            # It was registered before it was added to _added.
            return
        self._register(obj)

    def _register(self, obj=None):

        # The order here is important.  We need to join before
        # registering the object, because joining may take a
        # savepoint, and the savepoint should not reflect the change
        # to the object.

        if self._needs_to_join:
            self.transaction_manager.get().join(self)
            self._needs_to_join = False

        if obj is not None:
            self._registered_objects.append(obj)


    # persistent.interfaces.IPersistentDatamanager
    ##########################################################################

    ##########################################################################
    # PROTECTED stuff (used by e.g. ZODB.DB.DB)

    def _cache_items(self):
        # find all items on the lru list
        items = self._cache.lru_items()
        # find everything: some on the lru list, some not
        everything = self._cache.cache_data
        # remove those items that are on the lru list
        for k,v in items:
            del everything[k]
        # return a list of [ghosts....not recently used.....recently used]
        return everything.items() + items

    def open(self, transaction_manager=None, mvcc=True, synch=True,
             delegate=True):
        """Register odb, the DB that this Connection uses.

        This method is called by the DB every time a Connection
        is opened.  Any invalidations received while the Connection
        was closed will be processed.

        If the global module function resetCaches() was called, the
        cache will be cleared.

        Parameters:
        odb: database that owns the Connection
        mvcc: boolean indicating whether MVCC is enabled
        transaction_manager: transaction manager to use.  None means
            use the default transaction manager.
        synch: boolean indicating whether Connection should
        register for afterCompletion() calls.
        """

        # TODO:  Why do we go to all the trouble of setting _db and
        # other attributes on open and clearing them on close?
        # A Connection is only ever associated with a single DB
        # and Storage.

        self._opened = time()
        self._synch = synch
        self._mvcc = mvcc and not self._version

        if transaction_manager is None:
            transaction_manager = transaction.manager

        self.transaction_manager = transaction_manager

        if self._reset_counter != global_reset_counter:
            # New code is in place.  Start a new cache.
            self._resetCache()
        else:
            self._flush_invalidations()

        if synch:
            transaction_manager.registerSynch(self)

        if self._cache is not None:
            self._cache.incrgc() # This is a good time to do some GC

        if delegate:
            # delegate open to secondary connections
            for connection in self.connections.values():
                if connection is not self:
                    connection.open(transaction_manager, mvcc, synch, False)

    def _resetCache(self):
        """Creates a new cache, discarding the old one.

        See the docstring for the resetCaches() function.
        """
        self._reset_counter = global_reset_counter
        self._invalidated.clear()
        cache_size = self._cache.cache_size
        self._cache = cache = PickleCache(self, cache_size)
        if getattr(self, '_reader', None) is not None:
            self._reader._cache = cache

    ##########################################################################
    # Python protocol

    def __repr__(self):
        if self._version:
            ver = ' (in version %s)' % repr(self._version)
        else:
            ver = ''
        return '<Connection at %08x%s>' % (positive_id(self), ver)

    # Python protocol
    ##########################################################################

    ##########################################################################
    # DEPRECATION candidates

    __getitem__ = get

    def modifiedInVersion(self, oid):
        """Returns the version the object with the given oid was modified in.

        If it wasn't modified in a version, the current version of this
        connection is returned.
        """
        try:
            return self._db.modifiedInVersion(oid)
        except KeyError:
            return self.getVersion()

    def exchange(self, old, new):
        # called by a ZClasses method that isn't executed by the test suite
        oid = old._p_oid
        new._p_oid = oid
        new._p_jar = self
        new._p_changed = 1
        self._register(new)
        self._cache[oid] = new

    # DEPRECATION candidates
    ##########################################################################

    ##########################################################################
    # DEPRECATED methods

    # None at present.

    # DEPRECATED methods
    ##########################################################################

    #####################################################################
    # Savepoint support

    def savepoint(self):
        if self._savepoint_storage is None:
            self._savepoint_storage = TmpStore(self._version,
                                               self._normal_storage)
            self._storage = self._savepoint_storage

        self._creating.clear()
        self._commit(None)
        self._storage.creating.update(self._creating)
        self._creating.clear()
        self._registered_objects = []

        state = self._storage.position, self._storage.index.copy()
        result = Savepoint(self, state)
        # While the interface doesn't guarantee this, savepoints are
        # sometimes used just to "break up" very long transactions, and as
        # a pragmatic matter this is a good time to reduce the cache
        # memory burden.
        self.cacheGC()
        return result

    def _rollback(self, state):
        self._abort()
        self._registered_objects = []
        src = self._storage
        self._cache.invalidate(src.index)
        src.reset(*state)

    def _commit_savepoint(self, transaction):
        """Commit all changes made in subtransactions and begin 2-phase commit
        """
        src = self._savepoint_storage
        self._storage = self._normal_storage
        self._savepoint_storage = None

        self._log.debug("Commiting savepoints of size %s", src.getSize())
        oids = src.index.keys()

        # Copy invalidating and creating info from temporary storage:
        self._modified.extend(oids)
        self._creating.update(src.creating)

        for oid in oids:
            data, serial = src.load(oid, src)
            s = self._storage.store(oid, serial, data,
                                    self._version, transaction)
            self._handle_serial(s, oid, change=False)

        src.close()

    def _abort_savepoint(self):
        """Discard all subtransaction data."""
        src = self._savepoint_storage
        self._storage = self._normal_storage
        self._savepoint_storage = None

        # Note: If we invalidate a non-ghostifiable object (i.e. a
        # persistent class), the object will immediately reread its
        # state.  That means that the following call could result in a
        # call to self.setstate, which, of course, must succeed.  In
        # general, it would be better if the read could be delayed
        # until the start of the next transaction.  If we read at the
        # end of a transaction and if the object was invalidated
        # during this transaction, then we'll read non-current data,
        # which we'll discard later in transaction finalization.  We
        # could, theoretically queue this invalidation by calling
        # self.invalidate.  Unfortunately, attempts to make that
        # change resulted in mysterious test failures.  It's pretty
        # unlikely that the object we are invalidating was invalidated
        # by another thread, so the risk of a reread is pretty low.
        # It's really not worth the effort to pursue this.

        self._cache.invalidate(src.index)
        self._invalidate_creating(src.creating)
        src.close()
Example #11
0
    def load_persistent(self, oid, klass):
        if self._counter >= self._chunk_size:
            self.garbage_collect_cache()
        ob = ObjectReader.load_persistent(self, oid, klass)
        self._counter += 1
        return ob
Example #12
0
    def load_oid(self, oid):
        if self.oid_set is not None:
            if self.lazy:
                return self.lazy(oid)
            self.oid_set.add(oid)
        return ObjectReader.load_oid(self, oid)
Example #14
0
    def __init__(self, db, version='', cache_size=400, cache_size_bytes=0):
        """Create a new Connection."""

        self._log = logging.getLogger('ZODB.Connection')
        self._debug_info = ()

        self._db = db
        # Multi-database support
        self.connections = {self._db.database_name: self}

        self._version = version
        self._normal_storage = self._storage = db._storage
        self.new_oid = db._storage.new_oid
        self._savepoint_storage = None

        # Do we need to join a txn manager?
        self._needs_to_join = True
        self.transaction_manager = None
        self._opened = None  # time.time() when DB.open() opened us

        self._reset_counter = global_reset_counter
        self._load_count = 0  # Number of objects unghosted
        self._store_count = 0  # Number of objects stored

        # Cache which can ghostify (forget the state of) objects not
        # recently used. Its API is roughly that of a dict, with
        # additional gc-related and invalidation-related methods.
        self._cache = PickleCache(self, cache_size, cache_size_bytes)

        # The pre-cache is used by get to avoid infinite loops when
        # objects immediately load their state when they get their
        # persistent data set.
        self._pre_cache = {}

        if version:
            # Caches for versions end up empty if the version
            # is not used for a while. Non-version caches
            # keep their content indefinitely.
            # Unclear:  Why do we want version caches to behave this way?
            self._cache.cache_drain_resistance = 100

        # List of all objects (not oids) registered as modified by the
        # persistence machinery, or by add(), or whose access caused a
        # ReadConflictError (just to be able to clean them up from the
        # cache on abort with the other modified objects). All objects
        # of this list are either in _cache or in _added.
        self._registered_objects = []

        # Dict of oid->obj added explicitly through add(). Used as a
        # preliminary cache until commit time when objects are all moved
        # to the real _cache. The objects are moved to _creating at
        # commit time.
        self._added = {}

        # During commit this is turned into a list, which receives
        # objects added as a side-effect of storing a modified object.
        self._added_during_commit = None

        # During commit, all objects go to either _modified or _creating:

        # Dict of oid->flag of new objects (without serial), either
        # added by add() or implicitly added (discovered by the
        # serializer during commit). The flag is True for implicit
        # adding. Used during abort to remove created objects from the
        # _cache, and by persistent_id to check that a new object isn't
        # reachable from multiple databases.
        self._creating = {}

        # List of oids of modified objects, which have to be invalidated
        # in the cache on abort and in other connections on finish.
        self._modified = []

        # _invalidated queues invalidate messages delivered from the DB
        # _inv_lock prevents one thread from modifying the set while
        # another is processing invalidations.  All the invalidations
        # from a single transaction should be applied atomically, so
        # the lock must be held when reading _invalidated.

        # It sucks that we have to hold the lock to read _invalidated.
        # Normally, _invalidated is written by calling dict.update, which
        # will execute atomically by virtue of the GIL.  But some storage
        # might generate oids where hash or compare invokes Python code.  In
        # that case, the GIL can't save us.
        # Note:  since that was written, it was officially declared that the
        # type of an oid is str.  TODO:  remove the related now-unnecessary
        # critical sections (if any -- this needs careful thought).

        self._inv_lock = threading.Lock()
        self._invalidated = set()

        # Flag indicating whether the cache has been invalidated:
        self._invalidatedCache = False

        # We intend to prevent committing a transaction in which
        # ReadConflictError occurs.  _conflicts is the set of oids that
        # experienced ReadConflictError.  Any time we raise ReadConflictError,
        # the oid should be added to this set, and we should be sure that the
        # object is registered.  Because it's registered, Connection.commit()
        # will raise ReadConflictError again (because the oid is in
        # _conflicts).
        self._conflicts = {}

        # If MVCC is enabled, then _mvcc is True and _txn_time stores
        # the upper bound on transactions visible to this connection.
        # That is, all object revisions must be written before _txn_time.
        # If it is None, then the current revisions are acceptable.
        # If the connection is in a version, mvcc will be disabled, because
        # loadBefore() only returns non-version data.
        self._txn_time = None

        # To support importFile(), implemented in the ExportImport base
        # class, we need to run _importDuringCommit() from our commit()
        # method.  If _import is not None, it is a two-tuple of arguments
        # to pass to _importDuringCommit().
        self._import = None

        self._reader = ObjectReader(self, self._cache, self._db.classFactory)
Example #15
0
class Connection(ExportImport, object):
    """Connection to ZODB for loading and storing objects."""

    implements(IConnection, ISavepointDataManager, IPersistentDataManager,
               ISynchronizer)

    _code_timestamp = 0

    ##########################################################################
    # Connection methods, ZODB.IConnection

    def __init__(self, db, version='', cache_size=400, cache_size_bytes=0):
        """Create a new Connection."""

        self._log = logging.getLogger('ZODB.Connection')
        self._debug_info = ()

        self._db = db
        # Multi-database support
        self.connections = {self._db.database_name: self}

        self._version = version
        self._normal_storage = self._storage = db._storage
        self.new_oid = db._storage.new_oid
        self._savepoint_storage = None

        # Do we need to join a txn manager?
        self._needs_to_join = True
        self.transaction_manager = None
        self._opened = None  # time.time() when DB.open() opened us

        self._reset_counter = global_reset_counter
        self._load_count = 0  # Number of objects unghosted
        self._store_count = 0  # Number of objects stored

        # Cache which can ghostify (forget the state of) objects not
        # recently used. Its API is roughly that of a dict, with
        # additional gc-related and invalidation-related methods.
        self._cache = PickleCache(self, cache_size, cache_size_bytes)

        # The pre-cache is used by get to avoid infinite loops when
        # objects immediately load their state when they get their
        # persistent data set.
        self._pre_cache = {}

        if version:
            # Caches for versions end up empty if the version
            # is not used for a while. Non-version caches
            # keep their content indefinitely.
            # Unclear:  Why do we want version caches to behave this way?
            self._cache.cache_drain_resistance = 100

        # List of all objects (not oids) registered as modified by the
        # persistence machinery, or by add(), or whose access caused a
        # ReadConflictError (just to be able to clean them up from the
        # cache on abort with the other modified objects). All objects
        # of this list are either in _cache or in _added.
        self._registered_objects = []

        # Dict of oid->obj added explicitly through add(). Used as a
        # preliminary cache until commit time when objects are all moved
        # to the real _cache. The objects are moved to _creating at
        # commit time.
        self._added = {}

        # During commit this is turned into a list, which receives
        # objects added as a side-effect of storing a modified object.
        self._added_during_commit = None

        # During commit, all objects go to either _modified or _creating:

        # Dict of oid->flag of new objects (without serial), either
        # added by add() or implicitly added (discovered by the
        # serializer during commit). The flag is True for implicit
        # adding. Used during abort to remove created objects from the
        # _cache, and by persistent_id to check that a new object isn't
        # reachable from multiple databases.
        self._creating = {}

        # List of oids of modified objects, which have to be invalidated
        # in the cache on abort and in other connections on finish.
        self._modified = []

        # _invalidated queues invalidate messages delivered from the DB
        # _inv_lock prevents one thread from modifying the set while
        # another is processing invalidations.  All the invalidations
        # from a single transaction should be applied atomically, so
        # the lock must be held when reading _invalidated.

        # It sucks that we have to hold the lock to read _invalidated.
        # Normally, _invalidated is written by calling dict.update, which
        # will execute atomically by virtue of the GIL.  But some storage
        # might generate oids where hash or compare invokes Python code.  In
        # that case, the GIL can't save us.
        # Note:  since that was written, it was officially declared that the
        # type of an oid is str.  TODO:  remove the related now-unnecessary
        # critical sections (if any -- this needs careful thought).

        self._inv_lock = threading.Lock()
        self._invalidated = set()

        # Flag indicating whether the cache has been invalidated:
        self._invalidatedCache = False

        # We intend to prevent committing a transaction in which
        # ReadConflictError occurs.  _conflicts is the set of oids that
        # experienced ReadConflictError.  Any time we raise ReadConflictError,
        # the oid should be added to this set, and we should be sure that the
        # object is registered.  Because it's registered, Connection.commit()
        # will raise ReadConflictError again (because the oid is in
        # _conflicts).
        self._conflicts = {}

        # If MVCC is enabled, then _mvcc is True and _txn_time stores
        # the upper bound on transactions visible to this connection.
        # That is, all object revisions must be written before _txn_time.
        # If it is None, then the current revisions are acceptable.
        # If the connection is in a version, mvcc will be disabled, because
        # loadBefore() only returns non-version data.
        self._txn_time = None

        # To support importFile(), implemented in the ExportImport base
        # class, we need to run _importDuringCommit() from our commit()
        # method.  If _import is not None, it is a two-tuple of arguments
        # to pass to _importDuringCommit().
        self._import = None

        self._reader = ObjectReader(self, self._cache, self._db.classFactory)

    def add(self, obj):
        """Add a new object 'obj' to the database and assign it an oid."""
        if self._opened is None:
            raise ConnectionStateError("The database connection is closed")

        marker = object()
        oid = getattr(obj, "_p_oid", marker)
        if oid is marker:
            raise TypeError(
                "Only first-class persistent objects may be"
                " added to a Connection.", obj)
        elif obj._p_jar is None:
            assert obj._p_oid is None
            oid = obj._p_oid = self._storage.new_oid()
            obj._p_jar = self
            if self._added_during_commit is not None:
                self._added_during_commit.append(obj)
            self._register(obj)
            # Add to _added after calling register(), so that _added
            # can be used as a test for whether the object has been
            # registered with the transaction.
            self._added[oid] = obj
        elif obj._p_jar is not self:
            raise InvalidObjectReference(obj, obj._p_jar)

    def get(self, oid):
        """Return the persistent object with oid 'oid'."""
        if self._opened is None:
            raise ConnectionStateError("The database connection is closed")

        obj = self._cache.get(oid, None)
        if obj is not None:
            return obj
        obj = self._added.get(oid, None)
        if obj is not None:
            return obj
        obj = self._pre_cache.get(oid, None)
        if obj is not None:
            return obj

        # This appears to be an MVCC violation because we are loading
        # the most recent data when perhaps we shouldn't. The key is
        # that we are only creating a ghost!
        p, serial = self._storage.load(oid, self._version)
        obj = self._reader.getGhost(p)

        # Avoid an infinite loop if obj tries to load its state before
        # it is added to the cache and its state refers to it.
        self._pre_cache[oid] = obj
        obj._p_oid = oid
        obj._p_jar = self
        obj._p_changed = None
        obj._p_serial = serial
        self._pre_cache.pop(oid)
        self._cache[oid] = obj
        return obj

    def cacheMinimize(self):
        """Deactivate all unmodified objects in the cache."""
        self._cache.minimize()

    # TODO: we should test what happens when cacheGC is called mid-transaction.
    def cacheGC(self):
        """Reduce cache size to target size."""
        self._cache.incrgc()

    __onCloseCallbacks = None

    def onCloseCallback(self, f):
        """Register a callable, f, to be called by close()."""
        if self.__onCloseCallbacks is None:
            self.__onCloseCallbacks = []
        self.__onCloseCallbacks.append(f)

    def close(self, primary=True):
        """Close the Connection."""
        if not self._needs_to_join:
            # We're currently joined to a transaction.
            raise ConnectionStateError("Cannot close a connection joined to "
                                       "a transaction")

        if self._cache is not None:
            self._cache.incrgc()  # This is a good time to do some GC

        # Call the close callbacks.
        if self.__onCloseCallbacks is not None:
            for f in self.__onCloseCallbacks:
                try:
                    f()
                except:  # except what?
                    f = getattr(f, 'im_self', f)
                    self._log.error("Close callback failed for %s",
                                    f,
                                    exc_info=sys.exc_info())
            self.__onCloseCallbacks = None

        self._debug_info = ()

        if self._opened:
            self.transaction_manager.unregisterSynch(self)

        if primary:
            for connection in self.connections.values():
                if connection is not self:
                    connection.close(False)

            # Return the connection to the pool.
            if self._opened is not None:
                self._db._returnToPool(self)

                # _returnToPool() set self._opened to None.
                # However, we can't assert that here, because self may
                # have been reused (by another thread) by the time we
                # get back here.
        else:
            self._opened = None
            am = self._db._activity_monitor
            if am is not None:
                am.closedConnection(self)

    def db(self):
        """Returns a handle to the database this connection belongs to."""
        return self._db

    def isReadOnly(self):
        """Returns True if the storage for this connection is read only."""
        if self._opened is None:
            raise ConnectionStateError("The database connection is closed")
        return self._storage.isReadOnly()

    def invalidate(self, tid, oids):
        """Notify the Connection that transaction 'tid' invalidated oids."""
        self._inv_lock.acquire()
        try:
            if self._txn_time is None:
                self._txn_time = tid
            self._invalidated.update(oids)
        finally:
            self._inv_lock.release()

    def invalidateCache(self):
        self._inv_lock.acquire()
        try:
            self._invalidatedCache = True
        finally:
            self._inv_lock.release()

    def root(self):
        """Return the database root object."""
        return self.get(z64)

    def getVersion(self):
        """Returns the version this connection is attached to."""
        if self._storage is None:
            raise ConnectionStateError("The database connection is closed")
        return self._version

    def get_connection(self, database_name):
        """Return a Connection for the named database."""
        connection = self.connections.get(database_name)
        if connection is None:
            new_con = self._db.databases[database_name].open(
                transaction_manager=self.transaction_manager,
                version=self._version,
            )
            self.connections.update(new_con.connections)
            new_con.connections = self.connections
            connection = new_con
        return connection

    def _implicitlyAdding(self, oid):
        """Are we implicitly adding an object within the current transaction

        This is used in a check to avoid implicitly adding an object
        to a database in a multi-database situation.
        See serialize.ObjectWriter.persistent_id.

        """
        return (self._creating.get(oid, 0)
                or ((self._savepoint_storage is not None)
                    and self._savepoint_storage.creating.get(oid, 0)))

    def sync(self):
        """Manually update the view on the database."""
        self.transaction_manager.abort()
        self._storage_sync()

    def getDebugInfo(self):
        """Returns a tuple with different items for debugging the
        connection.
        """
        return self._debug_info

    def setDebugInfo(self, *args):
        """Add the given items to the debug information of this connection."""
        self._debug_info = self._debug_info + args

    def getTransferCounts(self, clear=False):
        """Returns the number of objects loaded and stored."""
        res = self._load_count, self._store_count
        if clear:
            self._load_count = 0
            self._store_count = 0
        return res

    # Connection methods
    ##########################################################################

    ##########################################################################
    # Data manager (ISavepointDataManager) methods

    def abort(self, transaction):
        """Abort a transaction and forget all changes."""

        # The order is important here.  We want to abort registered
        # objects before we process the cache.  Otherwise, we may un-add
        # objects added in savepoints.  If they've been modified since
        # the savepoint, then they won't have _p_oid or _p_jar after
        # they've been unadded. This will make the code in _abort
        # confused.

        self._abort()

        if self._savepoint_storage is not None:
            self._abort_savepoint()

        self._tpc_cleanup()

    def _abort(self):
        """Abort a transaction and forget all changes."""

        for obj in self._registered_objects:
            oid = obj._p_oid
            assert oid is not None
            if oid in self._added:
                del self._added[oid]
                del obj._p_jar
                del obj._p_oid
            else:

                # Note: If we invalidate a non-ghostifiable object
                # (i.e. a persistent class), the object will
                # immediately reread its state.  That means that the
                # following call could result in a call to
                # self.setstate, which, of course, must succeed.
                # In general, it would be better if the read could be
                # delayed until the start of the next transaction.  If
                # we read at the end of a transaction and if the
                # object was invalidated during this transaction, then
                # we'll read non-current data, which we'll discard
                # later in transaction finalization.  Unfortunately, we
                # can only delay the read if this abort corresponds to
                # a top-level-transaction abort.  We can't tell if
                # this is a top-level-transaction abort, so we have to
                # go ahead and invalidate now.  Fortunately, it's
                # pretty unlikely that the object we are invalidating
                # was invalidated by another thread, so the risk of a
                # reread is pretty low.

                self._cache.invalidate(oid)

    def _tpc_cleanup(self):
        """Performs cleanup operations to support tpc_finish and tpc_abort."""
        self._conflicts.clear()
        self._needs_to_join = True
        self._registered_objects = []
        self._creating.clear()

    # Process pending invalidations.
    def _flush_invalidations(self):
        self._inv_lock.acquire()
        try:
            # Non-ghostifiable objects may need to read when they are
            # invalidated, so we'll quickly just replace the
            # invalidating dict with a new one.  We'll then process
            # the invalidations after freeing the lock *and* after
            # resetting the time.  This means that invalidations will
            # happen after the start of the transactions.  They are
            # subject to conflict errors and to reading old data.

            # TODO: There is a potential problem lurking for persistent
            # classes.  Suppose we have an invalidation of a persistent
            # class and of an instance.  If the instance is
            # invalidated first and if the invalidation logic uses
            # data read from the class, then the invalidation could
            # be performed with stale data.  Or, suppose that there
            # are instances of the class that are freed as a result of
            # invalidating some object.  Perhaps code in their __del__
            # uses class data.  Really, the only way to properly fix
            # this is to, in fact, make classes ghostifiable.  Then
            # we'd have to reimplement attribute lookup to check the
            # class state and, if necessary, activate the class.  It's
            # much worse than that though, because we'd also need to
            # deal with slots.  When a class is ghostified, we'd need
            # to replace all of the slot operations with versions that
            # reloaded the object when called. It's hard to say which
            # is better or worse.  For now, it seems the risk of
            # using a class while objects are being invalidated seems
            # small enough to be acceptable.

            invalidated = dict.fromkeys(self._invalidated)
            self._invalidated = set()
            self._txn_time = None
            if self._invalidatedCache:
                self._invalidatedCache = False
                invalidated = self._cache.cache_data.copy()
        finally:
            self._inv_lock.release()

        self._cache.invalidate(invalidated)

        # Now is a good time to collect some garbage.
        self._cache.incrgc()

    def tpc_begin(self, transaction):
        """Begin commit of a transaction, starting the two-phase commit."""
        self._modified = []

        # _creating maps oids of new objects to a flag; it is used to
        # remove them from the cache if a transaction aborts.
        self._creating.clear()
        self._normal_storage.tpc_begin(transaction)

    def commit(self, transaction):
        """Commit changes to an object"""

        if self._savepoint_storage is not None:

            # We first checkpoint the current changes to the savepoint
            self.savepoint()

            # then commit all of the savepoint changes at once
            self._commit_savepoint(transaction)

            # No need to call _commit since savepoint did.

        else:
            self._commit(transaction)

    def _commit(self, transaction):
        """Commit changes to an object"""

        if self._import:
            # We are importing an export file. We always do this
            # while making a savepoint so we can copy export data
            # directly to our storage, typically a TmpStore.
            self._importDuringCommit(transaction, *self._import)
            self._import = None

        # Just in case an object is added as a side-effect of storing
        # a modified object.  If, for example, a __getstate__() method
        # calls add(), the newly added objects will show up in
        # _added_during_commit.  This sounds insane, but has actually
        # happened.

        self._added_during_commit = []

        if self._invalidatedCache:
            raise ConflictError()

        for obj in self._registered_objects:
            oid = obj._p_oid
            assert oid
            if oid in self._conflicts:
                raise ReadConflictError(object=obj)

            if obj._p_jar is not self:
                raise InvalidObjectReference(obj, obj._p_jar)
            elif oid in self._added:
                assert obj._p_serial == z64
            elif obj._p_changed:
                if oid in self._invalidated:
                    resolve = getattr(obj, "_p_resolveConflict", None)
                    if resolve is None:
                        raise ConflictError(object=obj)
                self._modified.append(oid)
            else:
                # Nothing to do.  It's been said that it's legal, e.g., for
                # an object to set _p_changed to false after it's been
                # changed and registered.
                continue

            self._store_objects(ObjectWriter(obj), transaction)

        for obj in self._added_during_commit:
            self._store_objects(ObjectWriter(obj), transaction)
        self._added_during_commit = None

    def _store_objects(self, writer, transaction):
        for obj in writer:
            oid = obj._p_oid
            serial = getattr(obj, "_p_serial", z64)

            if ((serial == z64)
                    and ((self._savepoint_storage is None) or
                         (oid not in self._savepoint_storage.creating)
                         or self._savepoint_storage.creating[oid])):

                # obj is a new object

                # Because obj was added, it is now in _creating, so it
                # can be removed from _added.  If oid wasn't in
                # _added, then we are adding it implicitly.

                implicitly_adding = self._added.pop(oid, None) is None

                self._creating[oid] = implicitly_adding

            else:
                if (oid in self._invalidated
                        and not hasattr(obj, '_p_resolveConflict')):
                    raise ConflictError(object=obj)
                self._modified.append(oid)
            p = writer.serialize(obj)  # This calls __getstate__ of obj

            if isinstance(obj, Blob):
                if not IBlobStorage.providedBy(self._storage):
                    raise Unsupported("Storing Blobs in %s is not supported." %
                                      repr(self._storage))
                if obj.opened():
                    raise ValueError("Can't commit with opened blobs.")
                s = self._storage.storeBlob(oid, serial, p, obj._uncommitted(),
                                            self._version, transaction)
                # we invalidate the object here in order to ensure
                # that the next attribute access of its name
                # unghostifies it, which will cause its blob data
                # to be reattached "cleanly"
                obj._p_invalidate()
            else:
                s = self._storage.store(oid, serial, p, self._version,
                                        transaction)
            self._cache.update_object_size_estimation(oid, len(p))
            obj._p_estimated_size = len(p)
            self._store_count += 1
            # Put the object in the cache before handling the
            # response, just in case the response contains the
            # serial number for a newly created object
            try:
                self._cache[oid] = obj
            except:
                # Dang, I bet it's wrapped:
                # TODO:  Deprecate, then remove, this.
                if hasattr(obj, 'aq_base'):
                    self._cache[oid] = obj.aq_base
                else:
                    raise

            self._handle_serial(s, oid)

    def _handle_serial(self, store_return, oid=None, change=1):
        """Handle the returns from store() and tpc_vote() calls."""

        # These calls can return different types depending on whether
        # ZEO is used.  ZEO uses asynchronous returns that may be
        # returned in batches by the ClientStorage.  ZEO1 can also
        # return an exception object and expect that the Connection
        # will raise the exception.

        # When conflict resolution occurs, the object state held by
        # the connection does not match what is written to the
        # database.  Invalidate the object here to guarantee that
        # the new state is read the next time the object is used.

        if not store_return:
            return
        if isinstance(store_return, str):
            assert oid is not None
            self._handle_one_serial(oid, store_return, change)
        else:
            for oid, serial in store_return:
                self._handle_one_serial(oid, serial, change)

    def _handle_one_serial(self, oid, serial, change):
        if not isinstance(serial, str):
            raise serial
        obj = self._cache.get(oid, None)
        if obj is None:
            return
        if serial == ResolvedSerial:
            del obj._p_changed  # transition from changed to ghost
        else:
            if change:
                obj._p_changed = 0  # transition from changed to up-to-date
            obj._p_serial = serial

    def tpc_abort(self, transaction):
        if self._import:
            self._import = None

        if self._savepoint_storage is not None:
            self._abort_savepoint()

        self._storage.tpc_abort(transaction)

        # Note: If we invalidate a non-ghostifiable object (i.e. a
        # persistent class), the object will immediately reread its
        # state.  That means that the following call could result in a
        # call to self.setstate, which, of course, must succeed.  In
        # general, it would be better if the read could be delayed
        # until the start of the next transaction.  If we read at the
        # end of a transaction and if the object was invalidated
        # during this transaction, then we'll read non-current data,
        # which we'll discard later in transaction finalization.  We
        # could, theoretically queue this invalidation by calling
        # self.invalidate.  Unfortunately, attempts to make that
        # change resulted in mysterious test failures.  It's pretty
        # unlikely that the object we are invalidating was invalidated
        # by another thread, so the risk of a reread is pretty low.
        # It's really not worth the effort to pursue this.

        self._cache.invalidate(self._modified)
        self._invalidate_creating()
        while self._added:
            oid, obj = self._added.popitem()
            del obj._p_oid
            del obj._p_jar
        self._tpc_cleanup()

    def _invalidate_creating(self, creating=None):
        """Disown any objects newly saved in an uncommitted transaction."""
        if creating is None:
            creating = self._creating
            self._creating = {}

        for oid in creating:
            o = self._cache.get(oid)
            if o is not None:
                del self._cache[oid]
                del o._p_jar
                del o._p_oid

    def tpc_vote(self, transaction):
        """Verify that a data manager can commit the transaction."""
        try:
            vote = self._storage.tpc_vote
        except AttributeError:
            return
        s = vote(transaction)
        self._handle_serial(s)

    def tpc_finish(self, transaction):
        """Indicate confirmation that the transaction is done."""
        def callback(tid):
            d = dict.fromkeys(self._modified)
            self._db.invalidate(tid, d, self)


#       It's important that the storage calls the passed function
#       while it still has its lock.  We don't want another thread
#       to be able to read any updated data until we've had a chance
#       to send an invalidation message to all of the other
#       connections!

        self._storage.tpc_finish(transaction, callback)
        self._tpc_cleanup()

    def sortKey(self):
        """Return a consistent sort key for this connection."""
        return "%s:%s" % (self._storage.sortKey(), id(self))

    # Data manager (ISavepointDataManager) methods
    ##########################################################################

    ##########################################################################
    # Transaction-manager synchronization -- ISynchronizer

    def beforeCompletion(self, txn):
        # We don't do anything before a commit starts.
        pass

    # Call the underlying storage's sync() method (if any), and process
    # pending invalidations regardless.  Of course this should only be
    # called at transaction boundaries.
    def _storage_sync(self, *ignored):
        sync = getattr(self._storage, 'sync', 0)
        if sync:
            sync()
        self._flush_invalidations()

    afterCompletion = _storage_sync
    newTransaction = _storage_sync

    # Transaction-manager synchronization -- ISynchronizer
    ##########################################################################

    ##########################################################################
    # persistent.interfaces.IPersistentDatamanager

    def oldstate(self, obj, tid):
        """Return copy of 'obj' that was written by transaction 'tid'."""
        assert obj._p_jar is self
        p = self._storage.loadSerial(obj._p_oid, tid)
        return self._reader.getState(p)

    def setstate(self, obj):
        """Turns the ghost 'obj' into a real object by loading its state from
        the database."""
        oid = obj._p_oid

        if self._opened is None:
            msg = ("Shouldn't load state for %s "
                   "when the connection is closed" % oid_repr(oid))
            self._log.error(msg)
            raise ConnectionStateError(msg)

        try:
            self._setstate(obj)
        except ConflictError:
            raise
        except:
            self._log.error("Couldn't load state for %s",
                            oid_repr(oid),
                            exc_info=sys.exc_info())
            raise

    def _setstate(self, obj):
        # Helper for setstate(), which provides logging of failures.

        # The control flow is complicated here to avoid loading an
        # object revision that we are sure we aren't going to use.  As
        # a result, invalidation tests occur before and after the
        # load.  We can only be sure about invalidations after the
        # load.

        # If an object has been invalidated, there are several cases
        # to consider:
        # 1. Check _p_independent()
        # 2. Try MVCC
        # 3. Raise ConflictError.

        # Does anything actually use _p_independent()?  It would simplify
        # the code if we could drop support for it.
        # (BTrees.Length does.)

        # There is a harmless data race with self._invalidated.  A
        # dict update could go on in another thread, but we don't care
        # because we have to check again after the load anyway.

        if self._invalidatedCache:
            raise ReadConflictError()

        if (obj._p_oid in self._invalidated
                and not myhasattr(obj, "_p_independent")):
            # If the object has _p_independent(), we will handle it below.
            self._load_before_or_conflict(obj)
            return

        p, serial = self._storage.load(obj._p_oid, self._version)
        self._load_count += 1

        self._inv_lock.acquire()
        try:
            invalid = obj._p_oid in self._invalidated
        finally:
            self._inv_lock.release()

        if invalid:
            if myhasattr(obj, "_p_independent"):
                # This call will raise a ReadConflictError if something
                # goes wrong
                self._handle_independent(obj)
            else:
                self._load_before_or_conflict(obj)
                return

        self._reader.setGhostState(obj, p)
        obj._p_serial = serial
        self._cache.update_object_size_estimation(obj._p_oid, len(p))
        obj._p_estimated_size = len(p)

        # Blob support
        if isinstance(obj, Blob):
            obj._p_blob_uncommitted = None
            obj._p_blob_committed = self._storage.loadBlob(obj._p_oid, serial)

    def _load_before_or_conflict(self, obj):
        """Load non-current state for obj or raise ReadConflictError."""
        if not ((not self._version) and self._setstate_noncurrent(obj)):
            self._register(obj)
            self._conflicts[obj._p_oid] = True
            raise ReadConflictError(object=obj)

    def _setstate_noncurrent(self, obj):
        """Set state using non-current data.

        Return True if state was available, False if not.
        """
        try:
            # Load data that was current before the commit at txn_time.
            t = self._storage.loadBefore(obj._p_oid, self._txn_time)
        except KeyError:
            return False
        if t is None:
            return False
        data, start, end = t
        # The non-current transaction must have been written before
        # txn_time.  It must be current at txn_time, but could have
        # been modified at txn_time.

        assert start < self._txn_time, (u64(start), u64(self._txn_time))
        assert end is not None
        assert self._txn_time <= end, (u64(self._txn_time), u64(end))
        self._reader.setGhostState(obj, data)
        obj._p_serial = start
        return True

    def _handle_independent(self, obj):
        # Helper method for setstate() handles possibly independent objects
        # Call _p_independent(), if it returns True, setstate() wins.
        # Otherwise, raise a ConflictError.

        if obj._p_independent():
            self._inv_lock.acquire()
            try:
                try:
                    self._invalidated.remove(obj._p_oid)
                except KeyError:
                    pass
            finally:
                self._inv_lock.release()
        else:
            self._conflicts[obj._p_oid] = 1
            self._register(obj)
            raise ReadConflictError(object=obj)

    def register(self, obj):
        """Register obj with the current transaction manager.

        A subclass could override this method to customize the default
        policy of one transaction manager for each thread.

        obj must be an object loaded from this Connection.
        """
        assert obj._p_jar is self
        if obj._p_oid is None:
            # The actual complaint here is that an object without
            # an oid is being registered.  I can't think of any way to
            # achieve that without assignment to _p_jar.  If there is
            # a way, this will be a very confusing exception.
            raise ValueError("assigning to _p_jar is not supported")
        elif obj._p_oid in self._added:
            # It was registered before it was added to _added.
            return
        self._register(obj)

    def _register(self, obj=None):

        # The order here is important.  We need to join before
        # registering the object, because joining may take a
        # savepoint, and the savepoint should not reflect the change
        # to the object.

        if self._needs_to_join:
            self.transaction_manager.get().join(self)
            self._needs_to_join = False

        if obj is not None:
            self._registered_objects.append(obj)

    # persistent.interfaces.IPersistentDatamanager
    ##########################################################################

    ##########################################################################
    # PROTECTED stuff (used by e.g. ZODB.DB.DB)

    def _cache_items(self):
        # find all items on the lru list
        items = self._cache.lru_items()
        # find everything: some items are on the lru list, some are not
        everything = self._cache.cache_data
        # remove those items that are on the lru list
        for k, v in items:
            del everything[k]
        # return a list of [ghosts....not recently used.....recently used]
        return everything.items() + items

    def open(self, transaction_manager=None, delegate=True):
        """Register odb, the DB that this Connection uses.

        This method is called by the DB every time a Connection
        is opened.  Any invalidations received while the Connection
        was closed will be processed.

        If the global module function resetCaches() was called, the
        cache will be cleared.

        Parameters:
        transaction_manager: transaction manager to use.  None means
            use the default transaction manager.
        delegate: when true, also open the secondary connections of a
            multi-database.
        """

        self._opened = time()

        if transaction_manager is None:
            transaction_manager = transaction.manager

        self.transaction_manager = transaction_manager

        if self._reset_counter != global_reset_counter:
            # New code is in place.  Start a new cache.
            self._resetCache()
        else:
            self._flush_invalidations()

        transaction_manager.registerSynch(self)

        if self._cache is not None:
            self._cache.incrgc()  # This is a good time to do some GC

        if delegate:
            # delegate open to secondary connections
            for connection in self.connections.values():
                if connection is not self:
                    connection.open(transaction_manager, False)

    def _resetCache(self):
        """Creates a new cache, discarding the old one.

        See the docstring for the resetCaches() function.
        """
        self._reset_counter = global_reset_counter
        self._invalidated.clear()
        self._invalidatedCache = False
        cache_size = self._cache.cache_size
        cache_size_bytes = self._cache.cache_size_bytes
        self._cache = cache = PickleCache(self, cache_size, cache_size_bytes)

    ##########################################################################
    # Python protocol

    def __repr__(self):
        if self._version:
            ver = ' (in version %s)' % repr(self._version)
        else:
            ver = ''
        return '<Connection at %08x%s>' % (positive_id(self), ver)

    # Python protocol
    ##########################################################################

    ##########################################################################
    # DEPRECATION candidates

    __getitem__ = get
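    # Alias: conn[oid] behaves like conn.get(oid).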

    def modifiedInVersion(self, oid):
        """Returns the version the object with the given oid was modified in.

        If it wasn't modified in a version, the current version of this
        connection is returned.
        """
        try:
            return self._db.modifiedInVersion(oid)
        except KeyError:
            return self.getVersion()

    def exchange(self, old, new):
        # called by a ZClasses method that isn't executed by the test suite
        oid = old._p_oid
        new._p_oid = oid
        new._p_jar = self
        new._p_changed = 1
        self._register(new)
        self._cache[oid] = new

    # DEPRECATION candidates
    ##########################################################################

    ##########################################################################
    # DEPRECATED methods

    # None at present.

    # DEPRECATED methods
    ##########################################################################

    #####################################################################
    # Savepoint support

    def savepoint(self):
        if self._savepoint_storage is None:
            tmpstore = TmpStore(self._version, self._normal_storage)
            self._savepoint_storage = tmpstore
            self._storage = self._savepoint_storage

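        # _commit() records objects created in this span in self._creating;
        # hand that set to the temporary storage, then reset it.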
        self._creating.clear()
        self._commit(None)
        self._storage.creating.update(self._creating)
        self._creating.clear()
        self._registered_objects = []

        state = self._storage.position, self._storage.index.copy()
        result = Savepoint(self, state)
        # While the interface doesn't guarantee this, savepoints are
        # sometimes used just to "break up" very long transactions, and as
        # a pragmatic matter this is a good time to reduce the cache
        # memory burden.
        self.cacheGC()
        return result

    def _rollback(self, state):
        self._abort()
        self._registered_objects = []
        src = self._storage
        self._cache.invalidate(src.index)
        src.reset(*state)

    def _commit_savepoint(self, transaction):
        """Commit all changes made in savepoints and begin 2-phase commit
        """
        src = self._savepoint_storage
        self._storage = self._normal_storage
        self._savepoint_storage = None

        self._log.debug("Committing savepoints of size %s", src.getSize())
        oids = src.index.keys()

        # Copy invalidating and creating info from temporary storage:
        self._modified.extend(oids)
        self._creating.update(src.creating)

        for oid in oids:
            data, serial = src.load(oid, src)
            obj = self._cache.get(oid, None)
            if obj is not None:
                self._cache.update_object_size_estimation(
                    obj._p_oid, len(data))
                obj._p_estimated_size = len(data)
            if isinstance(self._reader.getGhost(data), Blob):
                blobfilename = src.loadBlob(oid, serial)
                s = self._storage.storeBlob(oid, serial, data, blobfilename,
                                            self._version, transaction)
                # we invalidate the object here in order to ensure
                # that the next attribute access of its name
                # unghostifies it, which will cause its blob data
                # to be reattached "cleanly"
                self.invalidate(s, {oid: True})
            else:
                s = self._storage.store(oid, serial, data, self._version,
                                        transaction)

            self._handle_serial(s, oid, change=False)
        src.close()

    def _abort_savepoint(self):
        """Discard all savepoint data."""
        src = self._savepoint_storage
        self._storage = self._normal_storage
        self._savepoint_storage = None

        # Note: If we invalidate a non-ghostifiable object (i.e. a
        # persistent class), the object will immediately reread its
        # state.  That means that the following call could result in a
        # call to self.setstate, which, of course, must succeed.  In
        # general, it would be better if the read could be delayed
        # until the start of the next transaction.  If we read at the
        # end of a transaction and if the object was invalidated
        # during this transaction, then we'll read non-current data,
        # which we'll discard later in transaction finalization.  We
        # could, theoretically, queue this invalidation by calling
        # self.invalidate.  Unfortunately, attempts to make that
        # change resulted in mysterious test failures.  It's pretty
        # unlikely that the object we are invalidating was invalidated
        # by another thread, so the risk of a reread is pretty low.
        # It's really not worth the effort to pursue this.

        self._cache.invalidate(src.index)
        self._invalidate_creating(src.creating)
        src.close()
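
For orientation, here is a rough usage sketch (not part of the example above) of how the savepoint code is usually reached from application code through the public transaction API. The storage file name and the 'count' key are illustrative only; the comments note which Connection method each call eventually ends up in.

import transaction
from ZODB.DB import DB
from ZODB.FileStorage import FileStorage

db = DB(FileStorage('Data.fs'))      # illustrative storage file
conn = db.open()                     # Connection.open() runs here
root = conn.root()

root['count'] = 0
sp = transaction.savepoint()         # reaches Connection.savepoint()

root['count'] = 1
sp.rollback()                        # reaches Connection._rollback();
                                     # root['count'] is 0 again

transaction.commit()                 # reaches _commit_savepoint(), then the
                                     # two-phase commit methods
conn.close()
db.close()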
Example #16
0
    def __init__(self, db, version='', cache_size=400):
        """Create a new Connection."""

        self._db = db
        self._normal_storage = self._storage = db._storage
        self.new_oid = db._storage.new_oid
        self._savepoint_storage = None

        self.transaction_manager = self._synch = self._mvcc = None

        self._log = logging.getLogger("ZODB.Connection")
        self._debug_info = ()
        self._opened = None  # time.time() when DB.open() opened us

        self._version = version
        self._cache = cache = PickleCache(self, cache_size)
        if version:
            # Caches for versions end up empty if the version
            # is not used for a while. Non-version caches
            # keep their content indefinitely.
            # Unclear:  Why do we want version caches to behave this way?

            self._cache.cache_drain_resistance = 100

        self._committed = []
        self._added = {}
        self._added_during_commit = None
        self._reset_counter = global_reset_counter
        self._load_count = 0  # Number of objects unghosted
        self._store_count = 0  # Number of objects stored
        self._creating = {}

        # List of oids of modified objects (to be invalidated on an abort).
        self._modified = []

        # List of all objects (not oids) registered as modified by the
        # persistence machinery.
        self._registered_objects = []

        # Do we need to join a txn manager?
        self._needs_to_join = True

        # _invalidated queues invalidate messages delivered from the DB
        # _inv_lock prevents one thread from modifying the set while
        # another is processing invalidations.  All the invalidations
        # from a single transaction should be applied atomically, so
        # the lock must be held when reading _invalidated.

        # It sucks that we have to hold the lock to read _invalidated.
        # Normally, _invalidated is written by calling dict.update, which
        # will execute atomically by virtue of the GIL.  But some storage
        # might generate oids where hash or compare invokes Python code.  In
        # that case, the GIL can't save us.
        # Note:  since that was written, it was officially declared that the
        # type of an oid is str.  TODO:  remove the related now-unnecessary
        # critical sections (if any -- this needs careful thought).

        self._inv_lock = threading.Lock()
        self._invalidated = d = {}

        # We intend to prevent committing a transaction in which
        # ReadConflictError occurs.  _conflicts is the set of oids that
        # experienced ReadConflictError.  Any time we raise ReadConflictError,
        # the oid should be added to this set, and we should be sure that the
        # object is registered.  Because it's registered, Connection.commit()
        # will raise ReadConflictError again (because the oid is in
        # _conflicts).
        self._conflicts = {}

        # If MVCC is enabled, then _mvcc is True and _txn_time stores
        # the upper bound on transactions visible to this connection.
        # That is, all object revisions must be written before _txn_time.
        # If it is None, then the current revisions are acceptable.
        # If the connection is in a version, mvcc will be disabled, because
        # loadBefore() only returns non-version data.
        self._txn_time = None

        # To support importFile(), implemented in the ExportImport base
        # class, we need to run _importDuringCommit() from our commit()
        # method.  If _import is not None, it is a two-tuple of arguments
        # to pass to _importDuringCommit().
        self._import = None

        self._reader = ObjectReader(self, self._cache, self._db.classFactory)

        # Multi-database support
        self.connections = {self._db.database_name: self}