def cloneByPickle(obj, ignore_list=()):
    """Makes a copy of a ZODB object, loading ghosts as needed.

    Ignores specified objects along the way, replacing them with
    placeholder objects in the copy.
    """
    ignore_dict = {}
    for o in ignore_list:
        ignore_dict[id(o)] = o

    def persistent_id(ob, ignore_dict=ignore_dict):
        if id(ob) in ignore_dict:
            return 'ignored'
        if getattr(ob, '_p_changed', 0) is None:
            ob._p_changed = 0
        return None

    def persistent_load(ref):
        assert ref == 'ignored'
        # Return a placeholder object that will be replaced by
        # removeNonVersionedData().
        placeholder = SimpleItem()
        placeholder.id = "ignored_subobject"
        return placeholder

    stream = BytesIO()
    p = Pickler(stream, 1)
    p.persistent_id = persistent_id
    p.dump(obj)
    stream.seek(0)
    u = Unpickler(stream)
    u.persistent_load = persistent_load
    return u.load()
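
# Illustrative usage sketch -- not part of the original module.  The names
# `folder` and `large_subobject` are assumptions for the example; any
# persistent object graph whose ghosts can still be loaded would do.
def _example_cloneByPickle(folder, large_subobject):
    # The ignored subobject is replaced in the copy by a SimpleItem
    # placeholder with id "ignored_subobject"; everything else is copied.
    return cloneByPickle(folder, ignore_list=(large_subobject,))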
class ObjectWriter:
    """Serializes objects for storage in the database.

    The ObjectWriter creates object pickles in the ZODB format.  It
    also detects new persistent objects reachable from the current
    object.
    """

    _jar = None

    def __init__(self, obj=None):
        self._file = BytesIO()
        self._p = Pickler(self._file, _protocol)
        if sys.version_info[0] < 3:
            self._p.inst_persistent_id = self.persistent_id

            # PyPy uses a python implementation of cPickle in both Python 2
            # and Python 3.  We can't really detect inst_persistent_id as
            # it's a magic attribute that's not readable, but it doesn't
            # hurt to simply always assign to persistent_id also.
            self._p.persistent_id = self.persistent_id
        else:
            self._p.persistent_id = self.persistent_id
        self._stack = []
        if obj is not None:
            self._stack.append(obj)
            jar = obj._p_jar
            assert myhasattr(jar, "new_oid")
            self._jar = jar

    def persistent_id(self, obj):
        """Return the persistent id for obj.

        >>> from ZODB.tests.util import P
        >>> class DummyJar:
        ...     xrefs = True
        ...     def new_oid(self):
        ...         return 42
        ...     def db(self):
        ...         return self
        ...     databases = {}

        >>> jar = DummyJar()
        >>> class O:
        ...     _p_jar = jar
        >>> writer = ObjectWriter(O)

        Normally, object references include the oid and a cached named
        reference to the class.  Having the class information available
        allows fast creation of the ghost, avoiding an additional
        database lookup.

        >>> bob = P('bob')
        >>> oid, cls = writer.persistent_id(bob)
        >>> oid
        42
        >>> cls is P
        True

        If a persistent object does not already have an oid and jar,
        these will be assigned by persistent_id():

        >>> bob._p_oid
        42
        >>> bob._p_jar is jar
        True

        If the object already has a persistent id, the id is not changed:

        >>> bob._p_oid = 24
        >>> oid, cls = writer.persistent_id(bob)
        >>> oid
        24
        >>> cls is P
        True

        If the jar doesn't match that of the writer, an error is raised:

        >>> bob._p_jar = DummyJar()
        >>> writer.persistent_id(bob)
        ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
        Traceback (most recent call last):
        ...
        InvalidObjectReference:
        ('Attempt to store an object from a foreign database connection',
        <ZODB.serialize.DummyJar ...>, P(bob))

        Constructor arguments used by __new__(), as returned by
        __getnewargs__(), can affect memory allocation, but may also
        change over the life of the object.  This makes it useless to
        cache even the object's class.

        >>> class PNewArgs(P):
        ...     def __getnewargs__(self):
        ...         return ()

        >>> sam = PNewArgs('sam')
        >>> writer.persistent_id(sam)
        42
        >>> sam._p_oid
        42
        >>> sam._p_jar is jar
        True

        Check that simple objects don't get accused of persistence:

        >>> writer.persistent_id(42)
        >>> writer.persistent_id(object())

        Check that a classic class doesn't get identified improperly:

        >>> class ClassicClara:
        ...     pass
        >>> clara = ClassicClara()
        >>> writer.persistent_id(clara)
        """
        # Most objects are not persistent.  The following cheap test
        # identifies most of them.  For these, we return None,
        # signalling that the object should be pickled normally.
        if not isinstance(obj, (Persistent, type, WeakRef)):
            # Not persistent, pickle normally
            return None

        # Any persistent object must have an oid:
        try:
            oid = obj._p_oid
        except AttributeError:
            # Not persistent, pickle normally
            return None

        if not (oid is None or isinstance(oid, bytes)):
            # Deserves a closer look:

            # Make sure it's not a descriptor
            if hasattr(oid, "__get__"):
                # The oid is a descriptor.  That means obj is a
                # non-persistent class whose instances are persistent,
                # so ...
                # Not persistent, pickle normally
                return None

            if oid is WeakRefMarker:
                # we have a weakref, see weakref.py
                oid = obj.oid
                if oid is None:
                    target = obj()  # get the referenced object
                    oid = target._p_oid
                    if oid is None:
                        # Here we are causing the object to be saved in
                        # the database.  One could argue that we shouldn't
                        # do this, because a weakref should not cause an
                        # object to be added.  We'll be optimistic, though,
                        # and assume that the object will be added
                        # eventually.
                        oid = self._jar.new_oid()
                        target._p_jar = self._jar
                        target._p_oid = oid
                        self._stack.append(target)
                    obj.oid = oid
                    obj.dm = target._p_jar
                    obj.database_name = obj.dm.db().database_name
                if obj.dm is self._jar:
                    return ["w", (oid,)]
                else:
                    return ["w", (oid, obj.database_name)]

        # Since we have an oid, we have either a persistent instance
        # (an instance of Persistent), or a persistent class.

        # NOTE! Persistent classes don't (and can't) subclass persistent.

        database_name = None

        if oid is None:
            oid = obj._p_oid = self._jar.new_oid()
            obj._p_jar = self._jar
            self._stack.append(obj)

        elif obj._p_jar is not self._jar:
            if not self._jar.db().xrefs:
                raise InvalidObjectReference(
                    "Database %r doesn't allow implicit cross-database "
                    "references" % self._jar.db().database_name,
                    self._jar, obj,
                )

            try:
                otherdb = obj._p_jar.db()
                database_name = otherdb.database_name
            except AttributeError:
                otherdb = self

            if self._jar.db().databases.get(database_name) is not otherdb:
                raise InvalidObjectReference(
                    "Attempt to store an object from a foreign "
                    "database connection", self._jar, obj,
                )

            if self._jar.get_connection(database_name) is not obj._p_jar:
                raise InvalidObjectReference(
                    "Attempt to store a reference to an object from "
                    "a separate connection to the same database or "
                    "multidatabase", self._jar, obj,
                )

            # OK, we have an object from another database.
            # Let's make sure the object was not *just* loaded.
            if obj._p_jar._implicitlyAdding(oid):
                raise InvalidObjectReference(
                    "A new object is reachable from multiple databases. "
                    "Won't try to guess which one was correct!",
                    self._jar, obj,
                )

        klass = type(obj)
        if hasattr(klass, "__getnewargs__"):
            # We don't want to save newargs in object refs.
            # It's possible that __getnewargs__ is degenerate and
            # returns (), but we don't want to have to deghostify
            # the object to find out.

            # Note that this has the odd effect that, if the class has
            # __getnewargs__ of its own, we'll lose the optimization
            # of caching the class info.
            if database_name is not None:
                return ["n", (database_name, oid)]
            return oid

        # Note that we never get here for persistent classes.
        # We'll use direct refs for normal classes.
        if database_name is not None:
            return ["m", (database_name, oid, klass)]
        return oid, klass

    def serialize(self, obj):
        # We don't use __class__ here, because obj could be a persistent
        # proxy.  We don't want to be fooled by proxies.
        klass = type(obj)

        # We want to serialize persistent classes by name if they have
        # a non-None, non-empty module, so as not to have a direct
        # ref.  This is important when copying.  We probably want to
        # revisit this in the future.
        newargs = getattr(obj, "__getnewargs__", None)
        if (isinstance(getattr(klass, "_p_oid", 0), _oidtypes)
                and klass.__module__):
            # This is a persistent class with a non-empty module.  This
            # uses pickle format #3 or #7.
            klass = klass.__module__, klass.__name__
            if newargs is None:
                meta = klass, None
            else:
                meta = klass, newargs()
        elif newargs is None:
            # Pickle format #1.
            meta = klass
        else:
            # Pickle format #2.
            meta = klass, newargs()

        return self._dump(meta, obj.__getstate__())

    def _dump(self, classmeta, state):
        # To reuse the existing BytesIO object, we must reset
        # the file position to 0 and truncate the file after the
        # new pickle is written.
        self._file.seek(0)
        self._p.clear_memo()
        self._p.dump(classmeta)
        self._p.dump(state)
        self._file.truncate()
        return self._file.getvalue()

    def __iter__(self):
        return NewObjectIterator(self._stack)
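
# Minimal usage sketch -- not part of the original module.  It shows the
# intended driving pattern: serialize the changed object, then keep pulling
# from the writer, which also yields any new persistent objects that became
# reachable while pickling.  `changed_obj` is an assumed name for a modified
# persistent object whose _p_jar supports new_oid().
def _example_store_with_object_writer(changed_obj):
    writer = ObjectWriter(changed_obj)
    records = []
    for obj in writer:  # changed_obj first, then newly reachable objects
        records.append((obj._p_oid, writer.serialize(obj)))
    return records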
class ObjectWriter:
    """Serializes objects for storage in the database.

    The ObjectWriter creates object pickles in the ZODB format.  It
    also detects new persistent objects reachable from the current
    object.
    """

    _jar = None

    def __init__(self, obj=None):
        self._file = BytesIO()
        self._p = PersistentPickler(self.persistent_id, self._file, _protocol)
        self._stack = []
        if obj is not None:
            self._stack.append(obj)
            jar = obj._p_jar
            assert myhasattr(jar, "new_oid")
            self._jar = jar

    def persistent_id(self, obj):
        """Return the persistent id for obj.

        >>> from ZODB.tests.util import P
        >>> class DummyJar:
        ...     xrefs = True
        ...     def new_oid(self):
        ...         return 42
        ...     def db(self):
        ...         return self
        ...     databases = {}

        >>> jar = DummyJar()
        >>> class O:
        ...     _p_jar = jar
        >>> writer = ObjectWriter(O)

        Normally, object references include the oid and a cached named
        reference to the class.  Having the class information available
        allows fast creation of the ghost, avoiding an additional
        database lookup.

        >>> bob = P('bob')
        >>> oid, cls = writer.persistent_id(bob)
        >>> oid
        42
        >>> cls is P
        True

        If a persistent object does not already have an oid and jar,
        these will be assigned by persistent_id():

        >>> bob._p_oid
        42
        >>> bob._p_jar is jar
        True

        If the object already has a persistent id, the id is not changed:

        >>> bob._p_oid = 24
        >>> oid, cls = writer.persistent_id(bob)
        >>> oid
        24
        >>> cls is P
        True

        If the jar doesn't match that of the writer, an error is raised:

        >>> bob._p_jar = DummyJar()
        >>> writer.persistent_id(bob)
        ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
        Traceback (most recent call last):
        ...
        InvalidObjectReference:
        ('Attempt to store an object from a foreign database connection',
        <ZODB.serialize.DummyJar ...>, P(bob))

        Constructor arguments used by __new__(), as returned by
        __getnewargs__(), can affect memory allocation, but may also
        change over the life of the object.  This makes it useless to
        cache even the object's class.

        >>> class PNewArgs(P):
        ...     def __getnewargs__(self):
        ...         return ()

        >>> sam = PNewArgs('sam')
        >>> writer.persistent_id(sam)
        42
        >>> sam._p_oid
        42
        >>> sam._p_jar is jar
        True

        Check that simple objects don't get accused of persistence:

        >>> writer.persistent_id(42)
        >>> writer.persistent_id(object())

        Check that a classic class doesn't get identified improperly:

        >>> class ClassicClara:
        ...     pass
        >>> clara = ClassicClara()
        >>> writer.persistent_id(clara)
        """
        # Most objects are not persistent.  The following cheap test
        # identifies most of them.  For these, we return None,
        # signalling that the object should be pickled normally.
        if not isinstance(obj, (Persistent, type, WeakRef)):
            # Not persistent, pickle normally
            return None

        # Any persistent object must have an oid:
        try:
            oid = obj._p_oid
        except AttributeError:
            # Not persistent, pickle normally
            return None

        if not (oid is None or isinstance(oid, bytes)):
            # Deserves a closer look:

            # Make sure it's not a descriptor
            if hasattr(oid, '__get__'):
                # The oid is a descriptor.  That means obj is a
                # non-persistent class whose instances are persistent,
                # so ...
                # Not persistent, pickle normally
                return None

            if oid is WeakRefMarker:
                # we have a weakref, see weakref.py
                oid = obj.oid
                if oid is None:
                    target = obj()  # get the referenced object
                    oid = target._p_oid
                    if oid is None:
                        # Here we are causing the object to be saved in
                        # the database.  One could argue that we shouldn't
                        # do this, because a weakref should not cause an
                        # object to be added.  We'll be optimistic, though,
                        # and assume that the object will be added
                        # eventually.
                        oid = self._jar.new_oid()
                        target._p_jar = self._jar
                        target._p_oid = oid
                        self._stack.append(target)
                    obj.oid = oid
                    obj.dm = target._p_jar
                    obj.database_name = obj.dm.db().database_name
                if obj.dm is self._jar:
                    return ['w', (oid,)]
                else:
                    return ['w', (oid, obj.database_name)]

        # Since we have an oid, we have either a persistent instance
        # (an instance of Persistent), or a persistent class.

        # NOTE! Persistent classes don't (and can't) subclass persistent.

        database_name = None

        if oid is None:
            oid = obj._p_oid = self._jar.new_oid()
            obj._p_jar = self._jar
            self._stack.append(obj)

        elif obj._p_jar is not self._jar:
            if not self._jar.db().xrefs:
                raise InvalidObjectReference(
                    "Database %r doesn't allow implicit cross-database "
                    "references" % self._jar.db().database_name,
                    self._jar, obj)

            try:
                otherdb = obj._p_jar.db()
                database_name = otherdb.database_name
            except AttributeError:
                otherdb = self

            if self._jar.db().databases.get(database_name) is not otherdb:
                raise InvalidObjectReference(
                    "Attempt to store an object from a foreign "
                    "database connection", self._jar, obj,
                )

            if self._jar.get_connection(database_name) is not obj._p_jar:
                raise InvalidObjectReference(
                    "Attempt to store a reference to an object from "
                    "a separate connection to the same database or "
                    "multidatabase", self._jar, obj,
                )

            # OK, we have an object from another database.
            # Let's make sure the object was not *just* loaded.
            if obj._p_jar._implicitlyAdding(oid):
                raise InvalidObjectReference(
                    "A new object is reachable from multiple databases. "
                    "Won't try to guess which one was correct!",
                    self._jar, obj,
                )

        klass = type(obj)
        if hasattr(klass, '__getnewargs__'):
            # We don't want to save newargs in object refs.
            # It's possible that __getnewargs__ is degenerate and
            # returns (), but we don't want to have to deghostify
            # the object to find out.

            # Note that this has the odd effect that, if the class has
            # __getnewargs__ of its own, we'll lose the optimization
            # of caching the class info.
            if database_name is not None:
                return ['n', (database_name, oid)]
            return oid

        # Note that we never get here for persistent classes.
        # We'll use direct refs for normal classes.
        if database_name is not None:
            return ['m', (database_name, oid, klass)]
        return oid, klass

    def serialize(self, obj):
        # We don't use __class__ here, because obj could be a persistent
        # proxy.  We don't want to be fooled by proxies.
        klass = type(obj)

        # We want to serialize persistent classes by name if they have
        # a non-None, non-empty module, so as not to have a direct
        # ref.  This is important when copying.  We probably want to
        # revisit this in the future.
        newargs = getattr(obj, "__getnewargs__", None)
        if (isinstance(getattr(klass, '_p_oid', 0), _oidtypes)
                and klass.__module__):
            # This is a persistent class with a non-empty module.  This
            # uses pickle format #3 or #7.
            klass = klass.__module__, klass.__name__
            if newargs is None:
                meta = klass, None
            else:
                meta = klass, newargs()
        elif newargs is None:
            # Pickle format #1.
            meta = klass
        else:
            # Pickle format #2.
            meta = klass, newargs()

        return self._dump(meta, obj.__getstate__())

    def _dump(self, classmeta, state):
        # To reuse the existing BytesIO object, we must reset
        # the file position to 0 and truncate the file after the
        # new pickle is written.
        self._file.seek(0)
        self._p.clear_memo()
        self._p.dump(classmeta)
        self._p.dump(state)
        self._file.truncate()
        return self._file.getvalue()

    def __iter__(self):
        return NewObjectIterator(self._stack)
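
# Illustrative helper -- an assumption for this excerpt, not part of ZODB.
# The bytes returned by _dump() are two pickles back to back: the class
# metadata, then the state.  They can be read back with two successive
# load() calls; persistent references inside the state are kept opaque here
# rather than being turned back into objects.
import io
import pickle


class _RefKeepingUnpickler(pickle.Unpickler):
    def persistent_load(self, ref):
        # Keep ZODB persistent references as their raw reference values.
        return ref


def _split_record(data):
    unpickler = _RefKeepingUnpickler(io.BytesIO(data))
    classmeta = unpickler.load()  # e.g. ((module, name), newargs or None)
    state = unpickler.load()      # whatever obj.__getstate__() returned
    return classmeta, state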