def cloneByPickle(obj, ignore_list=()):
    """Makes a copy of a ZODB object, loading ghosts as needed.

    Ignores specified objects along the way, replacing them with None
    in the copy.
    """
    ignore_dict = {}
    for o in ignore_list:
        ignore_dict[id(o)] = o

    def persistent_id(ob, ignore_dict=ignore_dict):
        if id(ob) in ignore_dict:
            return 'ignored'
        if getattr(ob, '_p_changed', 0) is None:
            ob._p_changed = 0
        return None

    def persistent_load(ref):
        assert ref == 'ignored'
        # Return a placeholder object that will be replaced by
        # removeNonVersionedData().
        placeholder = SimpleItem()
        placeholder.id = "ignored_subobject"
        return placeholder

    stream = BytesIO()
    p = Pickler(stream, 1)
    p.persistent_id = persistent_id
    p.dump(obj)
    stream.seek(0)
    u = Unpickler(stream)
    u.persistent_load = persistent_load
    return u.load()
def cloneByPickle(obj, ignore_list=()):
    """Makes a copy of a ZODB object, loading ghosts as needed.

    Ignores specified objects along the way, replacing them with None
    in the copy.
    """
    ignore_dict = {}
    for o in ignore_list:
        ignore_dict[id(o)] = o

    def persistent_id(ob, ignore_dict=ignore_dict):
        if id(ob) in ignore_dict:
            return 'ignored'
        if getattr(ob, '_p_changed', 0) is None:
            ob._p_changed = 0
        return None

    def persistent_load(ref):
        assert ref == 'ignored'
        # Return a placeholder object that will be replaced by
        # removeNonVersionedData().
        placeholder = SimpleItem()
        placeholder.id = "ignored_subobject"
        return placeholder

    stream = BytesIO()
    p = Pickler(stream, 1)
    p.persistent_id = persistent_id
    p.dump(obj)
    stream.seek(0)
    u = Unpickler(stream)
    u.persistent_load = persistent_load
    return u.load()
Example #3
0
class ObjectWriter:
    """Serializes objects for storage in the database.

    The ObjectWriter creates object pickles in the ZODB format.  It
    also detects new persistent objects reachable from the current
    object.
    """

    _jar = None

    def __init__(self, obj=None):
        self._file = BytesIO()
        self._p = Pickler(self._file, _protocol)
        if sys.version_info[0] < 3:
            self._p.inst_persistent_id = self.persistent_id
            # PyPy uses a python implementation of cPickle in both Python 2
            # and Python 3. We can't really detect inst_persistent_id as its
            # a magic attribute that's not readable, but it doesn't hurt to
            # simply always assign to persistent_id also
            self._p.persistent_id = self.persistent_id
        else:
            self._p.persistent_id = self.persistent_id
        self._stack = []
        if obj is not None:
            self._stack.append(obj)
            jar = obj._p_jar
            assert myhasattr(jar, "new_oid")
            self._jar = jar

    def persistent_id(self, obj):
        """Return the persistent id for obj.

        >>> from ZODB.tests.util import P
        >>> class DummyJar:
        ...     xrefs = True
        ...     def new_oid(self):
        ...         return 42
        ...     def db(self):
        ...         return self
        ...     databases = {}

        >>> jar = DummyJar()
        >>> class O:
        ...     _p_jar = jar
        >>> writer = ObjectWriter(O)

        Normally, object references include the oid and a cached named
        reference to the class.  Having the class information
        available allows fast creation of the ghost, avoiding
        requiring an additional database lookup.

        >>> bob = P('bob')
        >>> oid, cls = writer.persistent_id(bob)
        >>> oid
        42
        >>> cls is P
        True

        If a persistent object does not already have an oid and jar,
        these will be assigned by persistent_id():

        >>> bob._p_oid
        42
        >>> bob._p_jar is jar
        True

        If the object already has a persistent id, the id is not changed:

        >>> bob._p_oid = 24
        >>> oid, cls = writer.persistent_id(bob)
        >>> oid
        24
        >>> cls is P
        True

        If the jar doesn't match that of the writer, an error is raised:

        >>> bob._p_jar = DummyJar()
        >>> writer.persistent_id(bob)
        ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
        Traceback (most recent call last):
          ...
        InvalidObjectReference:
        ('Attempt to store an object from a foreign database connection',
        <ZODB.serialize.DummyJar ...>, P(bob))

        Constructor arguments used by __new__(), as returned by
        __getnewargs__(), can affect memory allocation, but may also
        change over the life of the object.  This makes it useless to
        cache even the object's class.

        >>> class PNewArgs(P):
        ...     def __getnewargs__(self):
        ...         return ()

        >>> sam = PNewArgs('sam')
        >>> writer.persistent_id(sam)
        42
        >>> sam._p_oid
        42
        >>> sam._p_jar is jar
        True

        Check that simple objects don't get accused of persistence:

        >>> writer.persistent_id(42)
        >>> writer.persistent_id(object())

        Check that a classic class doesn't get identified improperly:

        >>> class ClassicClara:
        ...    pass
        >>> clara = ClassicClara()

        >>> writer.persistent_id(clara)
        """

        # Most objects are not persistent. The following cheap test
        # identifies most of them.  For these, we return None,
        # signalling that the object should be pickled normally.
        if not isinstance(obj, (Persistent, type, WeakRef)):
            # Not persistent, pickle normally
            return None

        # Any persistent object must have an oid:
        try:
            oid = obj._p_oid
        except AttributeError:
            # Not persistent, pickle normally
            return None

        if not (oid is None or isinstance(oid, bytes)):
            # Deserves a closer look:

            # Make sure it's not a descriptor
            if hasattr(oid, "__get__"):
                # The oid is a descriptor.  That means obj is a non-persistent
                # class whose instances are persistent, so ...
                # Not persistent, pickle normally
                return None

            if oid is WeakRefMarker:
                # we have a weakref, see weakref.py

                oid = obj.oid
                if oid is None:
                    target = obj()  # get the referenced object
                    oid = target._p_oid
                    if oid is None:
                        # Here we are causing the object to be saved in
                        # the database. One could argue that we shouldn't
                        # do this, because a weakref should not cause an object
                        # to be added.  We'll be optimistic, though, and
                        # assume that the object will be added eventually.

                        oid = self._jar.new_oid()
                        target._p_jar = self._jar
                        target._p_oid = oid
                        self._stack.append(target)
                    obj.oid = oid
                    obj.dm = target._p_jar
                    obj.database_name = obj.dm.db().database_name
                if obj.dm is self._jar:
                    return ["w", (oid,)]
                else:
                    return ["w", (oid, obj.database_name)]

        # Since we have an oid, we have either a persistent instance
        # (an instance of Persistent), or a persistent class.

        # NOTE! Persistent classes don't (and can't) subclass persistent.

        database_name = None

        if oid is None:
            oid = obj._p_oid = self._jar.new_oid()
            obj._p_jar = self._jar
            self._stack.append(obj)

        elif obj._p_jar is not self._jar:
            if not self._jar.db().xrefs:
                raise InvalidObjectReference(
                    "Database %r doesn't allow implicit cross-database " "references" % self._jar.db().database_name,
                    self._jar,
                    obj,
                )

            try:
                otherdb = obj._p_jar.db()
                database_name = otherdb.database_name
            except AttributeError:
                otherdb = self

            if self._jar.db().databases.get(database_name) is not otherdb:
                raise InvalidObjectReference(
                    "Attempt to store an object from a foreign " "database connection", self._jar, obj
                )

            if self._jar.get_connection(database_name) is not obj._p_jar:
                raise InvalidObjectReference(
                    "Attempt to store a reference to an object from "
                    "a separate connection to the same database or "
                    "multidatabase",
                    self._jar,
                    obj,
                )

            # OK, we have an object from another database.
            # Lets make sure the object ws not *just* loaded.

            if obj._p_jar._implicitlyAdding(oid):
                raise InvalidObjectReference(
                    "A new object is reachable from multiple databases. " "Won't try to guess which one was correct!",
                    self._jar,
                    obj,
                )

        klass = type(obj)
        if hasattr(klass, "__getnewargs__"):
            # We don't want to save newargs in object refs.
            # It's possible that __getnewargs__ is degenerate and
            # returns (), but we don't want to have to deghostify
            # the object to find out.

            # Note that this has the odd effect that, if the class has
            # __getnewargs__ of its own, we'll lose the optimization
            # of caching the class info.

            if database_name is not None:
                return ["n", (database_name, oid)]

            return oid

        # Note that we never get here for persistent classes.
        # We'll use direct refs for normal classes.

        if database_name is not None:
            return ["m", (database_name, oid, klass)]

        return oid, klass

    def serialize(self, obj):
        # We don't use __class__ here, because obj could be a persistent proxy.
        # We don't want to be fooled by proxies.
        klass = type(obj)

        # We want to serialize persistent classes by name if they have
        # a non-None non-empty module so as not to have a direct
        # ref. This is important when copying.  We probably want to
        # revisit this in the future.
        newargs = getattr(obj, "__getnewargs__", None)
        if isinstance(getattr(klass, "_p_oid", 0), _oidtypes) and klass.__module__:
            # This is a persistent class with a non-empty module.  This
            # uses pickle format #3 or #7.
            klass = klass.__module__, klass.__name__
            if newargs is None:
                meta = klass, None
            else:
                meta = klass, newargs()
        elif newargs is None:
            # Pickle format #1.
            meta = klass
        else:
            # Pickle format #2.
            meta = klass, newargs()

        return self._dump(meta, obj.__getstate__())

    def _dump(self, classmeta, state):
        # To reuse the existing BytesIO object, we must reset
        # the file position to 0 and truncate the file after the
        # new pickle is written.
        self._file.seek(0)
        self._p.clear_memo()
        self._p.dump(classmeta)
        self._p.dump(state)
        self._file.truncate()
        return self._file.getvalue()

    def __iter__(self):
        return NewObjectIterator(self._stack)
Example #4
0
class ObjectWriter:
    """Serializes objects for storage in the database.

    The ObjectWriter creates object pickles in the ZODB format.  It
    also detects new persistent objects reachable from the current
    object.
    """

    _jar = None

    def __init__(self, obj=None):
        self._file = BytesIO()
        self._p = PersistentPickler(self.persistent_id, self._file, _protocol)
        self._stack = []
        if obj is not None:
            self._stack.append(obj)
            jar = obj._p_jar
            assert myhasattr(jar, "new_oid")
            self._jar = jar

    def persistent_id(self, obj):
        """Return the persistent id for obj.

        >>> from ZODB.tests.util import P
        >>> class DummyJar:
        ...     xrefs = True
        ...     def new_oid(self):
        ...         return 42
        ...     def db(self):
        ...         return self
        ...     databases = {}

        >>> jar = DummyJar()
        >>> class O:
        ...     _p_jar = jar
        >>> writer = ObjectWriter(O)

        Normally, object references include the oid and a cached named
        reference to the class.  Having the class information
        available allows fast creation of the ghost, avoiding
        requiring an additional database lookup.

        >>> bob = P('bob')
        >>> oid, cls = writer.persistent_id(bob)
        >>> oid
        42
        >>> cls is P
        True

        If a persistent object does not already have an oid and jar,
        these will be assigned by persistent_id():

        >>> bob._p_oid
        42
        >>> bob._p_jar is jar
        True

        If the object already has a persistent id, the id is not changed:

        >>> bob._p_oid = 24
        >>> oid, cls = writer.persistent_id(bob)
        >>> oid
        24
        >>> cls is P
        True

        If the jar doesn't match that of the writer, an error is raised:

        >>> bob._p_jar = DummyJar()
        >>> writer.persistent_id(bob)
        ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
        Traceback (most recent call last):
          ...
        InvalidObjectReference:
        ('Attempt to store an object from a foreign database connection',
        <ZODB.serialize.DummyJar ...>, P(bob))

        Constructor arguments used by __new__(), as returned by
        __getnewargs__(), can affect memory allocation, but may also
        change over the life of the object.  This makes it useless to
        cache even the object's class.

        >>> class PNewArgs(P):
        ...     def __getnewargs__(self):
        ...         return ()

        >>> sam = PNewArgs('sam')
        >>> writer.persistent_id(sam)
        42
        >>> sam._p_oid
        42
        >>> sam._p_jar is jar
        True

        Check that simple objects don't get accused of persistence:

        >>> writer.persistent_id(42)
        >>> writer.persistent_id(object())

        Check that a classic class doesn't get identified improperly:

        >>> class ClassicClara:
        ...    pass
        >>> clara = ClassicClara()

        >>> writer.persistent_id(clara)
        """

        # Most objects are not persistent. The following cheap test
        # identifies most of them.  For these, we return None,
        # signalling that the object should be pickled normally.
        if not isinstance(obj, (Persistent, type, WeakRef)):
            # Not persistent, pickle normally
            return None

        # Any persistent object must have an oid:
        try:
            oid = obj._p_oid
        except AttributeError:
            # Not persistent, pickle normally
            return None

        if not (oid is None or isinstance(oid, bytes)):
            # Deserves a closer look:

            # Make sure it's not a descriptor
            if hasattr(oid, '__get__'):
                # The oid is a descriptor.  That means obj is a non-persistent
                # class whose instances are persistent, so ...
                # Not persistent, pickle normally
                return None

            if oid is WeakRefMarker:
                # we have a weakref, see weakref.py

                oid = obj.oid
                if oid is None:
                    target = obj()  # get the referenced object
                    oid = target._p_oid
                    if oid is None:
                        # Here we are causing the object to be saved in
                        # the database. One could argue that we shouldn't
                        # do this, because a weakref should not cause an object
                        # to be added.  We'll be optimistic, though, and
                        # assume that the object will be added eventually.

                        oid = self._jar.new_oid()
                        target._p_jar = self._jar
                        target._p_oid = oid
                        self._stack.append(target)
                    obj.oid = oid
                    obj.dm = target._p_jar
                    obj.database_name = obj.dm.db().database_name
                if obj.dm is self._jar:
                    return ['w', (oid, )]
                else:
                    return ['w', (oid, obj.database_name)]

        # Since we have an oid, we have either a persistent instance
        # (an instance of Persistent), or a persistent class.

        # NOTE! Persistent classes don't (and can't) subclass persistent.

        database_name = None

        if oid is None:
            oid = obj._p_oid = self._jar.new_oid()
            obj._p_jar = self._jar
            self._stack.append(obj)

        elif obj._p_jar is not self._jar:
            if not self._jar.db().xrefs:
                raise InvalidObjectReference(
                    "Database %r doesn't allow implicit cross-database "
                    "references" % self._jar.db().database_name, self._jar,
                    obj)

            try:
                otherdb = obj._p_jar.db()
                database_name = otherdb.database_name
            except AttributeError:
                otherdb = self

            if self._jar.db().databases.get(database_name) is not otherdb:
                raise InvalidObjectReference(
                    "Attempt to store an object from a foreign "
                    "database connection",
                    self._jar,
                    obj,
                )

            if self._jar.get_connection(database_name) is not obj._p_jar:
                raise InvalidObjectReference(
                    "Attempt to store a reference to an object from "
                    "a separate connection to the same database or "
                    "multidatabase",
                    self._jar,
                    obj,
                )

            # OK, we have an object from another database.
            # Lets make sure the object ws not *just* loaded.

            if obj._p_jar._implicitlyAdding(oid):
                raise InvalidObjectReference(
                    "A new object is reachable from multiple databases. "
                    "Won't try to guess which one was correct!",
                    self._jar,
                    obj,
                )

        klass = type(obj)
        if hasattr(klass, '__getnewargs__'):
            # We don't want to save newargs in object refs.
            # It's possible that __getnewargs__ is degenerate and
            # returns (), but we don't want to have to deghostify
            # the object to find out.

            # Note that this has the odd effect that, if the class has
            # __getnewargs__ of its own, we'll lose the optimization
            # of caching the class info.

            if database_name is not None:
                return ['n', (database_name, oid)]

            return oid

        # Note that we never get here for persistent classes.
        # We'll use direct refs for normal classes.

        if database_name is not None:
            return ['m', (database_name, oid, klass)]

        return oid, klass

    def serialize(self, obj):
        # We don't use __class__ here, because obj could be a persistent proxy.
        # We don't want to be fooled by proxies.
        klass = type(obj)

        # We want to serialize persistent classes by name if they have
        # a non-None non-empty module so as not to have a direct
        # ref. This is important when copying.  We probably want to
        # revisit this in the future.
        newargs = getattr(obj, "__getnewargs__", None)
        if (isinstance(getattr(klass, '_p_oid', 0), _oidtypes)
                and klass.__module__):
            # This is a persistent class with a non-empty module.  This
            # uses pickle format #3 or #7.
            klass = klass.__module__, klass.__name__
            if newargs is None:
                meta = klass, None
            else:
                meta = klass, newargs()
        elif newargs is None:
            # Pickle format #1.
            meta = klass
        else:
            # Pickle format #2.
            meta = klass, newargs()

        return self._dump(meta, obj.__getstate__())

    def _dump(self, classmeta, state):
        # To reuse the existing BytesIO object, we must reset
        # the file position to 0 and truncate the file after the
        # new pickle is written.
        self._file.seek(0)
        self._p.clear_memo()
        self._p.dump(classmeta)
        self._p.dump(state)
        self._file.truncate()
        return self._file.getvalue()

    def __iter__(self):
        return NewObjectIterator(self._stack)