def touch_every_reference(connection, *words):
    """(connection:Connection, *words:(str))
    Mark as changed every object whose pickled class/state contains any
    of the given words.  This is useful when you move or rename a class,
    so that all references can be updated.
    """
    get = connection.get
    reader = ObjectReader(connection)
    words = [as_bytes(w) for w in words]
    for oid, record in connection.get_storage().gen_oid_record():
        record_oid, data, refs = unpack_record(record)
        state = reader.get_state_pickle(data)
        for word in words:
            if word in data or word in state:
                get(oid)._p_note_change()
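
# A minimal usage sketch for touch_every_reference(); the storage file name
# and module path below are hypothetical.  After a class is moved out of
# app.old_module (with the old import path still resolving, e.g. through an
# alias module), touching and committing rewrites every record that still
# mentions the old path.
def example_rename_class():
    from durus.connection import Connection
    from durus.file_storage import FileStorage
    connection = Connection(FileStorage('data.durus'))
    touch_every_reference(connection, 'app.old_module')
    connection.commit()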
def check_object_reader(self):
    class FakeConnection:
        pass
    self.r = r = ObjectReader(FakeConnection())
    root = ('\x80\x02cdurus.persistent_dict\nPersistentDict\nq\x01.'
            '\x80\x02}q\x02U\x04dataq\x03}q\x04s.\x00\x00\x00\x00')
    root = as_bytes(root)
    assert r.get_ghost(root)._p_is_ghost()
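
# For context on the test above: the `root` bytes begin with a class pickle
# followed by a state pickle, which is the record-data layout ObjectReader
# parses (get_ghost() reads only the first pickle).  A standalone sketch of
# that two-pickle layout, using a plain dict instead of a Durus class:
def example_two_pickle_layout():
    from io import BytesIO
    import pickle
    buf = BytesIO()
    pickle.Pickler(buf, 2).dump(dict)       # first pickle: the class
    pickle.Pickler(buf, 2).dump({'a': 1})   # second pickle: the state
    unpickler = pickle.Unpickler(BytesIO(buf.getvalue()))
    assert unpickler.load() is dict         # reads just the class pickle
    assert unpickler.load() == {'a': 1}     # continues with the state pickle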
class Connection (ConnectionBase):
    """
    The Connection manages movement of objects in and out of storage.

    Instance attributes:
      storage: Storage
      cache: Cache
      reader: ObjectReader
      changed: {oid:str : PersistentObject}
      invalid_oids: set([str])
        Set of oids of objects known to have obsolete state.
      transaction_serial: int
        Number of calls to commit() or abort() since this instance was
        created.  This is used to maintain consistency, and to implement
        LRU replacement in the cache.
    """
    def __init__(self, storage, cache_size=100000, root_class=None):
        """(storage:Storage|str, cache_size:int=100000,
            root_class:class|None=None)
        Make a connection to `storage`.
        Set the target number of non-ghosted persistent objects to keep in
        the cache at `cache_size`.
        If there is no root object yet, create it as an instance of the
        root_class (or PersistentDict, if root_class is None), calling the
        constructor with no arguments.
        Also, if root_class is not None, verify that it really is the class
        of the root object.
        """
        if isinstance(storage, str):
            from durus.file_storage import FileStorage
            storage = FileStorage(storage)
        assert isinstance(storage, durus.storage.Storage)
        self.storage = storage
        self.reader = ObjectReader(self)
        self.changed = {}
        self.invalid_oids = set()
        self.new_oid = storage.new_oid # needed by serialize
        self.cache = Cache(cache_size)
        self.root = self.get(ROOT_OID)
        if self.root is None:
            new_oid = self.new_oid()
            assert ROOT_OID == new_oid
            self.root = self.get_cache().get_instance(
                ROOT_OID, root_class or PersistentDict, self)
            self.root._p_set_status_saved()
            self.root.__class__.__init__(self.root)
            self.root._p_note_change()
            self.commit()
        assert root_class in (None, self.root.__class__)

    def get_storage(self):
        """() -> Storage"""
        return self.storage

    def get_cache_count(self):
        """() -> int
        Return the number of PersistentObject instances currently in the
        cache.
        """
        return self.cache.get_count()

    def get_cache_size(self):
        """() -> cache_size:int
        Return the target size for the cache.
        """
        return self.cache.get_size()

    def set_cache_size(self, size):
        """(size:int)
        Set the target size for the cache.
        """
        self.cache.set_size(size)

    def get_transaction_serial(self):
        """() -> int
        Return the number of calls to commit() or abort() on this instance.
        """
        return self.transaction_serial

    def get_root(self):
        """() -> PersistentObject
        Returns the root object.
        """
        return self.root

    def get_stored_pickle(self, oid):
        """(oid:str) -> str
        Retrieve the pickle from storage.  Will raise ReadConflictError if
        the oid is invalid.
        """
        assert oid not in self.invalid_oids, "still conflicted: missing abort()"
        try:
            record = self.storage.load(oid)
        except ReadConflictError:
            invalid_oids = self.storage.sync()
            self._handle_invalidations(invalid_oids, read_oid=oid)
            record = self.storage.load(oid)
        oid2, data, refdata = unpack_record(record)
        assert as_bytes(oid) == oid2, (oid, oid2)
        return data

    def get(self, oid):
        """(oid:str|int|long) -> PersistentObject | None
        Return object for `oid`.
        The object may be a ghost.
        """
        if not isinstance(oid, byte_string):
            oid = int8_to_str(oid)
        obj = self.cache.get(oid)
        if obj is not None:
            return obj
        try:
            data = self.get_stored_pickle(oid)
        except KeyError:
            return None
        klass = loads(data)
        obj = self.cache.get_instance(oid, klass, self)
        state = self.reader.get_state(data, load=True)
        obj.__setstate__(state)
        obj._p_set_status_saved()
        return obj

    __getitem__ = get

    def get_crawler(self, start_oid=ROOT_OID, batch_size=100):
        """(start_oid:str = ROOT_OID, batch_size:int = 100) ->
            sequence(PersistentObject)
        Returns a generator for the sequence of objects in a breadth-first
        traversal of the object graph, starting at the given start_oid.
        The objects in the sequence have their state loaded at the same
        time, so this can be used to initialize the object cache.
        This uses the storage's bulk_load() method to make it faster.
        The batch_size argument sets the number of object records loaded
        on each call to bulk_load().
        """
        oid_record_sequence = self.storage.gen_oid_record(
            start_oid=start_oid, batch_size=batch_size)
        for oid, record in oid_record_sequence:
            obj = self.cache.get(oid)
            if obj is not None and not obj._p_is_ghost():
                yield obj
            else:
                record_oid, data, refdata = unpack_record(record)
                if obj is None:
                    klass = loads(data)
                    obj = self.cache.get_instance(oid, klass, self)
                state = self.reader.get_state(data, load=True)
                obj.__setstate__(state)
                obj._p_set_status_saved()
                yield obj

    def get_cache(self):
        return self.cache

    def load_state(self, obj):
        """(obj:PersistentObject)
        Load the state for the given ghost object.
        """
        assert self.storage is not None, 'connection is closed'
        assert obj._p_is_ghost()
        oid = obj._p_oid
        try:
            pickle = self.get_stored_pickle(oid)
        except DurusKeyError:
            # We have a ghost but cannot find the state for it.  This can
            # happen if the object was removed from the storage as a result
            # of packing.
            raise ReadConflictError([oid])
        state = self.reader.get_state(pickle)
        obj.__setstate__(state)
        obj._p_set_status_saved()

    def get_load_count(self):
        """() -> int
        Returns the number of times that any object's state has been loaded.
        """
        return self.reader.get_load_count()

    def note_access(self, obj):
        assert obj._p_connection is self
        assert obj._p_oid is not None
        obj._p_serial = self.transaction_serial
        self.cache.recent_objects.add(obj)

    def note_change(self, obj):
        """(obj:PersistentObject)
        This is done when any persistent object is changed.  Changed
        objects will be stored when the transaction is committed, or made
        into ghosts on abort.
        """
        # assert obj._p_connection is self
        self.changed[obj._p_oid] = obj

    def shrink_cache(self):
        """
        If the number of saved and unsaved objects is more than twice the
        target cache size (and the target cache size is positive), try to
        ghostify enough of the saved objects to achieve the target cache
        size.
        """
        self.cache.shrink(self)

    def _sync(self):
        """
        Process all invalid_oids so that all non-ghost objects are current.
        """
        invalid_oids = self.storage.sync()
        self.invalid_oids.update(invalid_oids)
        for oid in self.invalid_oids:
            obj = self.cache.get(oid)
            if obj is not None:
                obj._p_set_status_ghost()
        self.invalid_oids.clear()

    def abort(self):
        """
        Abort uncommitted changes, sync, and try to shrink the cache.
        """
        for oid, obj in iteritems(self.changed):
            obj._p_set_status_ghost()
        self.changed.clear()
        self._sync()
        self.shrink_cache()
        self.transaction_serial += 1

    def commit(self):
        """
        If there are any changes, try to store them, and raise
        WriteConflictError if there are any invalid oids saved or if there
        are any invalid oids for non-ghost objects.
        """
        if not self.changed:
            self._sync()
        else:
            assert not self.invalid_oids, "still conflicted: missing abort()"
            self.storage.begin()
            new_objects = {}
            for oid, changed_object in iteritems(self.changed):
                writer = ObjectWriter(self)
                try:
                    for obj in writer.gen_new_objects(changed_object):
                        oid = obj._p_oid
                        if oid in new_objects:
                            continue
                        elif oid not in self.changed:
                            new_objects[oid] = obj
                            self.cache[oid] = obj
                        data, refs = writer.get_state(obj)
                        self.storage.store(oid, pack_record(oid, data, refs))
                        obj._p_set_status_saved()
                finally:
                    writer.close()
            try:
                self.storage.end(self._handle_invalidations)
            except ConflictError:
                for oid, obj in iteritems(new_objects):
                    obj._p_oid = None
                    del self.cache[oid]
                    obj._p_set_status_unsaved()
                    obj._p_connection = None
                raise
            self.changed.clear()
        self.shrink_cache()
        self.transaction_serial += 1

    def _handle_invalidations(self, oids, read_oid=None):
        """(oids:[str], read_oid:str=None)
        Check if any of the oids are for objects that were accessed during
        this transaction.  If so, raise the appropriate conflict exception.
        """
        conflicts = []
        for oid in oids:
            obj = self.cache.get(oid)
            if obj is None:
                continue
            if obj._p_serial == self.transaction_serial:
                conflicts.append(oid)
                self.invalid_oids.add(oid)
            elif not obj._p_is_ghost():
                assert oid not in self.changed
                obj._p_set_status_ghost()
        if conflicts:
            if read_oid is None:
                raise WriteConflictError(conflicts)
            else:
                raise ReadConflictError([read_oid])

    def pack(self):
        """Clear any uncommitted changes and pack the storage."""
        self.abort()
        self.storage.pack()
# An earlier revision of Connection (Python 2 idioms: sets.Set,
# dict.iteritems), before ConnectionBase and transaction serials.
class Connection(object):
    """
    The Connection manages movement of objects in and out of storage.

    Instance attributes:
      storage: Storage
      cache: Cache
      reader: ObjectReader
      changed: {oid:str : Persistent}
      invalid_oids: Set([str])
        Set of oids of objects known to have obsolete state.
      loaded_oids: Set([str])
        Set of oids of objects that were in the SAVED state at some time
        during the current transaction.
    """
    def __init__(self, storage, cache_size=8000):
        """(storage:Storage, cache_size:int=8000)
        Make a connection to `storage`.
        Set the target number of non-ghosted persistent objects to keep in
        the cache at `cache_size`.
        """
        assert isinstance(storage, Storage)
        self.storage = storage
        self.reader = ObjectReader(self)
        self.changed = {}
        self.invalid_oids = Set()
        self.loaded_oids = Set()
        try:
            storage.load(ROOT_OID)
        except KeyError:
            self.storage.begin()
            writer = ObjectWriter(self)
            data, refs = writer.get_state(PersistentDict())
            writer.close()
            self.storage.store(ROOT_OID, pack_record(ROOT_OID, data, refs))
            self.storage.end(self._handle_invalidations)
        self.new_oid = storage.new_oid # needed by serialize
        self.cache = Cache(cache_size)
        self.cache.hold(self.get_root())

    def get_storage(self):
        """() -> Storage"""
        return self.storage

    def get_cache_count(self):
        """() -> int
        Return the number of Persistent instances currently in the cache.
        """
        return self.cache.get_count()

    def get_cache_size(self):
        """() -> cache_size:int
        Return the target size for the cache.
        """
        return self.cache.get_size()

    def set_cache_size(self, size):
        """(size:int)
        Set the target size for the cache.
        """
        self.cache.set_size(size)

    def get_root(self):
        """() -> Persistent
        Returns the root object.
        """
        return self.get(ROOT_OID)

    def get_stored_pickle(self, oid):
        """(oid:str) -> str
        Retrieve the pickle from storage.  Will raise ReadConflictError if
        the pickle is invalid.
        """
        if oid in self.invalid_oids:
            # someone is still trying to read after getting a conflict
            raise ReadConflictError([oid])
        try:
            record = self.storage.load(oid)
        except ReadConflictError:
            invalid_oids = self.storage.sync()
            self._handle_invalidations(invalid_oids, read_oid=oid)
            record = self.storage.load(oid)
        oid2, data, refdata = unpack_record(record)
        assert oid == oid2
        return data

    def get(self, oid):
        """(oid:str|int|long) -> Persistent | None
        Return object for `oid`.
        The object may be a ghost.
        """
        if type(oid) is not str:
            oid = p64(oid)
        obj = self.cache.get(oid)
        if obj is not None:
            return obj
        try:
            pickle = self.get_stored_pickle(oid)
        except KeyError:
            return None
        obj = self.reader.get_ghost(pickle)
        obj._p_oid = oid
        obj._p_connection = self
        obj._p_set_status_ghost()
        self.cache[oid] = obj
        return obj

    __getitem__ = get

    def cache_get(self, oid):
        return self.cache.get(oid)

    def cache_set(self, oid, obj):
        self.cache[oid] = obj

    def load_state(self, obj):
        """(obj:Persistent)
        Load the state for the given ghost object.
        """
        assert self.storage is not None, 'connection is closed'
        assert obj._p_is_ghost()
        oid = obj._p_oid
        setstate = obj.__setstate__
        try:
            pickle = self.get_stored_pickle(oid)
        except DurusKeyError:
            # We have a ghost but cannot find the state for it.  This can
            # happen if the object was removed from the storage as a result
            # of packing.
            raise ReadConflictError([oid])
        state = self.reader.get_state(pickle)
        setstate(state)

    def note_change(self, obj):
        """(obj:Persistent)
        This is done when any persistent object is changed.  Changed
        objects will be stored when the transaction is committed, or made
        into ghosts on abort.
        """
        # assert obj._p_connection is self
        self.changed[obj._p_oid] = obj

    def note_saved(self, obj):
        self.loaded_oids.add(obj._p_oid)

    def shrink_cache(self):
        """
        If the number of saved and unsaved objects is more than twice the
        target cache size (and the target cache size is positive), try to
        ghostify enough of the saved objects to achieve the target cache
        size.
        """
        self.cache.shrink(self.loaded_oids)

    def _sync(self):
        """
        Process all invalid_oids so that all non-ghost objects are current.
        """
        invalid_oids = self.storage.sync()
        self.invalid_oids.update(invalid_oids)
        for oid in self.invalid_oids:
            obj = self.cache.get(oid)
            if obj is not None:
                obj._p_set_status_ghost()
            self.loaded_oids.discard(oid)
        self.invalid_oids.clear()

    def abort(self):
        """
        Abort uncommitted changes, sync, and try to shrink the cache.
        """
        for oid, obj in self.changed.iteritems():
            obj._p_set_status_ghost()
            self.loaded_oids.discard(oid)
        self.changed.clear()
        self._sync()
        self.shrink_cache()

    def commit(self):
        """
        If there are any changes, try to store them, and raise
        ConflictError if there are any invalid oids saved or if there are
        any invalid oids for non-ghost objects.
        """
        if not self.changed:
            self._sync()
        else:
            if self.invalid_oids:
                # someone is trying to commit after a read or write conflict
                raise ConflictError(list(self.invalid_oids))
            self.storage.begin()
            new_objects = {}
            for oid, changed_object in self.changed.iteritems():
                writer = ObjectWriter(self)
                try:
                    for obj in writer.gen_new_objects(changed_object):
                        oid = obj._p_oid
                        if oid in new_objects:
                            continue
                        elif oid not in self.changed:
                            new_objects[oid] = obj
                            self.cache[oid] = obj
                        data, refs = writer.get_state(obj)
                        self.storage.store(oid, pack_record(oid, data, refs))
                        obj._p_set_status_saved()
                finally:
                    writer.close()
            try:
                self.storage.end(self._handle_invalidations)
            except ConflictError:
                for oid, obj in new_objects.iteritems():
                    del self.cache[oid]
                    self.loaded_oids.discard(oid)
                    obj._p_set_status_unsaved()
                    obj._p_oid = None
                    obj._p_connection = None
                raise
            self.changed.clear()
        self.shrink_cache()