def gen_items(records):
    for record in records:
        oid, data, refdata = unpack_record(record)
        yield str_to_int8(oid), as_bytes(data), as_bytes(refdata)
        if self.pack_extra is not None:
            # ensure object and refs are marked alive and not removed
            self.pack_extra.append(oid)
def gen_oid_record(self, start_oid=None, batch_size=100):
    """(start_oid:str = None, batch_size:int = 100) ->
        sequence((oid:str, record:str))
    Returns a generator for the sequence of (oid, record) pairs.

    If a start_oid is given, the resulting sequence follows a breadth-first
    traversal of the object graph, starting at the given start_oid.
    This uses the storage's bulk_load() method because that is faster
    in some cases.  The batch_size argument sets the number of object
    records loaded on each call to bulk_load().

    If no start_oid is given, the sequence may include oids and records
    that are not reachable from the root.
    """
    if start_oid is None:
        start_oid = durus.connection.ROOT_OID
    todo = deque([start_oid])
    seen = set()
    while todo:
        batch = []
        while todo and len(batch) < batch_size:
            oid = todo.popleft()
            if oid not in seen:
                batch.append(oid)
                seen.add(oid)
        for record in self.bulk_load(batch):
            oid, data, refdata = unpack_record(record)
            yield oid, record
            for ref in split_oids(refdata):
                if ref not in seen:
                    todo.append(ref)
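# Hedged usage sketch for gen_oid_record() above: count every record reachable
# from the root by a breadth-first walk.  The FileStorage path 'data.durus'
# and the counting helper are illustrative assumptions, not part of the
# original code.
from durus.connection import ROOT_OID
from durus.file_storage import FileStorage

def count_reachable_records(path='data.durus'):
    storage = FileStorage(path)
    total = 0
    # bulk_load() fetches up to batch_size records per call during the walk
    for oid, record in storage.gen_oid_record(start_oid=ROOT_OID, batch_size=50):
        total += 1
    return total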
def get_crawler(self, start_oid=ROOT_OID, batch_size=100):
    """(start_oid:str = ROOT_OID, batch_size:int = 100) ->
        sequence(PersistentObject)
    Returns a generator for the sequence of objects in a breadth first
    traversal of the object graph, starting at the given start_oid.
    The objects in the sequence have their state loaded at the same time,
    so this can be used to initialize the object cache.
    This uses the storage's bulk_load() method to make it faster.
    The batch_size argument sets the number of object records loaded
    on each call to bulk_load().
    """
    oid_record_sequence = self.storage.gen_oid_record(
        start_oid=start_oid, batch_size=batch_size)
    for oid, record in oid_record_sequence:
        obj = self.cache.get(oid)
        if obj is not None and not obj._p_is_ghost():
            yield obj
        else:
            record_oid, data, refdata = unpack_record(record)
            if obj is None:
                klass = loads(data)
                obj = self.cache.get_instance(oid, klass, self)
            state = self.reader.get_state(data, load=True)
            obj.__setstate__(state)
            obj._p_set_status_saved()
            yield obj
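# Hedged usage sketch for get_crawler() above: warm the connection's object
# cache by loading every object reachable from the root.  The storage path is
# a placeholder assumption.
from durus.connection import Connection
from durus.file_storage import FileStorage

def warm_cache(path='data.durus'):
    connection = Connection(FileStorage(path))
    loaded = 0
    for obj in connection.get_crawler(batch_size=200):
        # each yielded object already has its state loaded (not a ghost)
        loaded += 1
    return loaded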
def gen_reachable_records():
    # we order the todo queue by file offset.  The hope is that the
    # packed file will be mostly the same as the old file in order
    # to speed up the rsync delta process.
    default_rank = 2**64
    pack_todo = [(0, durus.connection.ROOT_OID)]
    while pack_todo or self.pack_extra:
        if self.pack_extra:
            oid = self.pack_extra.pop()
            # note we don't check 'index' because it could be an
            # object that got updated since the pack began and in
            # that case we have to write the new record to the pack
            # file
        else:
            rank, oid = heapq.heappop(pack_todo)
            if oid in index:
                # we already wrote this object record
                continue
        record = self.load(oid)
        oid2, data, refdata = unpack_record(record)
        assert oid == oid2
        # ensure we have records for objects referenced
        for ref_oid in split_oids(refdata):
            item = (self.index.get(ref_oid, default_rank), ref_oid)
            heapq.heappush(pack_todo, item)
        yield (oid, record)
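# Self-contained sketch of the ordering trick used above: heapq compares the
# (rank, oid) tuples lexicographically, so records are emitted in ascending
# file-offset order and oids missing from the index (default_rank) sort last.
# The offsets and oids below are made up for illustration.
import heapq

heap = []
for item in [(4096, b'bbbbbbbb'), (0, b'root0000'), (2 ** 64, b'newobj00'), (512, b'aaaaaaaa')]:
    heapq.heappush(heap, item)
ranks = [heapq.heappop(heap)[0] for _ in range(len(heap))]
assert ranks == [0, 512, 4096, 2 ** 64]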
def gen_referring_oid_record(storage, referred_oid):
    """(storage:Storage, referred_oid:str) -> sequence([oid:str, record:str])
    Generate oid, record pairs for all objects that include a
    reference to the `referred_oid`.
    """
    for oid, record in storage.gen_oid_record():
        if referred_oid in split_oids(unpack_record(record)[2]):
            yield oid, record
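# Hedged example built on gen_referring_oid_record() above: collect the oids
# of every record that references a given oid.  'storage' and 'target_oid'
# are assumed to be supplied by the caller.
def find_referrers(storage, target_oid):
    return [oid for oid, record in gen_referring_oid_record(storage, target_oid)]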
def gen_every_instance(connection, *classes):
    """(connection:Connection, *classes:(class)) -> sequence [PersistentObject]
    Generate all PersistentObject instances that are instances of any
    of the given classes."""
    for oid, record in connection.get_storage().gen_oid_record():
        record_oid, state, refs = unpack_record(record)
        record_class = loads(state)
        if issubclass(record_class, classes):
            yield connection.get(oid)
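# Hedged example for gen_every_instance() above: count instances of one or
# more persistent classes.  BTree is only a stand-in for any PersistentObject
# subclass; 'connection' is assumed to be an open Connection.
from durus.btree import BTree

def count_instances(connection, *classes):
    classes = classes or (BTree,)
    return sum(1 for obj in gen_every_instance(connection, *classes))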
def get_reference_index(storage):
    """(storage:Storage) -> {oid:str : [referring_oid:str]}
    Return a full index giving the referring oids for each oid.
    This might be large.
    """
    result = {}
    for oid, record in storage.gen_oid_record():
        for ref in split_oids(unpack_record(record)[2]):
            result.setdefault(ref, []).append(oid)
    return result
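# Hedged example for get_reference_index() above: build the full reverse index
# once, then answer "which oids refer to this one?" from the dict.  As the
# docstring warns, the index can be large for big storages.
def referrers_of(storage, oid):
    reference_index = get_reference_index(storage)
    return reference_index.get(oid, [])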
def check_record_pack_unpack(self):
    oid = as_bytes('0' * 8)
    data = as_bytes('sample')
    reflist = ['1' * 8, '2' * 8]
    reflist = list(map(as_bytes, reflist))
    refs = join_bytes(reflist)
    result = unpack_record(pack_record(oid, data, refs))
    assert result[0] == oid
    assert result[1] == data
    assert split_oids(result[2]) == reflist
    assert split_oids('') == []
def touch_every_reference(connection, *words):
    """(connection:Connection, *words:(str))
    Mark as changed, every object whose pickled class/state contains any
    of the given words.  This is useful when you move or rename a class,
    so that all references can be updated.
    """
    get = connection.get
    reader = ObjectReader(connection)
    for oid, record in connection.get_storage().gen_oid_record():
        record_oid, data, refs = unpack_record(record)
        state = reader.get_state_pickle(data)
        for word in words:
            if word in data or word in state:
                get(oid)._p_note_change()
def get_stored_pickle(self, oid):
    """(oid:str) -> str
    Retrieve the pickle from storage.  Will raise ReadConflictError if
    the oid is invalid.
    """
    assert oid not in self.invalid_oids, "still conflicted: missing abort()"
    try:
        record = self.storage.load(oid)
    except ReadConflictError:
        invalid_oids = self.storage.sync()
        self._handle_invalidations(invalid_oids, read_oid=oid)
        record = self.storage.load(oid)
    oid2, data, refdata = unpack_record(record)
    assert as_bytes(oid) == oid2, (oid, oid2)
    return data
def touch_every_reference(connection, *words):
    """(connection:Connection, *words:(str))
    Mark as changed, every object whose pickled class/state contains any
    of the given words.  This is useful when you move or rename a class,
    so that all references can be updated.
    """
    get = connection.get
    reader = ObjectReader(connection)
    words = [as_bytes(w) for w in words]
    for oid, record in connection.get_storage().gen_oid_record():
        record_oid, data, refs = unpack_record(record)
        state = reader.get_state_pickle(data)
        for word in words:
            if word in data or word in state:
                get(oid)._p_note_change()
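# Hedged example for touch_every_reference() above: after renaming a class,
# mark every object whose pickle mentions the old name so the next commit
# rewrites those records.  The module path is a placeholder.
def update_renamed_class(connection, old_name='myapp.models.OldName'):
    touch_every_reference(connection, old_name)
    connection.commit()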
def gen_reachable_records():
    todo = [ROOT_OID]
    seen = Set()
    while todo:
        oid = todo.pop()
        if oid in seen:
            continue
        seen.add(oid)
        record = self.load(oid)
        record_oid, data, refdata = unpack_record(record)
        assert oid == record_oid
        todo.extend(split_oids(refdata))
        yield oid, record
    while self.pack_extra:
        oid = self.pack_extra.pop()
        yield oid, self.load(oid)
def get_stored_pickle(self, oid):
    """(oid:str) -> str
    Retrieve the pickle from storage.  Will raise ReadConflictError if
    the pickle is invalid.
    """
    if oid in self.invalid_oids:
        # someone is still trying to read after getting a conflict
        raise ReadConflictError([oid])
    try:
        record = self.storage.load(oid)
    except ReadConflictError:
        invalid_oids = self.storage.sync()
        self._handle_invalidations(invalid_oids, read_oid=oid)
        record = self.storage.load(oid)
    oid2, data, refdata = unpack_record(record)
    assert oid == oid2
    return data
def gen_oid_record(self, start_oid=None, **other):
    if start_oid is None:
        for item in iteritems(self.shelf):
            yield item
    else:
        todo = [start_oid]
        seen = IntSet()  # This eventually contains them all.
        while todo:
            oid = todo.pop()
            if str_to_int8(oid) in seen:
                continue
            seen.add(str_to_int8(oid))
            record = self.load(oid)
            record_oid, data, refdata = unpack_record(record)
            assert oid == record_oid
            todo.extend(split_oids(refdata))
            yield oid, record
def gen_oid_record(self, start_oid=None, seen=None, **other):
    if start_oid is None:
        for item in iteritems(self.shelf):
            yield item
    else:
        todo = [start_oid]
        if seen is None:
            seen = IntSet()  # This eventually contains them all.
        while todo:
            oid = heapq.heappop(todo)
            if str_to_int8(oid) in seen:
                continue
            seen.add(str_to_int8(oid))
            record = self.load(oid)
            record_oid, data, refdata = unpack_record(record)
            assert oid == record_oid
            for ref_oid in split_oids(refdata):
                heapq.heappush(todo, ref_oid)
            yield oid, record
def gen_reachable_records():
    pack_todo = [durus.connection.ROOT_OID]
    while pack_todo or self.pack_extra:
        if self.pack_extra:
            oid = self.pack_extra.pop()
            # note we don't check 'index' because it could be an
            # object that got updated since the pack began and in
            # that case we have to write the new record to the pack
            # file
        else:
            oid = heapq.heappop(pack_todo)
            if oid in index:
                # we already wrote this object record
                continue
        record = self.load(oid)
        oid2, data, refdata = unpack_record(record)
        assert oid == oid2
        # ensure we have records for objects referenced
        for ref_oid in split_oids(refdata):
            heapq.heappush(pack_todo, ref_oid)
        yield (oid, record)