Example #1
 def gen_items(records):
     for record in records:
         oid, data, refdata = unpack_record(record)
         yield str_to_int8(oid), as_bytes(data), as_bytes(refdata)
         if self.pack_extra is not None:
             # ensure object and refs are marked alive and not removed
             self.pack_extra.append(oid)
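gen_items here is a nested helper: self and unpack_record come from the enclosing method's scope. The str_to_int8 and as_bytes calls are Durus utility helpers; a minimal sketch of stand-ins with the same behavior, assuming 8-byte big-endian oids (the real durus.utils implementations may differ):

def str_to_int8(s):
    # Interpret an 8-byte oid string as an unsigned big-endian integer.
    return int.from_bytes(s, 'big')

def as_bytes(s):
    # Coerce str to bytes; bytes pass through unchanged.
    return s if isinstance(s, bytes) else s.encode('latin-1')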
Example #2
    def gen_oid_record(self, start_oid=None, batch_size=100):
        """(start_oid:str = None, batch_size:int = 100) ->
            sequence((oid:str, record:str))
        Returns a generator for the sequence of (oid, record) pairs.

        If a start_oid is given, the resulting sequence follows a
        breadth-first traversal of the object graph, starting at the given
        start_oid.  This uses the storage's bulk_load() method because that
        is faster in some cases.  The batch_size argument sets the number
        of object records loaded on each call to bulk_load().

        If no start_oid is given, the sequence may include oids and records
        that are not reachable from the root.
        """
        if start_oid is None:
            start_oid = durus.connection.ROOT_OID
        todo = deque([start_oid])
        seen = set()
        while todo:
            batch = []
            while todo and len(batch) < batch_size:
                oid = todo.popleft()
                if oid not in seen:
                    batch.append(oid)
                    seen.add(oid)
            for record in self.bulk_load(batch):
                oid, data, refdata = unpack_record(record)
                yield oid, record
                for ref in split_oids(refdata):
                    if ref not in seen:
                        todo.append(ref)
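The inner loop above drains up to batch_size unseen oids from the deque before each bulk_load() call, so the breadth-first walk issues one storage round trip per batch instead of one per object. The same pattern in isolation, with a hypothetical load_refs_batch(keys) -> {key: [referenced keys]} standing in for bulk_load() plus unpack_record():

from collections import deque

def bfs_batched(start, load_refs_batch, batch_size=100):
    # Breadth-first walk that loads nodes in batches.
    todo = deque([start])
    seen = set()
    while todo:
        batch = []
        while todo and len(batch) < batch_size:
            key = todo.popleft()
            if key not in seen:
                seen.add(key)
                batch.append(key)
        for key, refs in load_refs_batch(batch).items():
            yield key
            todo.extend(r for r in refs if r not in seen)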
Example #3
 def get_crawler(self, start_oid=ROOT_OID, batch_size=100):
     """(start_oid:str = ROOT_OID, batch_size:int = 100) ->
         sequence(PersistentObject)
     Returns a generator for the sequence of objects in a breadth first
     traversal of the object graph, starting at the given start_oid.
     The objects in the sequence have their state loaded at the same time,
     so this can be used to initialize the object cache.
     This uses the storage's bulk_load() method to make it faster.  The
     batch_size argument sets the number of object records loaded on each
     call to bulk_load().
     """
     oid_record_sequence = self.storage.gen_oid_record(
         start_oid=start_oid, batch_size=batch_size)
     for oid, record in oid_record_sequence:
         obj = self.cache.get(oid)
         if obj is not None and not obj._p_is_ghost():
             yield obj
         else:
             record_oid, data, refdata = unpack_record(record)
             if obj is None:
                 klass = loads(data)
                 obj = self.cache.get_instance(oid, klass, self)
             state = self.reader.get_state(data, load=True)
             obj.__setstate__(state)
             obj._p_set_status_saved()
             yield obj
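Because each object's state is loaded as the crawler yields it, simply draining the generator pre-populates the object cache. A usage sketch, assuming connection is an already-open Durus Connection:

# Warm the cache by walking the whole object graph once.
for obj in connection.get_crawler(batch_size=500):
    pass  # loading the state is the useful side effect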
Example #4
 def gen_reachable_records():
     # We order the todo queue by file offset, in the hope that the
     # packed file stays mostly the same as the old file, which speeds
     # up the rsync delta process.
     default_rank = 2**64
     pack_todo = [(0, durus.connection.ROOT_OID)]
     while pack_todo or self.pack_extra:
         if self.pack_extra:
             oid = self.pack_extra.pop()
             # Note: we don't check 'index' here because the object may
             # have been updated since the pack began, in which case the
             # new record must be written to the pack file.
         else:
             rank, oid = heapq.heappop(pack_todo)
             if oid in index:
                 # we already wrote this object record
                 continue
         record = self.load(oid)
         oid2, data, refdata = unpack_record(record)
         assert oid == oid2
         # ensure we have records for objects referenced
         for ref_oid in split_oids(refdata):
             item = (self.index.get(ref_oid, default_rank), ref_oid)
             heapq.heappush(pack_todo, item)
         yield (oid, record)
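heapq orders tuples by their first element, so pushing (rank, oid) pairs makes the queue pop oids roughly in old-file offset order, with unknown oids sorted last via default_rank. The trick in isolation, with a made-up offset table:

import heapq

offsets = {'b': 10, 'a': 20}   # hypothetical oid -> file offset
default_rank = 2**64           # unknown oids go to the back
heap = []
for key in ['a', 'b', 'c']:
    heapq.heappush(heap, (offsets.get(key, default_rank), key))
while heap:
    print(heapq.heappop(heap))  # (10, 'b'), (20, 'a'), then 'c' last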
Example #5
def gen_referring_oid_record(storage, referred_oid):
    """(storage:Storage, referred_oid:str) -> sequence([oid:str, record:str])
    Generate oid, record pairs for all objects that include a
    reference to the `referred_oid`.
    """
    for oid, record in storage.gen_oid_record():
        if referred_oid in split_oids(unpack_record(record)[2]):
            yield oid, record
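A typical use is answering "what still points at this object?" while hunting a leak; a sketch, where storage is an open Storage and target_oid is a hypothetical 8-byte oid:

for oid, record in gen_referring_oid_record(storage, target_oid):
    print('referenced from oid', oid)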
Example #6
def gen_every_instance(connection, *classes):
    """(connection:Connection, *classes:(class)) -> sequence [PersistentObject]
    Generate all PersistentObject instances that are instances of any of the
    given classes."""
    for oid, record in connection.get_storage().gen_oid_record():
        record_oid, state, refs = unpack_record(record)
        record_class = loads(state)
        if issubclass(record_class, classes):
            yield connection.get(oid)
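Since issubclass() accepts a tuple of classes, passing several classes selects instances of any of them. A usage sketch with hypothetical persistent classes Invoice and Receipt:

for obj in gen_every_instance(connection, Invoice, Receipt):
    print(obj)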
Example #7
def get_reference_index(storage):
    """(storage:Storage) -> {oid:str : [referring_oid:str]}
    Return a full index giving the referring oids for each oid.
    This might be large.
    """
    result = {}
    for oid, record in storage.gen_oid_record():
        for ref in split_oids(unpack_record(record)[2]):
            result.setdefault(ref, []).append(oid)
    return result
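One full scan builds a backlink table, after which "who refers to X?" becomes a dictionary lookup instead of another scan. A sketch, with target_oid as a hypothetical oid:

index = get_reference_index(storage)
for referrer in index.get(target_oid, []):
    print('referred to by', referrer)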
Example #8
 def check_record_pack_unpack(self):
     oid = as_bytes('0'*8)
     data = as_bytes('sample')
     reflist = ['1'*8, '2'*8]
     reflist = list(map(as_bytes, reflist))
     refs = join_bytes(reflist)
     result = unpack_record(pack_record(oid, data, refs))
     assert result[0] == oid
     assert result[1] == data
     assert split_oids(result[2]) == reflist
     assert split_oids('') == []
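The test only relies on pack_record() and unpack_record() being inverses, and on split_oids() cutting the reference blob into 8-byte oids. A toy implementation satisfying the same contract, assuming a simple length-prefixed layout (the real Durus record format may differ):

import struct

def pack_record(oid, data, refs):
    # 8-byte oid, 4-byte big-endian data length, then data and refs.
    return oid + struct.pack('>I', len(data)) + data + refs

def unpack_record(record):
    oid = record[:8]
    (n,) = struct.unpack('>I', record[8:12])
    return oid, record[12:12 + n], record[12 + n:]

def split_oids(refs):
    # The refs blob is a plain concatenation of 8-byte oids.
    return [refs[i:i + 8] for i in range(0, len(refs), 8)]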
Example #9
 def get_stored_pickle(self, oid):
     """(oid:str) -> str
     Retrieve the pickle from storage.  Will raise ReadConflictError if
     the oid is invalid.
     """
     assert oid not in self.invalid_oids, "still conflicted: missing abort()"
     try:
         record = self.storage.load(oid)
     except ReadConflictError:
         invalid_oids = self.storage.sync()
         self._handle_invalidations(invalid_oids, read_oid=oid)
         record = self.storage.load(oid)
     oid2, data, refdata = unpack_record(record)
     assert as_bytes(oid) == oid2, (oid, oid2)
     return data
Example #10
def touch_every_reference(connection, *words):
    """(connection:Connection, *words:(str))
    Mark as changed, every object whose pickled class/state contains any
    of the given words.  This is useful when you move or rename a class,
    so that all references can be updated.
    """
    get = connection.get
    reader = ObjectReader(connection)
    words = [as_bytes(w) for w in words]
    for oid, record in connection.get_storage().gen_oid_record():
        record_oid, data, refs = unpack_record(record)
        state = reader.get_state_pickle(data)
        for word in words:
            if word in data or word in state:
                get(oid)._p_note_change()
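The up-front words = [as_bytes(w) for w in words] matters on Python 3: data and state are bytes there, and testing a str against bytes raises TypeError instead of ever matching. A quick check of the semantics:

assert b'Person' in b'...module.Person...'  # bytes in bytes works
# 'Person' in b'...'  would raise TypeError on Python 3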
Example #11
 def gen_reachable_records():
     todo = [ROOT_OID]
     seen = set()
     while todo:
         oid = todo.pop()
         if oid in seen:
             continue
         seen.add(oid)
         record = self.load(oid)
         record_oid, data, refdata = unpack_record(record)
         assert oid == record_oid
         todo.extend(split_oids(refdata))
         yield oid, record
     while self.pack_extra:
         oid = self.pack_extra.pop()
         yield oid, self.load(oid)
Example #12
 def get_stored_pickle(self, oid):
     """(oid:str) -> str
     Retrieve the pickle from storage.  Will raise ReadConflictError if
      the oid is invalid.
     """
     if oid in self.invalid_oids:
         # someone is still trying to read after getting a conflict
         raise ReadConflictError([oid])
     try:
         record = self.storage.load(oid)
     except ReadConflictError:
         invalid_oids = self.storage.sync()
         self._handle_invalidations(invalid_oids, read_oid=oid)
         record = self.storage.load(oid)
     oid2, data, refdata = unpack_record(record)
     assert oid == oid2
     return data
Example #13
 def gen_oid_record(self, start_oid=None, **other):
     if start_oid is None:
         for item in iteritems(self.shelf):
             yield item
     else:
         todo = [start_oid]
         seen = IntSet()  # This eventually contains them all.
         while todo:
             oid = todo.pop()
             if str_to_int8(oid) in seen:
                 continue
             seen.add(str_to_int8(oid))
             record = self.load(oid)
             record_oid, data, refdata = unpack_record(record)
             assert oid == record_oid
             todo.extend(split_oids(refdata))
             yield oid, record
Example #14
 def gen_oid_record(self, start_oid=None, seen=None, **other):
     if start_oid is None:
         for item in iteritems(self.shelf):
             yield item
     else:
         todo = [start_oid]
         if seen is None:
             seen = IntSet() # This eventually contains them all.
         while todo:
             oid = heapq.heappop(todo)
             if str_to_int8(oid) in seen:
                 continue
             seen.add(str_to_int8(oid))
             record = self.load(oid)
             record_oid, data, refdata = unpack_record(record)
             assert oid == record_oid
             for ref_oid in split_oids(refdata):
                 heapq.heappush(todo, ref_oid)
             yield oid, record
Example #15
 def gen_reachable_records():
     pack_todo = [durus.connection.ROOT_OID]
     while pack_todo or self.pack_extra:
         if self.pack_extra:
             oid = self.pack_extra.pop()
             # Note: we don't check 'index' here because the object may
             # have been updated since the pack began, in which case the
             # new record must be written to the pack file.
         else:
             oid = heapq.heappop(pack_todo)
             if oid in index:
                 # we already wrote this object record
                 continue
         record = self.load(oid)
         oid2, data, refdata = unpack_record(record)
         assert oid == oid2
         # ensure we have records for objects referenced
         for ref_oid in split_oids(refdata):
             heapq.heappush(pack_todo, ref_oid)
         yield (oid, record)