def _resize(self, newsize):
    """Grow or shrink the list's backing item array to hold newsize items.

    Shrinking within the hysteresis window only rewrites ob_size in place;
    otherwise the item array is (re)allocated with CPython-style
    overallocation.  All pmem writes happen inside a transaction.
    """
    mm = self.__manager__
    allocated = self._allocated
    # Only realloc if we don't have enough space already.
    if (allocated >= newsize and newsize >= allocated >> 1):
        # Fixed: was `self._items != None`; identity comparison is the
        # correct idiom for None checks.
        assert self._items is not None or newsize == 0
        with mm.transaction():
            ob = ffi.cast('PVarObject *', self._body)
            mm.snapshot_range(ffi.addressof(ob, 'ob_size'),
                              ffi.sizeof('size_t'))
            ob.ob_size = newsize
        return
    # We use CPython's overallocation algorithm.
    new_allocated = (newsize >> 3) + (3 if newsize < 9 else 6) + newsize
    if newsize == 0:
        new_allocated = 0
    items = self._items
    with mm.transaction():
        if items is None:
            items = mm.malloc(new_allocated * ffi.sizeof('PObjPtr'),
                              type_num=LIST_POBJPTR_ARRAY_TYPE_NUM)
        else:
            items = mm.realloc(self._body.ob_items,
                               new_allocated * ffi.sizeof('PObjPtr'),
                               LIST_POBJPTR_ARRAY_TYPE_NUM)
        mm.snapshot_range(self._body, ffi.sizeof('PListObject'))
        self._body.ob_items = items
        self._body.allocated = new_allocated
        ffi.cast('PVarObject *', self._body).ob_size = newsize
def _add(self, key):
    """Add key to the set, resizing the table when it gets too full.

    Retries slot selection while _get_available_entry_slot reports a
    concurrent-mutation restart; no-ops if the key is already present.
    """
    mm = self._p_mm
    khash = fixed_hash(key)
    result = ADD_RESULT_RESTART
    with mm.transaction():
        while result == ADD_RESULT_RESTART:
            index, result = self._get_available_entry_slot(key, khash)
        if result == ADD_RESULT_FOUND_UNUSED or \
           result == ADD_RESULT_FOUND_DUMMY:
            table_data = ffi.cast('PSetEntry *',
                                  mm.direct(self._body.table))
            mm.snapshot_range(ffi.addressof(table_data, index),
                              ffi.sizeof('PSetEntry'))
            oid = mm.persist(key)
            mm.incref(oid)
            # Fixed: removed unused local `p_obj` (was cast from
            # mm.direct(oid) but never read).
            table_data[index].key = oid
            table_data[index].hash = khash
            # Snapshot the set's bookkeeping fields past the PObject header.
            mm.snapshot_range(
                ffi.addressof(self._body, 'fill'),
                ffi.sizeof('PSetObject') - ffi.sizeof('PObject'))
            self._body.used += 1
            if result == ADD_RESULT_FOUND_UNUSED:
                self._body.fill += 1
                # Resize when fill reaches 2/3 of the table (CPython rule).
                if self._body.fill * 3 >= self._body.mask * 2:
                    self._table_resize(self._body.used)
def _p_new(self, manager):
    """Allocate and type-tag the pmem body of a new, empty PersistentList."""
    mm = self._p_mm = manager
    with mm.transaction():
        # XXX Will want to implement a freelist here, like CPython
        self._p_oid = mm.zalloc(ffi.sizeof('PListObject'))
        ob = ffi.cast('PObject *', mm.direct(self._p_oid))
        ob.ob_type = mm._get_type_code(PersistentList)
        # Cache a typed pointer to the body for later accessors.
        self._body = ffi.cast('PListObject *', mm.direct(self._p_oid))
def _get_available_entry_slot(self, key, khash):
    """Probe the set table for key; return (index, ADD_RESULT_* code).

    Mirrors CPython's set lookup: open addressing with LINEAR_PROBES
    linear scans between perturb-based jumps.  Returns the first unused
    slot, the first dummy slot (if any was seen), the slot holding an
    equal key, or (-1, RESTART) when a comparison callback appears to
    have mutated the set under us.
    """
    mm = self._p_mm
    mask = self._body.mask
    i = khash & mask
    table_data = ffi.cast('PSetEntry *', mm.direct(self._body.table))
    entry = table_data[i]
    if entry.hash == HASH_UNUSED:
        return i, ADD_RESULT_FOUND_UNUSED
    perturb = khash
    freeslot = -1  # remembers the first dummy slot for reuse
    while True:
        if entry.hash == khash:
            startkey = self._p_mm.resurrect(entry.key)
            if startkey == key:
                return i, ADD_RESULT_FOUND_ACTIVE
            """ TODO: find a test for this unlikely behaviour """
            # The == above may run arbitrary __eq__ code; re-check that
            # the entry and table are unchanged, else ask caller to retry.
            crtkey = self._p_mm.resurrect(entry.key)
            crttable = ffi.cast('PSetEntry *',
                                mm.direct(self._body.table))
            if crtkey is not startkey or crttable is not table_data:
                return -1, ADD_RESULT_RESTART
        elif entry.hash == HASH_DUMMY and freeslot == -1:
            freeslot = i
        # Linear probe run following slot i.
        for j in range(i + 1, min(i + LINEAR_PROBES, mask) + 1):
            entry = table_data[j]
            if entry.hash == HASH_UNUSED:
                if freeslot == -1:
                    return j, ADD_RESULT_FOUND_UNUSED
                return freeslot, ADD_RESULT_FOUND_DUMMY
            if entry.hash == khash:
                startkey = self._p_mm.resurrect(entry.key)
                if startkey == key:
                    return j, ADD_RESULT_FOUND_ACTIVE
                """ TODO: find a test for this unlikely behaviour """
                crtkey = self._p_mm.resurrect(entry.key)
                crttable = ffi.cast('PSetEntry *',
                                    mm.direct(self._body.table))
                if crtkey is not startkey or crttable is not table_data:
                    return -1, ADD_RESULT_RESTART
            elif entry.hash == HASH_DUMMY and freeslot == -1:
                freeslot = j
        # Jump to the next probe window.
        perturb >>= PERTURB_SHIFT
        i = (i * 5 + 1 + perturb) & mask
        entry = table_data[i]
        if entry.hash == HASH_UNUSED:
            if freeslot == -1:
                return i, ADD_RESULT_FOUND_UNUSED
            return freeslot, ADD_RESULT_FOUND_DUMMY
def _persist_builtins_float(self, f):
    """Store a Python float in pmem as a PFloatObject; return its oid."""
    type_code = self._get_type_code(f.__class__)
    with self.transaction():
        oid = self.zalloc(ffi.sizeof('PFloatObject'))
        header = ffi.cast('PObject *', self.direct(oid))
        header.ob_type = type_code
        ffi.cast('PFloatObject *', header).fval = f
    return oid
def _free_keys_object(self, oid):
    """Decref every key and value slot of a PDictKeysObject, then free it."""
    mm = self._p_mm
    dk = ffi.cast('PDictKeysObject *', mm.direct(oid))
    ep = ffi.cast('PDictKeyEntry *', ffi.addressof(dk.dk_entries[0]))
    with mm.transaction():
        for i in range(dk.dk_size):
            # xdecref tolerates NULL oids, so empty slots are safe.
            mm.xdecref(ep[i].me_key)
            mm.xdecref(ep[i].me_value)
        mm.free(oid)
def _p_new(self, manager):
    """Allocate and type-tag the pmem body of a new PersistentTuple."""
    mm = self._p_mm = manager
    with mm.transaction():
        self._p_oid = mm.zalloc(ffi.sizeof('PTupleObject'))
        header = ffi.cast('PObject *', mm.direct(self._p_oid))
        header.ob_type = mm._get_type_code(PersistentTuple)
        body = ffi.cast('PTupleObject *', mm.direct(self._p_oid))
        self._body = body
        body.ob_items = mm.OID_NULL
def _p_new(self, manager):
    """Allocate the pmem body of a new set with a minimum-size table."""
    mm = self._p_mm = manager
    with mm.transaction():
        self._p_oid = mm.zalloc(ffi.sizeof('PSetObject'))
        ob = ffi.cast('PObject *', mm.direct(self._p_oid))
        ob.ob_type = mm._get_type_code(self.__class__)
        size = PERM_SET_MINSIZE
        self._body = ffi.cast('PSetObject *', mm.direct(self._p_oid))
        # mask is table_size - 1 (table sizes are powers of two).
        self._body.mask = (size - 1)
        self._body.hash = HASH_INVALID
        self._body.table = self._alloc_empty_table(PERM_SET_MINSIZE)
def _persist_builtins_str(self, s):
    """Persist a str as a PObject header plus utf-8 bytes; return its oid.

    One extra byte is allocated for a trailing NUL (zalloc zero-fills).
    """
    type_code = self._get_type_code(s.__class__)
    if sys.version_info[0] > 2:
        # On py3 we store the encoded bytes; py2 str is already bytes.
        s = s.encode('utf-8')
    with self.transaction():
        p_str_oid = self.zalloc(ffi.sizeof('PObject') + len(s) + 1)
        p_str = ffi.cast('PObject *', self.direct(p_str_oid))
        p_str.ob_type = type_code
        body = ffi.cast('char *', p_str) + ffi.sizeof('PObject')
        ffi.buffer(body, len(s))[:] = s
    return p_str_oid
def _p_new(self, manager):
    """Allocate the pmem body of a new, empty combined PersistentDict."""
    mm = self._p_mm = manager
    with mm.transaction():
        # XXX will want to implement a freelist here.
        self._p_oid = mm.zalloc(ffi.sizeof('PDictObject'))
        ob = ffi.cast('PObject *', mm.direct(self._p_oid))
        ob.ob_type = mm._get_type_code(PersistentDict)
        d = self._body = ffi.cast('PDictObject *',
                                  mm.direct(self._p_oid))
        # This code may get moved to a _new_dict method when we implement
        # split dicts.
        d.ma_keys = self._new_keys_object(MIN_SIZE_COMBINED)
        d.ma_values = mm.OID_NULL
def _persist_nvm_pmemobj_pool_PICKLE_SENTINEL(self, obj):
    """Persist an arbitrary object by pickling it into a PVarObject blob.

    The pickle length is recorded in ob_size so the bytes can be
    recovered on resurrection.
    """
    type_code = self._get_type_code(PICKLE_SENTINEL)
    s = dumps(obj)
    # Fixed: removed stray debug `print(s)` that dumped the raw pickle
    # bytes to stdout on every persist.
    with self.transaction():
        p_obj_oid = self.zalloc(ffi.sizeof('PVarObject') + len(s))
        p_pickle = ffi.cast('PVarObject *', self.direct(p_obj_oid))
        p_pickle.ob_base.ob_type = type_code
        p_pickle.ob_size = len(s)
        body = ffi.cast('char *', p_pickle) + ffi.sizeof('PVarObject')
        ffi.buffer(body, len(s))[:] = s
    return p_obj_oid
def _p_new(self, manager): self._p_dict = {} # This makes __getattribute__ simpler mm = self._p_mm = manager with mm.transaction(): # XXX will want to implement a freelist here. self._p_oid = mm.zalloc(ffi.sizeof('PObjectObject')) ob = ffi.cast('PObject *', mm.direct(self._p_oid)) ob.ob_type = mm._get_type_code(self.__class__) d = self._p_body = ffi.cast('PObjectObject *', mm.direct(self._p_oid)) self._p_dict = mm.new(PersistentDict) d.ob_dict = self._p_dict._p_oid mm.incref(self._p_dict._p_oid) self._v__init__()
def __debug_repr__(self):
    """Render every table slot (unused/dummy/live) for debugging."""
    mm = self._p_mm
    slots = ffi.cast('PSetEntry *', mm.direct(self._body.table))
    parts = []
    for idx in range(self._body.mask + 1):
        slot = slots[idx]
        if slot.hash == HASH_UNUSED:
            parts.append("<U>, ")
        elif slot.hash == HASH_DUMMY:
            parts.append("<D>, ")
        else:
            header = ffi.cast('PObject *', mm.direct(slot.key))
            parts.append("(%s h:%s rct:%s), " % (
                mm.resurrect(slot.key), slot.hash, header.ob_refcnt))
    return "%s:[%s]" % (self.__class__.__name__, "".join(parts))
def resurrect(self, oid):
    """Return python object representing the data stored at oid.

    Hits the volatile object cache first; otherwise dispatches on the
    stored type code to either an immutable-type resurrector method or a
    Persistent class's _p_resurrect protocol.
    """
    oid = self.otuple(oid)
    tlog.debug('resurrect: %r', oid)
    try:
        return self._obj_cache.obj_from_oid(oid)
    except KeyError:
        pass
    obj_ptr = ffi.cast('PObject *', self.direct(oid))
    type_code = obj_ptr.ob_type
    # The special cases are to avoid infinite regress in the type table.
    # Fixed: removed dead local `klass = None` (never read).
    if type_code == 0:
        cls_str = _class_string(PersistentList)
    elif type_code == 1:
        cls_str = _class_string(str)
    else:
        cls_str = self._type_table[type_code]
    resurrector = '_resurrect_' + cls_str.replace(':', '_').replace(
        '.', '_')
    if hasattr(self, resurrector):
        obj = getattr(self, resurrector)(obj_ptr)
        log.debug('resurrect %r: immutable type (%r): %r',
                  oid, resurrector, obj)
    else:
        # It must be a Persistent type.
        cls = _find_class_from_string(cls_str)
        obj = cls.__new__(cls)
        obj._p_resurrect(self, oid)
        log.debug('resurrect %r: persistent type (%r): %r',
                  oid, cls_str, obj)
    self._obj_cache.cache(oid, obj)
    return obj
def _insert_clean(self, table, mask, key_oid, khash):
    """Insert key_oid into a table known not to contain it or any dummies.

    Used during table resize: only unused slots need to be considered,
    so no equality checks are required.
    """
    mm = self._p_mm
    perturb = khash
    i = khash & mask
    table_data = ffi.cast('PSetEntry *', mm.direct(table))
    found_index = -1
    while True:
        if table_data[i].hash == HASH_UNUSED:
            found_index = i
            break
        # Linear probe run following slot i.
        for j in range(i + 1, min(i + LINEAR_PROBES, mask) + 1):
            if table_data[j].hash == HASH_UNUSED:
                found_index = j
                break
        if found_index != -1:
            break
        perturb >>= PERTURB_SHIFT
        i = (i * 5 + 1 + perturb) & mask
    with mm.transaction():
        mm.snapshot_range(ffi.addressof(table_data, found_index),
                          ffi.sizeof('PSetEntry'))
        table_data[found_index].hash = khash
        table_data[found_index].key = key_oid
def resurrect(self, oid):
    """Return python object representing the data stored at oid."""
    oid = self.otuple(oid)
    tlog.debug('resurrect: %r', oid)
    try:
        return self._obj_cache.obj_from_oid(oid)
    except KeyError:
        pass
    obj_ptr = ffi.cast('PObject *', self.direct(oid))
    type_code = obj_ptr.ob_type
    # The special cases are to avoid infinite regress in the type table.
    if type_code == 0:
        obj = PersistentList(__manager__=self, _oid=oid)
        self._obj_cache.cache(oid, obj)
        log.debug('resurrect PersistentList: %s %r', oid, obj)
        return obj
    if type_code == 1:
        cls_str = 'builtins:str'
    else:
        cls_str = self._type_table[type_code]
    resurrector = '_resurrect_' + cls_str.replace(':', '_')
    if not hasattr(self, resurrector):
        # It must be a persistent type.
        cls = find_class_from_string(cls_str)
        res = cls(__manager__=self, _oid=oid)
        log.debug('resurrect %r: persistent type (%r): %r',
                  oid, cls_str, res)
        return res
    obj = getattr(self, resurrector)(obj_ptr)
    self._obj_cache.cache(oid, obj)
    log.debug('resurrect %r: immutable type (%r): %r',
              oid, resurrector, obj)
    return obj
def _resize(self, newsize):
    # Note that resize does *not* set self._size.  That needs to be done
    # by the caller such that we never expose invalid item cells.
    # The size field is covered by a snapshot done here, though.
    mm = self._p_mm
    allocated = self._allocated
    # Only realloc if we don't have enough space already.
    if (allocated >= newsize and newsize >= allocated >> 1):
        assert self._items != None or newsize == 0
        with mm.transaction():
            ob = ffi.cast('PVarObject *', self._body)
            mm.snapshot_range(ffi.addressof(ob, 'ob_size'),
                              ffi.sizeof('size_t'))
            ob.ob_size = newsize
        return
    # We use CPython's overallocation algorithm.
    new_allocated = (newsize >> 3) + (3 if newsize < 9 else 6) + newsize
    if newsize == 0:
        new_allocated = 0
    items = self._items
    with mm.transaction():
        if items is None:
            items = mm.zalloc(new_allocated * ffi.sizeof('PObjPtr'),
                              type_num=LIST_POBJPTR_ARRAY_TYPE_NUM)
        else:
            items = mm.zrealloc(self._body.ob_items,
                                new_allocated * ffi.sizeof('PObjPtr'),
                                LIST_POBJPTR_ARRAY_TYPE_NUM)
        mm.snapshot_range(self._body, ffi.sizeof('PListObject'))
        self._body.ob_items = items
        self._body.allocated = new_allocated
def resurrect(self, oid):
    """Return python object representing the data stored at oid.

    Checks the volatile object cache first, then dispatches on the
    stored type code: immutable types go through a _resurrect_* helper,
    Persistent types through cls.__new__ + _p_resurrect.
    """
    oid = self.otuple(oid)
    tlog.debug('resurrect: %r', oid)
    try:
        return self._obj_cache.obj_from_oid(oid)
    except KeyError:
        pass
    obj_ptr = ffi.cast('PObject *', self.direct(oid))
    type_code = obj_ptr.ob_type
    # The special cases are to avoid infinite regress in the type table.
    # Fixed: removed dead local `klass = None` (never read).
    if type_code == 0:
        cls_str = _class_string(PersistentList)
    elif type_code == 1:
        cls_str = _class_string(str)
    else:
        cls_str = self._type_table[type_code]
    resurrector = '_resurrect_' + cls_str.replace(':', '_').replace(
        '.', '_')
    if hasattr(self, resurrector):
        obj = getattr(self, resurrector)(obj_ptr)
        log.debug('resurrect %r: immutable type (%r): %r',
                  oid, resurrector, obj)
    else:
        # It must be a Persistent type.
        cls = _find_class_from_string(cls_str)
        obj = cls.__new__(cls)
        obj._p_resurrect(self, oid)
        log.debug('resurrect %r: persistent type (%r): %r',
                  oid, cls_str, obj)
    self._obj_cache.cache(oid, obj)
    return obj
def _insertion_resize(self):
    # This is modeled on CPython's insertion_resize/dictresize, but
    # assuming we always have a combined dict.  We copy the keys and
    # values into a new dict structure and free the old one.  We don't
    # touch the refcounts.
    mm = self._p_mm
    minused = self._growth_rate()
    newsize = MIN_SIZE_COMBINED
    # Double until we exceed the target (the > 0 guard stops on overflow).
    while newsize <= minused and newsize > 0:
        newsize = newsize << 1
    oldkeys = self._keys
    oldkeys_oid = mm.otuple(self._body.ma_keys)
    with mm.transaction():
        mm.snapshot_range(ffi.addressof(self._body, 'ma_keys'),
                          ffi.sizeof('PObjPtr'))
        self._body.ma_keys = self._new_keys_object(newsize)
        oldsize = oldkeys.dk_size
        old_ep0 = ffi.cast('PDictKeyEntry *',
                           ffi.addressof(oldkeys.dk_entries[0]))
        for i in range(oldsize):
            old_ep = old_ep0[i]
            me_value = mm.otuple(old_ep.me_value)
            # Only slots with a value are live (NULL value == empty/dummy).
            if me_value != mm.OID_NULL:
                me_key = mm.otuple(old_ep.me_key)
                assert me_key != DUMMY
                me_hash = old_ep.me_hash
                new_ep = self._find_empty_slot(me_key, me_hash)
                new_ep.me_key = me_key
                new_ep.me_hash = me_hash
                new_ep.me_value = me_value
        self._keys.dk_usable -= self._body.ma_used
        mm.free(oldkeys_oid)
def _lookkey(self, key, khash):
    """Return the table index holding key, or -1 if it is not present."""
    mm = self._p_mm
    mask = self._body.mask
    i = khash & mask
    table_data = ffi.cast('PSetEntry *', mm.direct(self._body.table))
    entry = table_data[i]
    if entry.hash == HASH_UNUSED:
        return -1
    perturb = khash
    while(True):
        if entry.hash == khash:
            startkey = self._p_mm.resurrect(entry.key)
            if startkey == key:
                return i
            """ TODO: find a test for this unlikely behaviour """
            # NOTE(review): this mutation check compares with != and
            # ignores crttable, unlike the identity-based check in the
            # second branch below (and in the other _lookkey variant) --
            # confirm which form is intended.
            crtkey = self._p_mm.resurrect(entry.key)
            crttable = ffi.cast('PSetEntry *',
                                mm.direct(self._body.table))
            if crtkey != startkey:
                return self._lookkey(key, khash)
        # Linear probe run following slot i.
        for j in range(i + 1, min(i + LINEAR_PROBES, mask) + 1):
            entry = table_data[j]
            if entry.hash == HASH_UNUSED:
                return -1
            if entry.hash == khash:
                startkey = self._p_mm.resurrect(entry.key)
                if startkey == key:
                    return j
                """ TODO: find a test for this unlikely behaviour """
                crtkey = self._p_mm.resurrect(entry.key)
                crttable = ffi.cast('PSetEntry *',
                                    mm.direct(self._body.table))
                if crtkey is not startkey or crttable is not table_data:
                    return self._lookkey(key, khash)
        # Jump to the next probe window.
        perturb >>= PERTURB_SHIFT
        i = (i * 5 + 1 + perturb) & mask
        entry = table_data[i]
        if entry.hash == HASH_UNUSED:
            return -1
def __debug_repr__(self):
    """Debug representation showing the raw hash-table slot states."""
    mm = self._p_mm
    table_data = ffi.cast('PSetEntry *', mm.direct(self._body.table))
    set_content = ""
    for i in range(0, self._body.mask + 1):
        entry = table_data[i]
        if entry.hash == HASH_UNUSED:
            set_content += "<U>, "
        elif entry.hash == HASH_DUMMY:
            set_content += "<D>, "
        else:
            p_obj = ffi.cast('PObject *', mm.direct(entry.key))
            set_content += "(%s h:%s rct:%s), " % (
                mm.resurrect(entry.key), entry.hash, p_obj.ob_refcnt)
    return "%s:[%s]" % (self.__class__.__name__, set_content)
def _p_traverse(self):
    """Yield the key oid of every live entry in the set's table."""
    mm = self._p_mm
    slots = ffi.cast('PSetEntry *', mm.direct(self._body.table))
    for idx in range(self._body.mask + 1):
        slot = slots[idx]
        if slot.hash == HASH_UNUSED or slot.hash == HASH_DUMMY:
            continue
        yield slot.key
def __iter__(self):
    """Yield the resurrected value of every live entry in the set."""
    mm = self._p_mm
    slots = ffi.cast('PSetEntry *', mm.direct(self._body.table))
    limit = self._body.mask + 1
    idx = 0
    while idx < limit:
        slot = slots[idx]
        if slot.hash != HASH_UNUSED and slot.hash != HASH_DUMMY:
            yield mm.resurrect(slot.key)
        idx += 1
def _lookkey(self, key, khash):
    """Return the table index holding key, or -1 if it is not present.

    Restarts the whole lookup (via recursion) when an __eq__ callback
    appears to have mutated the set under us.
    """
    mm = self._p_mm
    mask = self._body.mask
    i = khash & mask
    table_data = ffi.cast('PSetEntry *', mm.direct(self._body.table))
    entry = table_data[i]
    if entry.hash == HASH_UNUSED:
        return -1
    perturb = khash
    while (True):
        if entry.hash == khash:
            startkey = self._p_mm.resurrect(entry.key)
            if startkey == key:
                return i
            """ TODO: find a test for this unlikely behaviour """
            crtkey = self._p_mm.resurrect(entry.key)
            crttable = ffi.cast('PSetEntry *',
                                mm.direct(self._body.table))
            if crtkey != startkey:
                return self._lookkey(key, khash)
        # Linear probe run following slot i.
        for j in range(i + 1, min(i + LINEAR_PROBES, mask) + 1):
            entry = table_data[j]
            if entry.hash == HASH_UNUSED:
                return -1
            if entry.hash == khash:
                startkey = self._p_mm.resurrect(entry.key)
                if startkey == key:
                    return j
                """ TODO: find a test for this unlikely behaviour """
                crtkey = self._p_mm.resurrect(entry.key)
                crttable = ffi.cast('PSetEntry *',
                                    mm.direct(self._body.table))
                if crtkey is not startkey or crttable is not table_data:
                    return self._lookkey(key, khash)
        # Jump to the next probe window.
        perturb >>= PERTURB_SHIFT
        i = (i * 5 + 1 + perturb) & mask
        entry = table_data[i]
        if entry.hash == HASH_UNUSED:
            return -1
def __init__(self, filename, flag='w', pool_size=MIN_POOL_SIZE,
             mode=0o666, debug=False):
    """Open or create a persistent object pool backed by filename.

    If flag is 'w', raise an OSError if the file does not exist and
    otherwise open it for reading and writing.  If flag is 'x', raise an
    OSError if the file *does* exist, otherwise create it and open it
    for reading and writing.  If flag is 'c', create the file if it does
    not exist, otherwise use the existing file.

    If the file gets created, use pool_size as the size of the new pool
    in bytes and mode as its access mode, otherwise ignore these
    parameters and open the existing file.

    If debug is True, generate some additional logging, including
    turning on some additional sanity-check warnings.  This may have an
    impact on performance.

    See also the open and create functions of nvm.pmemobj, which are
    convenience functions for the 'w' and 'x' flags, respectively.
    """
    log.debug('PersistentObjectPool.__init__: %r, %r %r, %r',
              filename, flag, pool_size, mode)
    self.filename = filename
    self.debug = debug
    exists = os.path.exists(filename)
    if flag == 'w' or (flag == 'c' and exists):
        self._pool_ptr = _check_null(
            lib.pmemobj_open(_coerce_fn(filename), layout_version))
    elif flag == 'x' or (flag == 'c' and not exists):
        self._pool_ptr = _check_null(
            lib.pmemobj_create(_coerce_fn(filename), layout_version,
                               pool_size, mode))
    elif flag == 'r':
        raise ValueError("Read-only mode is not supported")
    else:
        raise ValueError("Invalid flag value {}".format(flag))
    mm = self.mm = MemoryManager(self._pool_ptr)
    pmem_root = lib.pmemobj_root(self._pool_ptr, ffi.sizeof('PRoot'))
    pmem_root = ffi.cast('PRoot *', mm.direct(pmem_root))
    type_table_oid = mm.otuple(pmem_root.type_table)
    if type_table_oid == mm.OID_NULL:
        # Fresh pool: create and link the type table atomically.
        with mm.transaction():
            type_table_oid = mm._create_type_table()
            mm.snapshot_range(pmem_root, ffi.sizeof('PObjPtr'))
            pmem_root.type_table = type_table_oid
    else:
        mm._resurrect_type_table(type_table_oid)
    self._pmem_root = pmem_root
    if exists:
        # Make sure any objects orphaned by a crash are cleaned up.
        # XXX should fix this to only be called when there is a crash.
        self.gc()
def _dumpdict(self):
    """Log every slot of the key table (debugging aid)."""
    mm = self._p_mm
    keys = self._keys
    entries = ffi.cast('PDictKeyEntry *',
                       ffi.addressof(keys.dk_entries[0]))
    log.debug('size: %s', keys.dk_size)
    for slot in range(keys.dk_size):
        entry = entries[slot]
        log.debug('hash: %s, key oid: %s, value oid: %s',
                  entry.me_hash, mm.otuple(entry.me_key),
                  mm.otuple(entry.me_value))
def _lookdict(self, key, khash):
    # Generalized key lookup method.  Modeled on CPython's lookdict:
    # returns the entry for key if present, otherwise the slot where it
    # would be inserted (preferring a dummy slot if one was passed).
    mm = self._p_mm
    while True:
        keys_oid = mm.otuple(self._body.ma_keys)
        keys = ffi.cast('PDictKeysObject *', mm.direct(keys_oid))
        mask = keys.dk_size - 1
        ep0 = ffi.cast('PDictKeyEntry *',
                       ffi.addressof(keys.dk_entries[0]))
        i = khash & mask
        ep = ffi.addressof(ep0[i])
        me_key = mm.otuple(ep.me_key)
        if me_key == mm.OID_NULL:
            return ep
        if me_key == DUMMY:
            freeslot = ep
        else:
            if ep.me_hash == khash:
                match = mm.resurrect(me_key) == key  # dict could mutate
                if (mm.otuple(self._body.ma_keys) == keys_oid
                        and mm.otuple(ep.me_key) == me_key):
                    if match:
                        return ep
                else:
                    continue  # mutation, start over from the top.
            freeslot = None
        perturb = khash
        while True:
            i = (i << 2) + i + perturb + 1
            ep = ep0[i & mask]
            me_key = mm.otuple(ep.me_key)
            if me_key == mm.OID_NULL:
                return ep if freeslot is None else freeslot
            if ep.me_hash == khash and me_key != DUMMY:
                match = mm.resurrect(me_key) == key  # dict could mutate
                if (mm.otuple(self._body.ma_keys) == keys_oid
                        and mm.otuple(ep.me_key) == me_key):
                    if match:
                        return ep
                else:
                    break  # mutation, start over from the top.
            elif me_key == DUMMY and freeslot is None:
                freeslot = ep
            perturb = perturb >> PERTURB_SHIFT
def __iter__(self):
    """Yield the resurrected key of every live entry in the dict."""
    mm = self._p_mm
    keys = self._keys
    ep0 = ffi.cast('PDictKeyEntry *',
                   ffi.addressof(keys.dk_entries[0]))
    for i in range(keys.dk_size):
        ep = ep0[i]
        # NOTE(review): `ep.me_hash == ffi.NULL` compares an integer
        # hash to a NULL pointer, which looks suspect -- confirm whether
        # this condition can ever be true.
        if (ep.me_hash == ffi.NULL
                or mm.otuple(ep.me_key) in (mm.OID_NULL, DUMMY)):
            continue
        yield mm.resurrect(ep.me_key)
def _new_keys_object(self, size):
    """Allocate and initialize a PDictKeysObject with `size` entry slots."""
    assert size >= MIN_SIZE_SPLIT
    mm = self._p_mm
    with mm.transaction():
        # dk_entries is declared with one element, so allocate size - 1
        # extra entries past the struct.
        dk_oid = mm.zalloc(ffi.sizeof('PDictKeysObject')
                           + ffi.sizeof('PDictKeyEntry') * (size - 1),
                           type_num=PDICTKEYSOBJECT_TYPE_NUM)
        dk = ffi.cast('PDictKeysObject *', mm.direct(dk_oid))
        dk.dk_refcnt = 1
        dk.dk_size = size
        dk.dk_usable = _usable_fraction(size)
        ep = ffi.cast('PDictKeyEntry *',
                      ffi.addressof(dk.dk_entries[0]))
        # Hash value of slot 0 is used by popitem, so it must be initialized
        ep[0].me_hash = 0
        for i in range(size):
            ep[i].me_key = mm.OID_NULL
            ep[i].me_value = mm.OID_NULL
        # XXX Set dk_lookup to lookdict_unicode_nodummy if we end up
        # using it.
        return dk_oid
def _table_resize(self, minused):
    """Rebuild the set's hash table sized for at least `minused` entries.

    Live entries are re-inserted clean (no dummies survive a resize).
    """
    mm = self._p_mm
    # Large sets grow 2x, small sets 4x (CPython's heuristic).
    if minused > 50000:
        minused = (minused << 1)
    else:
        minused = (minused << 2)
    newsize = PERM_SET_MINSIZE
    while(newsize <= minused):
        newsize = (newsize << 1)
    # Round-trip through size_t to detect overflow of the doubling loop.
    newsize = ffi.cast('size_t', newsize)
    if newsize == 0:
        raise MemoryError("Out of memory")
    newsize = int(newsize)
    with mm.transaction():
        oldtable = mm.otuple(self._body.table)
        oldtable_data = ffi.cast('PSetEntry *', mm.direct(oldtable))
        newtable = self._alloc_empty_table(newsize)
        newmask = newsize - 1
        for i in range(0, self._body.mask + 1):
            if oldtable_data[i].hash == HASH_UNUSED or \
               oldtable_data[i].hash == HASH_DUMMY:
                continue
            self._insert_clean(newtable, newmask, oldtable_data[i].key,
                               oldtable_data[i].hash)
        mm.snapshot_range(ffi.addressof(self._body, 'fill'),
                          ffi.sizeof('PSetObject')
                          - ffi.sizeof('PObject'))
        self._body.mask = newmask
        self._body.fill = self._body.used
        self._body.table = newtable
        mm.free(oldtable)
def _table_resize(self, minused):
    """Rebuild the set's hash table sized for at least `minused` entries."""
    mm = self._p_mm
    # Large sets grow 2x, small sets 4x (CPython's heuristic).
    if minused > 50000:
        minused = (minused << 1)
    else:
        minused = (minused << 2)
    newsize = PERM_SET_MINSIZE
    while (newsize <= minused):
        newsize = (newsize << 1)
    # Round-trip through size_t to detect overflow of the doubling loop.
    newsize = ffi.cast('size_t', newsize)
    if newsize == 0:
        raise MemoryError("Out of memory")
    newsize = int(newsize)
    with mm.transaction():
        oldtable = mm.otuple(self._body.table)
        oldtable_data = ffi.cast('PSetEntry *', mm.direct(oldtable))
        newtable = self._alloc_empty_table(newsize)
        newmask = newsize - 1
        for i in range(0, self._body.mask + 1):
            if oldtable_data[i].hash == HASH_UNUSED or \
               oldtable_data[i].hash == HASH_DUMMY:
                continue
            self._insert_clean(newtable, newmask, oldtable_data[i].key,
                               oldtable_data[i].hash)
        mm.snapshot_range(ffi.addressof(self._body, 'fill'),
                          ffi.sizeof('PSetObject')
                          - ffi.sizeof('PObject'))
        self._body.mask = newmask
        self._body.fill = self._body.used
        self._body.table = newtable
        mm.free(oldtable)
def __init__(self, *args, **kw):
    """Create (or rebind to) a PersistentList.

    Requires __manager__; an existing list is rebound via _oid, otherwise
    a new pmem body is allocated.  At most one positional argument, an
    iterable whose items are appended.
    """
    if '__manager__' not in kw:
        raise ValueError("__manager__ is required")
    mm = self.__manager__ = kw.pop('__manager__')
    if '_oid' not in kw:
        with mm.transaction():
            # XXX Will want to implement a freelist here, like CPython
            self._oid = mm.malloc(ffi.sizeof('PListObject'))
            ob = ffi.cast('PObject *', mm.direct(self._oid))
            ob.ob_type = mm._get_type_code(PersistentList)
    else:
        self._oid = kw.pop('_oid')
    if kw:
        raise TypeError("Unrecognized keyword argument(s) {}".format(kw))
    self._body = ffi.cast('PListObject *', mm.direct(self._oid))
    if args:
        if len(args) != 1:
            raise TypeError("PersistentList takes at most 1"
                            " argument, {} given".format(len(args)))
        self.extend(args[0])
def incref(self, oid):
    """Increment the reference count of oid."""
    oid = self.otuple(oid)
    if oid == OID_NULL:
        # Unlike CPython, we don't ref-track our constants.
        return
    p_obj = ffi.cast('PObject *', self.direct(oid))
    log.debug('incref %r %r', oid, p_obj.ob_refcnt + 1)
    with self.transaction():
        self.snapshot_range(ffi.addressof(p_obj, 'ob_refcnt'),
                            ffi.sizeof('size_t'))
        p_obj.ob_refcnt += 1
def _discard(self, key):
    """Remove key from the set if present, leaving a dummy slot behind."""
    mm = self._p_mm
    with mm.transaction():
        keyindex = self._lookkey(key, fixed_hash(key))
        if keyindex != -1:
            table_data = ffi.cast('PSetEntry *',
                                  mm.direct(self._body.table))
            mm.snapshot_range(ffi.addressof(table_data, keyindex),
                              ffi.sizeof('PSetEntry'))
            mm.decref(table_data[keyindex].key)
            table_data[keyindex].key = mm.OID_NULL
            table_data[keyindex].hash = HASH_DUMMY
            # NOTE(review): `used` is decremented without its own
            # snapshot_range; confirm it is covered elsewhere in the
            # transaction.
            self._body.used -= 1
def decref(self, oid):
    """Decrement the reference count of oid, and free it if zero."""
    oid = self.otuple(oid)
    p_obj = ffi.cast('PObject *', self.direct(oid))
    log.debug('decref %r %r', oid, p_obj.ob_refcnt - 1)
    with self.transaction():
        self.snapshot_range(ffi.addressof(p_obj, 'ob_refcnt'),
                            ffi.sizeof('size_t'))
        # Refcounts must never go negative; fail loudly if they would.
        assert p_obj.ob_refcnt > 0, "{} oid refcount {}".format(
            oid, p_obj.ob_refcnt)
        p_obj.ob_refcnt -= 1
        if p_obj.ob_refcnt < 1:
            self._deallocate(oid)
def incref(self, oid):
    """Increment the reference count of oid if it is not a singleton"""
    oid = self.otuple(oid)
    assert oid != self.OID_NULL
    if not oid[0]:
        # Unlike CPython, we don't ref-track our constants.
        log.debug('not increfing %s', oid)
        return
    p_obj = ffi.cast('PObject *', self.direct(oid))
    log.debug('incref %r %r', oid, p_obj.ob_refcnt + 1)
    with self.transaction():
        self.snapshot_range(ffi.addressof(p_obj, 'ob_refcnt'),
                            ffi.sizeof('size_t'))
        p_obj.ob_refcnt += 1
def _persist_builtins_int(self, i):
    """Persist an int by serializing its repr as a persistent string."""
    # Make sure we get the int type even on python2.  The space is needed.
    type_code = self._get_type_code(1 .__class__)
    # In theory we could copy the actual CPython data directly here,
    # but that would mean we'd break on PyPy, etc.  So we serialize.
    i = repr(i)
    if sys.version_info[0] < 3:
        # Drop py2 long suffix so the stored form is version-neutral.
        i = i.rstrip('L')
    with self.transaction():
        # There's a bit of extra overhead in reusing this, but not much.
        p_int_oid = self._persist_builtins_str(i)
        p_int = ffi.cast('PObject *', self.direct(p_int_oid))
        p_int.ob_type = type_code
    return p_int_oid
def _persist_builtins_int(self, i):
    """Persist an int by serializing its repr as a persistent string."""
    # Make sure we get the int type even on python2.  The parentheses
    # are required: "1.__class__" tokenizes as the float literal "1."
    # followed by "__class__" and is a SyntaxError.
    type_code = self._get_type_code((1).__class__)
    # In theory we could copy the actual CPython data directly here,
    # but that would mean we'd break on PyPy, etc.  So we serialize.
    i = repr(i)
    if sys.version_info[0] < 3:
        # Drop py2 long suffix so the stored form is version-neutral.
        i = i.rstrip('L')
    with self.transaction():
        # There's a bit of extra overhead in reusing this, but not much.
        p_int_oid = self._persist_builtins_str(i)
        p_int = ffi.cast('PObject *', self.direct(p_int_oid))
        p_int.ob_type = type_code
    return p_int_oid
def _find_empty_slot(self, key, khash):
    # Find slot from hash when key is not in dict.
    mm = self._p_mm
    keys = self._keys
    mask = keys.dk_size - 1
    ep0 = ffi.cast('PDictKeyEntry *',
                   ffi.addressof(keys.dk_entries[0]))
    i = khash & mask
    ep = ffi.addressof(ep0[i])
    perturb = khash
    # Open-addressing probe (CPython's recurrence) until a NULL slot.
    while mm.otuple(ep.me_key) != mm.OID_NULL:
        i = (i << 2) + i + perturb + 1
        ep = ep0[i & mask]
        perturb = perturb >> PERTURB_SHIFT
    assert mm.otuple(ep.me_key) == mm.OID_NULL
    return ep
def clear(self):
    """Remove all items, decrefing each and releasing the item array."""
    mm = self._p_mm
    if self._size == 0:
        return
    items = self._items
    with mm.transaction():
        size = self._size
        # Set size to zero now so we never have an invalid state.
        ffi.cast('PVarObject *', self._body).ob_size = 0
        for i in range(size):
            # Grab oid in tuple form so the assignment can't change it
            oid = mm.otuple(items[i])
            items[i] = mm.OID_NULL
            mm.decref(oid)
        self._resize(0)
def decref(self, oid):
    """Decrement the reference count of oid, and free it if zero."""
    oid = self.otuple(oid)
    if not oid[0]:
        # Unlike CPython we do not ref-track our constants.
        log.debug('not decrefing %s', oid)
        return
    p_obj = ffi.cast('PObject *', self.direct(oid))
    log.debug('decref %r %r', oid, p_obj.ob_refcnt - 1)
    with self.transaction():
        self.snapshot_range(ffi.addressof(p_obj, 'ob_refcnt'),
                            ffi.sizeof('size_t'))
        # Refcounts must never go negative; fail loudly if they would.
        assert p_obj.ob_refcnt > 0, "{} oid refcount {}".format(
            oid, p_obj.ob_refcnt)
        p_obj.ob_refcnt -= 1
        if p_obj.ob_refcnt < 1:
            self._deallocate(oid)
def __delitem__(self, index):
    """Delete the item at index, shifting later items down by one."""
    mm = self._p_mm
    index = self._normalize_index(index)
    size = self._size
    newsize = size - 1
    items = self._items
    with mm.transaction():
        ffi.cast('PVarObject *', self._body).ob_size = newsize
        # We can't completely hide the process of transformation...this
        # really needs a lock (or translation to GIL-locked C).
        mm.snapshot_range(ffi.addressof(items, index),
                          ffi.offsetof('PObjPtr *', size))
        oid = mm.otuple(items[index])
        for i in range(index, newsize):
            items[i] = items[i+1]
        mm.decref(oid)
        self._resize(newsize)
def __delitem__(self, index):
    """Delete the item at index, shifting later items down by one."""
    mm = self._p_mm
    index = self._normalize_index(index)
    size = self._size
    newsize = size - 1
    items = self._items
    with mm.transaction():
        ffi.cast('PVarObject *', self._body).ob_size = newsize
        # We can't completely hide the process of transformation...this
        # really needs a lock (or translation to GIL-locked C).
        mm.snapshot_range(ffi.addressof(items, index),
                          ffi.offsetof('PObjPtr *', size))
        oid = mm.otuple(items[index])
        for i in range(index, newsize):
            items[i] = items[i + 1]
        mm.decref(oid)
        self._resize(newsize)
def insert(self, index, value):
    """Insert value before index (list.insert semantics, clamped)."""
    mm = self._p_mm
    size = self._size
    newsize = size + 1
    with mm.transaction():
        self._resize(newsize)
        # Normalize negative indexes and clamp to [0, size], as
        # CPython's list insert does.
        if index < 0:
            index += size
            if index < 0:
                index = 0
        if index > size:
            index = size
        items = self._items
        mm.snapshot_range(items + index,
                          ffi.offsetof('PObjPtr *', newsize))
        # Shift the tail up to open the slot.
        for i in range(size, index, -1):
            items[i] = items[i-1]
        v_oid = mm.persist(value)
        mm.incref(v_oid)
        items[index] = v_oid
        ffi.cast('PVarObject *', self._body).ob_size = newsize
def gc(self, debug=None):
    # XXX add debug flag to constructor, and a test that orphans
    # generate warning messages when debug=True.
    """Free all unreferenced objects (cyclic garbage).

    The object tree is traced from the root, and any object that is not
    referenced somewhere in the tree is freed.  This collects cyclic
    garbage, and produces warnings for unreferenced objects with
    incorrect refcounts.  Most garbage is automatically collected when
    the object is no longer referenced.

    If debug is true, the debug logging output will include reprs of the
    objects encountered, all orphans will be logged as warnings, and
    additional checks will be done for orphaned or invalid data
    structures (those reported by a Persistent object's
    _p_substructures method).

    Returns (type_counts, gc_counts) dicts of statistics.
    """
    # XXX CPython uses a three generation GC in order to obtain more or
    # less linear performance against the total number of objects.
    # Currently we are not doing generations; we can get more
    # complicated later if we want to run the GC periodically.
    debug = self.debug if debug is None else debug
    log.debug('gc: start')
    containers = set()
    other = set()
    orphans = set()
    types = {}
    substructures = collections.defaultdict(dict)
    type_counts = collections.defaultdict(int)
    gc_counts = collections.defaultdict(int)
    with self.lock:
        # Phase 1: catalog all pmem objects.
        oid = self.mm.otuple(lib.pmemobj_first(self._pool_ptr))
        while oid != self.mm.OID_NULL:
            type_num = lib.pmemobj_type_num(oid)
            # XXX Could make the _PTR lists PObjects too so they are
            # tracked.
            if type_num == POBJECT_TYPE_NUM:
                obj = ffi.cast('PObject *', self.mm.direct(oid))
                if debug:
                    if obj.ob_refcnt < 0:
                        log.error("Negative refcount (%s): %s %r",
                                  obj.ob_refcnt, oid,
                                  self.mm.resurrect(oid))
                assert obj.ob_refcnt >= 0, \
                    '%s has negative refcnt' % oid
                # XXX move this cache to the POP?
                type_code = obj.ob_type
                if type_code not in types:
                    types[type_code] = _find_class_from_string(
                        self.mm._type_table[type_code])
                typ = types[type_code]
                type_counts[typ.__name__] += 1
                assert obj.ob_refcnt >= 0, "{} refcount is {}".format(
                    oid, obj.ob_refcnt)
                if not obj.ob_refcnt:
                    if debug:
                        log.debug('gc: orphan: %s %s %r',
                                  oid, obj.ob_refcnt,
                                  self.mm.resurrect(oid))
                    orphans.add(oid)
                elif hasattr(typ, '_p_traverse'):
                    if debug:
                        log.debug('gc: container: %s %s %r',
                                  oid, obj.ob_refcnt,
                                  self.mm.resurrect(oid))
                    containers.add(oid)
                else:
                    if debug:
                        log.debug('gc: other: %s %s %r',
                                  oid, obj.ob_refcnt,
                                  self.mm.resurrect(oid))
                    other.add(oid)
            else:
                if debug:
                    log.debug("gc: non PObject (type %s): %s",
                              type_num, oid)
                substructures[type_num][oid] = []
            oid = self.mm.otuple(lib.pmemobj_next(oid))
        gc_counts['containers-total'] = len(containers)
        gc_counts['other-total'] = len(other)
        # Phase 2: clean up refcount 0 orphans (from a crash or code
        # bug).
        log.debug("gc: deallocating %s orphans", len(orphans))
        gc_counts['orphans0-gced'] = len(orphans)
        for oid in orphans:
            if debug:
                # XXX This should be a non debug warning on close.
                log.warning("deallocating orphan (refcount 0): %s %r",
                            oid, self.mm.resurrect(oid))
            self.mm._deallocate(oid)
        # Phase 3 (debug only): validate the container substructures.
        if debug:
            log.debug("Checking substructure integrity")
            for container_oid in containers:
                container = self.mm.resurrect(container_oid)
                for oid, type_num in container._p_substructures():
                    oid = self.mm.otuple(oid)
                    if oid == self.mm.OID_NULL:
                        continue
                    if oid not in substructures[type_num]:
                        log.error("%s points to subsctructure type %s"
                                  " at %s, but we didn't find it in"
                                  " the pmemobj object list.",
                                  container_oid, type_num, oid)
                    else:
                        substructures[type_num][oid].append(
                            container_oid)
            for type_num, structs in substructures.items():
                for struct_oid, parent_oids in structs.items():
                    if not parent_oids:
                        log.error("substructure type %s at %s is not"
                                  " referenced by any existing object.",
                                  type_num, struct_oid)
                    elif len(parent_oids) > 1:
                        log.error("substructure type %s at %s is"
                                  "referenced by more than once object:"
                                  " %s",
                                  type_num, struct_oid, parent_oids)
        # Phase 4: trace the object tree, removing objects that are
        # referenced.
        containers.remove(self.mm._type_table._p_oid)
        live = [self.mm._type_table._p_oid]
        root_oid = self.mm.otuple(self._pmem_root.root_object)
        root = self.mm.resurrect(root_oid)
        if hasattr(root, '_p_traverse'):
            containers.remove(root_oid)
            live.append(root_oid)
        elif root is not None:
            if debug:
                log.debug('gc: non-container root: %s %r',
                          root_oid, root)
            if root_oid[0]:
                # It's not a singleton, so it should be in other.
                other.remove(root_oid)
        for oid in live:
            if debug:
                log.debug('gc: checking live %s %r',
                          oid, self.mm.resurrect(oid))
            for sub_oid in self.mm.resurrect(oid)._p_traverse():
                sub_key = self.mm.otuple(sub_oid)
                if sub_key in containers:
                    if debug:
                        log.debug('gc: refed container %s %r',
                                  sub_key, self.mm.resurrect(sub_oid))
                    containers.remove(sub_key)
                    live.append(sub_key)
                elif sub_key in other:
                    if debug:
                        log.debug('gc: refed oid %s %r',
                                  sub_key, self.mm.resurrect(sub_oid))
                    other.remove(sub_key)
                    gc_counts['other-live'] += 1
        gc_counts['containers-live'] = len(live)
        # Phase 5: everything left is unreferenced via the root,
        # deallocate it.
        log.debug('gc: deallocating %s containers', len(containers))
        self.mm._track_free = set()
        for oid in containers:
            if oid in self.mm._track_free:
                continue
            if debug:
                log.debug('gc: deallocating container %s %r',
                          oid, self.mm.resurrect(oid))
            with self.mm.transaction():
                # incref so we don't try to deallocate us during cycle
                # clear.
                self.mm.incref(oid)
                self.mm._deallocate(oid)
                # deallocate frees oid, so no decref.
        gc_counts['collections-gced'] = len(containers)
        log.debug('gc: deallocating %s new orphans', len(other))
        for oid in other:
            if oid in self.mm._track_free:
                continue
            # NOTE(review): "postive" is a typo in this user-visible log
            # message ("positive"); left unchanged here since a
            # doc-only edit must not alter runtime strings.
            log.warning("Orphaned with postive refcount: %s: %s",
                        oid, self.mm.resurrect(oid))
            self.mm._deallocate(oid)
            gc_counts['orphans1-gced'] += 1
        gc_counts['other-gced'] = len(other) - gc_counts['orphans1-gced']
        self.mm._track_free = None
        log.debug('gc: end')
        # All cleaned up, so no need to gc on open.
        self._pmem_root.clean_shutdown = self.mm.persist(True)
        return dict(type_counts), dict(gc_counts)