def exportFile(self, oid, f=None):
    """Export the object graph rooted at *oid* to *f* in ZEXP format.

    *f* may be None (a TemporaryFile is created), a filename string
    (opened 'w+b'), or an already-open binary file.  Returns the file
    with all reachable records written, terminated by
    ``export_end_marker``.
    """
    if f is None:
        f = TemporaryFile()
    elif isinstance(f, str):
        f = open(f, 'w+b')
    f.write('ZEXP')  # export-format magic header
    oids = [oid]      # FIFO work queue of oids still to dump
    done_oids = {}    # oids already written (dict used as a set)
    load = self._storage.load
    while oids:
        oid = oids.pop(0)
        if oid in done_oids:
            continue
        done_oids[oid] = True
        try:
            p, serial = load(oid, self._version)
        except Exception:
            # Best-effort export: a dangling reference is logged and
            # skipped rather than aborting the whole export.
            # (Narrowed from a bare except, which also swallowed
            # KeyboardInterrupt/SystemExit.)
            logger.debug("broken reference for oid %s", repr(oid),
                         exc_info=True)
        else:
            referencesf(p, oids)  # enqueue the oids this pickle references
            f.writelines([oid, p64(len(p)), p])
    f.write(export_end_marker)
    return f
def exportFile(self, oid, f=None):
    """Write a ZEXP export of everything reachable from *oid* to *f*.

    *f*: None -> a fresh TemporaryFile; str -> opened as 'w+b';
    otherwise an already-open binary file.  The populated file object
    is returned.
    """
    if f is None:
        f = TemporaryFile()
    elif isinstance(f, str):
        f = open(f, 'w+b')
    f.write('ZEXP')  # format magic
    queue = [oid]     # oids still to serialize, breadth-first
    exported = set()  # oids already written
    load = self._storage.load
    while queue:
        oid = queue.pop(0)
        if oid in exported:
            continue
        exported.add(oid)
        try:
            p, serial = load(oid, self._version)
        except Exception:
            # Best-effort: log and skip broken references instead of
            # aborting (narrowed from a bare except; the unused
            # Python-2-only `done = done_oids.has_key` binding was
            # removed).
            logger.debug("broken reference for oid %s", repr(oid),
                         exc_info=True)
        else:
            referencesf(p, queue)  # enqueue referenced oids
            f.writelines([oid, p64(len(p)), p])
    f.write(export_end_marker)
    return f
def referrers(storage):
    """Map each referenced oid to the list of (oid, tid) records citing it.

    Walks every record of every transaction in *storage* and inverts the
    reference graph.
    """
    back_refs = {}
    for txn in storage.iterator():
        for rec in txn:
            source = (rec.oid, rec.tid)
            for target in referencesf(rec.data):
                if target not in back_refs:
                    back_refs[target] = []
                back_refs[target].append(source)
    return back_refs
def buildRefmap(fs):
    '''build a refmap from a filestorage. look in every record of every
    transaction. build a dict of oid -> (list of referenced oids,
    "mod.klass" of the object)
    '''
    refmap = {}
    fsi = fs.iterator()
    # Total file size on disk, used only to estimate progress.
    size = os.stat(fs.__name__).st_size
    start = time.time()
    lastPercent = 0.0
    interval = 0.005  # minimum % change before the progress line refreshes
    # NOTE: Python-2-only `print` statement syntax (here and below).
    print "[1/3] Computing the Reference Map"
    for txn in fsi:
        # Progress is approximated from the iterator's file position.
        # NOTE(review): fsi._file is a private attribute of the iterator
        # -- confirm it exists for the iterator implementation in use.
        percent = float(fsi._file.tell())/float(size) * 100
        if(percent - lastPercent > interval):
            spentTime = time.time() - start
            # Linear extrapolation: total-time estimate minus time spent.
            remainingTime = spentTime / float(fsi._file.tell()) * (float(size)) - spentTime
            sys.stdout.write("\r%f%% complete, time spent %s, remaining time: %s" % (percent,GetInHMS(time.time() - start, True), GetInHMS(remainingTime, False)))
            lastPercent = percent
        for rec in txn:
            # Records without data (e.g. undo records) are skipped.
            if rec.data:
                mod, klass = get_pickle_metadata(rec.data)
                refs = referencesf(rec.data)
                refmap[rec.oid] = (refs, mod+"."+klass)
    # Trailing comma suppresses the extra newline (Python 2 print).
    print "\n",
    return refmap
def get_references(state):
    """Compute the set of 64-bit integer OIDs referenced by *state*.

    An empty/None state yields an empty set.
    """
    if not state:
        return set()
    return {u64(oid) for oid in referencesf(str(state))}
def buildRefmap(fs):
    '''build a refmap from a filestorage. look in every record of every
    transaction. build a dict of oid -> (list of referenced oids,
    "mod.klass" of the object)
    '''
    refmap = {}
    fsi = fs.iterator()
    # File size on disk; only used to estimate progress below.
    size = os.stat(fs.__name__).st_size
    start = time.time()
    lastPercent = 0.0
    interval = 0.005  # refresh the progress line only after this much % change
    # NOTE: Python-2-only `print` statement syntax in this function.
    print "[1/3] Computing the Reference Map"
    for txn in fsi:
        # NOTE(review): fsi._file is a private iterator attribute --
        # confirm it is available on the iterator implementation used.
        percent = float(fsi._file.tell()) / float(size) * 100
        if (percent - lastPercent > interval):
            spentTime = time.time() - start
            # Linear extrapolation of total runtime, minus time already spent.
            remainingTime = spentTime / float(
                fsi._file.tell()) * (float(size)) - spentTime
            sys.stdout.write(
                "\r%f%% complete, time spent %s, remaining time: %s" %
                (percent, GetInHMS(time.time() - start, True),
                 GetInHMS(remainingTime, False)))
            lastPercent = percent
        for rec in txn:
            # Skip records without data (e.g. undo records).
            if rec.data:
                mod, klass = get_pickle_metadata(rec.data)
                refs = referencesf(rec.data)
                refmap[rec.oid] = (refs, mod + "." + klass)
    # Trailing comma suppresses the extra newline (Python 2 print).
    print "\n",
    return refmap
def pack(self, t, referencesf):
    """Mark-and-sweep pack: discard every object unreachable from the root.

    *t* (pack time) is part of the storage API but not used here;
    *referencesf* extracts referenced oids from a pickle into a list.
    """
    with self._lock:
        stack = ['\0\0\0\0\0\0\0\0']  # start the walk at the root oid
        reachable = {}
        # Mark phase: depth-first walk of the reference graph.
        while stack:
            oid = stack.pop()
            if oid not in reachable:
                reachable[oid] = None
                referencesf(self._opickle[oid], stack)
        # Sweep phase: collect everything that was never marked.
        for oid in self._index.keys():
            if oid not in reachable:
                self._takeOutGarbage(oid)
def findrefs(self, pos):
    """List the oids referenced by the data record at *pos*, as of packtime."""
    header = self._read_data_header(pos)
    # A record may just be a backpointer; follow the chain to the one
    # that actually carries the pickle data.
    while header.back:
        header = self._read_data_header(header.back)
    if not header.plen:
        return []
    return referencesf(self._file.read(header.plen))
def analyzeRecords(self, connection, records):
    """Insert one `links` row per (source, target) oid pair found in *records*.

    A record with no outgoing references still gets a single row whose
    target_oid is -1, so it remains visible in the table.  Commits once
    at the end.
    """
    cursor = connection.cursor()
    sql = """
        INSERT INTO links (source_oid, target_oid) VALUES (?, ?)
    """
    for rec in records:
        src = u64(rec.oid)
        targets = {u64(ref) for ref in referencesf(rec.data)}
        for dst in (targets or [-1]):
            cursor.execute(sql, (src, dst))
    connection.commit()
def pack(self, t, referencesf):
    """Mark-and-sweep pack: remove objects unreachable from the root oid.

    *t* (pack time) is accepted for storage-API compatibility but unused.
    *referencesf* appends the oids referenced by a pickle to a list.
    """
    self._lock_acquire()
    try:
        rindex = {}
        rootl = ['\0\0\0\0\0\0\0\0']  # walk starts at the root oid
        # mark referenced objects
        while rootl:
            oid = rootl.pop()
            # `in` replaces the Python-2-only rindex.has_key(oid).
            if oid in rindex:
                continue
            p = self._opickle[oid]
            referencesf(p, rootl)
            rindex[oid] = None
        # sweep unreferenced objects
        for oid in self._index.keys():
            if oid not in rindex:
                self._takeOutGarbage(oid)
    finally:
        self._lock_release()
def exportFile(self, oid, f=None):
    """Export the graph rooted at *oid* — pickles plus blob payloads — as ZEXP.

    *f* may be None (a TemporaryFile is created), a filename string, or an
    open binary file.  Blob data is written inline after the owning record
    when the storage provides IBlobStorage.  Returns the populated file.
    """
    if f is None:
        f = TemporaryFile(prefix="EXP")
    elif isinstance(f, six.string_types):
        f = open(f, 'w+b')
    f.write(b'ZEXP')  # format magic
    oids = [oid]      # FIFO queue of oids still to dump
    done_oids = {}    # oids already written (dict used as a set)
    load = self._storage.load
    supports_blobs = IBlobStorage.providedBy(self._storage)
    while oids:
        oid = oids.pop(0)
        if oid in done_oids:
            continue
        done_oids[oid] = True
        try:
            p, serial = load(oid)
        except Exception:
            # Best-effort export: log dangling references and keep going
            # (narrowed from a bare except).
            logger.debug("broken reference for oid %s", repr(oid),
                         exc_info=True)
        else:
            referencesf(p, oids)  # enqueue referenced oids
            f.writelines([oid, p64(len(p)), p])
            if supports_blobs:
                if not isinstance(self._reader.getGhost(p), Blob):
                    continue  # not a blob
                blobfilename = self._storage.loadBlob(oid, serial)
                f.write(blob_begin_marker)
                f.write(p64(os.stat(blobfilename).st_size))
                # `with` guarantees the blob file closes even if cp fails.
                with open(blobfilename, "rb") as blobdata:
                    cp(blobdata, f)
    f.write(export_end_marker)
    return f
def exportFile(self, oid, f=None):
    """Write a ZEXP export (records and blob data) reachable from *oid*.

    *f*: None -> new TemporaryFile; str -> opened 'w+b'; otherwise an
    open binary file.  Returns the file.
    """
    if f is None:
        f = TemporaryFile(prefix="EXP")
    elif isinstance(f, six.string_types):
        f = open(f, 'w+b')
    f.write(b'ZEXP')  # export magic header
    queue = [oid]     # breadth-first queue of oids to serialize
    seen = {}         # oids already exported
    load = self._storage.load
    supports_blobs = IBlobStorage.providedBy(self._storage)
    while queue:
        oid = queue.pop(0)
        if oid in seen:
            continue
        seen[oid] = True
        try:
            p, serial = load(oid)
        except Exception:
            # Log and skip broken references; narrowed from a bare
            # except.  (The unused `done = done_oids.__contains__`
            # binding was dropped.)
            logger.debug("broken reference for oid %s", repr(oid),
                         exc_info=True)
        else:
            referencesf(p, queue)
            f.writelines([oid, p64(len(p)), p])
            if supports_blobs:
                if not isinstance(self._reader.getGhost(p), Blob):
                    continue  # not a blob
                blobfilename = self._storage.loadBlob(oid, serial)
                f.write(blob_begin_marker)
                f.write(p64(os.stat(blobfilename).st_size))
                # context manager replaces the manual open/close pair
                with open(blobfilename, "rb") as blobdata:
                    cp(blobdata, f)
    f.write(export_end_marker)
    return f
def _total_size(oid, seen):
    """Recursively total the pickle sizes of *oid* plus unseen descendants.

    Relies on `cache`, `cache_size`, `fs`, `load_current` and
    `referencesf` from the enclosing scope; *seen* guards against cycles.
    """
    memoized = cache.get(oid)
    if memoized is not None:
        return memoized
    data, serialno = load_current(fs, oid)
    total = len(data)
    for child in referencesf(data):
        if child not in seen:
            seen[child] = 1
            total += _total_size(child, seen)
    cache[oid] = total
    # Crude bound on the memo: evict an arbitrary entry once full.
    if len(cache) == cache_size:
        cache.popitem()
    return total
def check_any_storage(storage):
    """Breadth-first scan from the root oid, reporting unloadable references.

    Prints the 64-bit oid of every reference whose target raises
    POSKeyError on load.
    """
    checked = set()
    to_check = deque([p64(0)])  # start at the root object
    while to_check:
        check_oid = to_check.popleft()
        checked.add(check_oid)
        try:
            state, tid = storage.load(check_oid)
        except POSKeyError:
            # Broken reference: report it and skip its (unknown) children.
            # The original fell through and called referencesf(state)
            # with a stale/undefined `state`; `continue` fixes that.
            # (print() also replaces the Python-2-only print statement.)
            print("Bad reference found", u64(check_oid))
            continue
        for reference in referencesf(state):
            if reference not in checked:
                to_check.append(reference)
def main(path):
    """Print size/path/class statistics for every object in the FileStorage
    at *path*.

    NOTE: Python-2 script — relies on dict.has_key and keys() returning a
    sortable list; PACK and VERBOSE are module-level flags.
    """
    fs = FileStorage(path, read_only=1)
    if PACK:
        fs.pack()
    db = ZODB.DB(fs)
    rt = db.open().root()
    # Map oid -> human-readable path, limited to depth 3.
    paths = find_paths(rt, 3)
    def total_size(oid):
        # Size of the object plus everything transitively reachable from
        # it.  A fresh bounded memo and seen-set are used per call.
        cache = {}
        cache_size = 1000
        def _total_size(oid, seen):
            v = cache.get(oid)
            if v is not None:
                return v
            data, serialno = load_current(fs, oid)
            size = len(data)
            for suboid in referencesf(data):
                if suboid in seen:
                    continue  # already counted on this walk (cycle guard)
                seen[suboid] = 1
                size += _total_size(suboid, seen)
            cache[oid] = size
            # Crude eviction: drop an arbitrary entry once the memo fills.
            if len(cache) == cache_size:
                cache.popitem()
            return size
        return _total_size(oid, {})
    keys = fs._index.keys()
    keys.sort()
    keys.reverse()
    if not VERBOSE:
        # If not running verbosely, don't print an entry for an object
        # unless it has an entry in paths.
        keys = filter(paths.has_key, keys)
    fmt = "%8s %5d %8d %s %s.%s"
    for oid in keys:
        data, serialno = load_current(fs, oid)
        mod, klass = get_pickle_metadata(data)
        refs = referencesf(data)  # NOTE(review): computed but never used here
        path = paths.get(oid, '-')
        print(fmt % (U64(oid), len(data), total_size(oid), path, mod, klass))
def check_relstorage(connection):
    """Scan every current object in a RelStorage database and report
    references that point at an oid with no current revision.

    Uses two cursors so the outer current_object scan is not disturbed
    by the per-object lookups.  Prints each dangling 64-bit oid found.
    """
    curr_objs = connection.cursor()
    cursor = connection.cursor()
    curr_objs.execute('select zoid, tid from current_object')
    for zoid, tid in curr_objs:
        cursor.execute('select state from object_state where zoid=%s and tid=%s',
                       (zoid, tid))
        state = cursor.fetchone()[0]
        if state is None:
            continue  # How would an object have a null state?
        for ref_zoid in referencesf(state):
            ref_zoid = u64(ref_zoid)
            cursor.execute('select tid from current_object where zoid=%s',
                           (ref_zoid,))
            if cursor.rowcount == 0:
                # No current revision for the referenced oid: dangling.
                # print() replaces the Python-2-only print statement.
                print("Bad reference found", ref_zoid)
def exportXML(jar, oid, file=None):
    """Export the object graph rooted at *oid* from connection *jar* as XML.

    *file* may be None (a TemporaryFile is created), a filename string,
    or an open binary file; the populated file object is returned.
    """
    # For performance reasons, exportXML does not use 'XMLrecord' anymore to map
    # oids. This requires to initialize MinimalMapping.marked_reference before
    # any string output, i.e. in ppml.Reference.__init__
    # This also fixed random failures when DemoStorage is used, because oids
    # can have values that have a shorter representation in 'repr' instead of
    # 'base64' (see ppml.convert) and ppml.String does not support this.
    load = jar._storage.load
    pickle_dict = {oid: None}
    max_cache = [1e7]  # do not cache more than 10MB of pickle data
    def getReorderedPickle(oid):
        # Return the reordered pickle for oid, loading and caching it
        # (within the max_cache byte budget) on first use.
        p = pickle_dict[oid]
        if p is None:
            # Versions are ignored, but some 'load()' implementations require them
            # FIXME: remove "''" when TmpStore.load() on ZODB stops asking for it.
            p = load(oid, "")[0]
            p = reorderPickle(jar, p)[1]
            if len(p) < max_cache[0]:
                max_cache[0] -= len(p)
                pickle_dict[oid] = p
        return p
    # Sort records and initialize id_mapping
    id_mapping = ppml.MinimalMapping()
    reordered_oid_list = [oid]
    # Deliberate breadth-first traversal: the list is appended to while
    # being iterated, so newly discovered oids are visited in turn.
    for oid in reordered_oid_list:
        _mapOid(id_mapping, oid)
        for oid in referencesf(getReorderedPickle(oid)):
            if oid not in pickle_dict:
                pickle_dict[oid] = None
                reordered_oid_list.append(oid)
    # Do real export
    if file is None:
        file = TemporaryFile()
    elif isinstance(file, basestring):  # NOTE: Python-2-only name
        file = open(file, "w+b")
    write = file.write
    write('<?xml version="1.0"?>\n<ZopeData>\n')
    for oid in reordered_oid_list:
        p = getReorderedPickle(oid)
        write(XMLrecord(oid, len(p), p, id_mapping))
    write("</ZopeData>\n")
    return file
def exportXML(jar, oid, file=None):
    """Export *oid*'s object graph from connection *jar* as a ZopeData XML
    document, returning the file it was written to.

    *file*: None -> new TemporaryFile; string -> opened 'w+b'; otherwise
    an open binary file.
    """
    # For performance reasons, exportXML does not use 'XMLrecord' anymore to map
    # oids. This requires to initialize MinimalMapping.marked_reference before
    # any string output, i.e. in ppml.Reference.__init__
    # This also fixed random failures when DemoStorage is used, because oids
    # can have values that have a shorter representation in 'repr' instead of
    # 'base64' (see ppml.convert) and ppml.String does not support this.
    load = jar._storage.load
    pickle_dict = {oid: None}
    max_cache = [1e7]  # do not cache more than 10MB of pickle data
    def getReorderedPickle(oid):
        # Load (if needed), reorder and possibly cache the pickle for oid.
        p = pickle_dict[oid]
        if p is None:
            # Versions are ignored, but some 'load()' implementations require them
            # FIXME: remove "''" when TmpStore.load() on ZODB stops asking for it.
            p = load(oid, '')[0]
            p = reorderPickle(jar, p)[1]
            if len(p) < max_cache[0]:
                max_cache[0] -= len(p)
                pickle_dict[oid] = p
        return p
    # Sort records and initialize id_mapping
    id_mapping = ppml.MinimalMapping()
    reordered_oid_list = [oid]
    # Intentional append-while-iterating: acts as a breadth-first queue
    # over the reference graph.
    for oid in reordered_oid_list:
        _mapOid(id_mapping, oid)
        for oid in referencesf(getReorderedPickle(oid)):
            if oid not in pickle_dict:
                pickle_dict[oid] = None
                reordered_oid_list.append(oid)
    # Do real export
    if file is None:
        file = TemporaryFile()
    elif isinstance(file, basestring):  # NOTE: Python-2-only name
        file = open(file, 'w+b')
    write = file.write
    write('<?xml version="1.0"?>\n<ZopeData>\n')
    for oid in reordered_oid_list:
        p = getReorderedPickle(oid)
        write(XMLrecord(oid, len(p), p, id_mapping))
    write('</ZopeData>\n')
    return file
def _finish(self, tid, u, d, e):
    """Commit hook: apply this transaction's buffered writes (self._tmp),
    maintain per-object reference lists and reference counts, and
    garbage-collect any non-root object whose refcount drops to zero.

    *tid* is the committing transaction id; *u*, *d*, *e* (presumably
    user/description/extension per the storage API — TODO confirm) are
    unused here.
    """
    zeros = {}  # oids whose refcount reached zero during this commit
    referenceCount = self._referenceCount
    referenceCount_get = referenceCount.get
    oreferences = self._oreferences
    serial = self._tid
    index = self._index
    opickle = self._opickle
    self._ltid = tid
    # iterate over all the objects touched by/created within this
    # transaction
    for entry in self._tmp:
        oid, data = entry[:]
        referencesl = []
        referencesf(data, referencesl)
        # Deduplicate the new reference list into a membership dict.
        references = {}
        for roid in referencesl:
            references[roid] = 1
        # Create a reference count for this object if one
        # doesn't already exist
        if referenceCount_get(oid) is None:
            referenceCount[oid] = 0
        # update references that are already associated with this
        # object
        roids = oreferences.get(oid, [])
        for roid in roids:
            if roid in references:
                # still referenced, so no need to update
                # remove it from the references dict so it doesn't
                # get "added" in the next clause
                del references[roid]
            else:
                # Delete the stored ref, since we no longer
                # have it
                # NOTE(review): oreferences[oid] is the same list being
                # iterated as `roids`; remove() during iteration can skip
                # elements — confirm whether this path is ever hit with
                # multiple stale refs.
                oreferences[oid].remove(roid)
                # decrement refcnt:
                rc = referenceCount_get(roid, 1)
                rc = rc - 1
                if rc < 0:
                    # This should never happen
                    raise ReferenceCountError(
                        "%s (Oid %r had refcount %s)" %
                        (ReferenceCountError.__doc__, roid, rc))
                referenceCount[roid] = rc
                if rc == 0:
                    zeros[roid] = 1
        # Create a reference list for this object if one
        # doesn't already exist
        if oreferences.get(oid) is None:
            oreferences[oid] = []
        # Now add any references that weren't already stored
        for roid in references.keys():
            oreferences[oid].append(roid)
            # Create/update refcnt
            rc = referenceCount_get(roid, 0)
            # Re-referenced within this transaction: no longer garbage.
            if rc == 0 and zeros.get(roid) is not None:
                del zeros[roid]
            referenceCount[roid] = rc + 1
        index[oid] = serial
        opickle[oid] = data
        now = time.time()
        self._conflict_cache[(oid, serial)] = data, now
    if zeros:
        for oid in zeros.keys():
            if oid == '\0\0\0\0\0\0\0\0':
                continue  # never collect the root object
            self._takeOutGarbage(oid)
    self._tmp = []
def _finish(self, tid, u, d, e):
    """Commit hook: apply buffered writes (self._tmp), keep per-object
    reference lists/counts up to date, and garbage-collect non-root
    objects whose refcount reached zero.

    NOTE: Python-2-only syntax (has_key, backtick repr, raise-with-tuple).
    *u*, *d*, *e* are presumably user/description/extension — unused here;
    TODO confirm against the storage API.
    """
    zeros={}  # oids whose refcount dropped to zero this commit
    referenceCount=self._referenceCount
    referenceCount_get=referenceCount.get
    oreferences=self._oreferences
    serial=self._tid
    index=self._index
    opickle=self._opickle
    self._ltid = tid
    # iterate over all the objects touched by/created within this
    # transaction
    for entry in self._tmp:
        oid, data = entry[:]
        referencesl=[]
        referencesf(data, referencesl)
        # Deduplicate the new references into a membership dict.
        references={}
        for roid in referencesl:
            references[roid]=1
        referenced=references.has_key
        # Create a reference count for this object if one
        # doesn't already exist
        if referenceCount_get(oid) is None:
            referenceCount[oid] = 0
            #zeros[oid]=1
        # update references that are already associated with this
        # object
        roids = oreferences.get(oid, [])
        for roid in roids:
            if referenced(roid):
                # still referenced, so no need to update
                # remove it from the references dict so it doesn't
                # get "added" in the next clause
                del references[roid]
            else:
                # Delete the stored ref, since we no longer
                # have it
                # NOTE(review): `roids` aliases oreferences[oid], so
                # remove() mutates the list being iterated — confirm
                # whether multiple stale refs can be skipped here.
                oreferences[oid].remove(roid)
                # decrement refcnt:
                rc = referenceCount_get(roid, 1)
                rc=rc-1
                if rc < 0:
                    # This should never happen
                    raise ReferenceCountError, (
                        "%s (Oid %s had refcount %s)" %
                        (ReferenceCountError.__doc__,`roid`,rc)
                        )
                referenceCount[roid] = rc
                if rc==0:
                    zeros[roid]=1
        # Create a reference list for this object if one
        # doesn't already exist
        if oreferences.get(oid) is None:
            oreferences[oid] = []
        # Now add any references that weren't already stored
        for roid in references.keys():
            oreferences[oid].append(roid)
            # Create/update refcnt
            rc=referenceCount_get(roid, 0)
            # Re-referenced during this transaction: no longer garbage.
            if rc==0 and zeros.get(roid) is not None:
                del zeros[roid]
            referenceCount[roid] = rc+1
        index[oid] = serial
        opickle[oid] = data
        now = time.time()
        self._conflict_cache[(oid, serial)] = data, now
    if zeros:
        for oid in zeros.keys():
            if oid == '\0\0\0\0\0\0\0\0':
                continue  # never collect the root object
            self._takeOutGarbage(oid)
    self._tmp = []