def newTid(old):
    """Return a new 8-byte transaction id strictly later than `old`.

    The id is derived from the current UTC time; if `old` (a raw tid
    string or None) is given, the result is forced past it via
    TimeStamp.laterThan().  Returns the repr of the TimeStamp, which
    for this TimeStamp type is the raw tid string.
    """
    t = time.time()
    ts = TimeStamp(*time.gmtime(t)[:5] + (t % 60,))
    if old is not None:
        # Guarantee monotonicity even if the clock went backwards.
        ts = ts.laterThan(TimeStamp(old))
    # repr() replaces the deprecated backquote syntax (removed in Py3).
    return repr(ts)
def toTimeStamp(dt):
    """Convert a datetime `dt` to a TimeStamp.

    The datetime is first normalized to UTC.  Note: a leap second would
    probably make this fail, which may be a good thing — serials do not
    really account for leap seconds.
    """
    struct = dt.utctimetuple()
    # Fold the microseconds into a fractional-seconds component.
    seconds = struct[5] + dt.microsecond / 1000000.0
    return TimeStamp(*(struct[:5] + (seconds,)))
def main(argv=sys.argv):
    """Command-line entry point: copy all transactions from a source
    storage to a destination storage, both described by a ZConfig file.

    Supports --dry-run (report only), --clear (zap destination first)
    and --incremental (resume after the destination's last committed
    transaction; RelStorage destinations only).
    """
    parser = optparse.OptionParser(description=__doc__,
                                   usage="%prog [options] config_file")
    parser.add_option(
        "--dry-run", dest="dry_run", action="store_true",
        help="Attempt to open the storages, then explain what would be done")
    parser.add_option(
        "--clear", dest="clear", action="store_true",
        help="Clear the contents of the destination storage before copying")
    parser.add_option(
        "--incremental", dest="incremental", action="store_true",
        help="Assume the destination contains a partial copy of the source "
             "and resume copying from the last transaction. WARNING: no "
             "effort is made to verify that the destination holds the same "
             "transaction data before this point! Use at your own risk. "
             "Currently only supports RelStorage destinations.")
    parser.set_defaults(dry_run=False, clear=False)
    options, args = parser.parse_args(argv[1:])
    if len(args) != 1:
        parser.error("The name of one configuration file is required.")
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(name)s] %(levelname)s %(message)s")
    # Load the <source>/<destination> storage definitions from the config.
    schema = ZConfig.loadSchemaFile(BytesIO(schema_xml))
    config, handler = ZConfig.loadConfig(schema, args[0])
    source = config.source.open()
    destination = config.destination.open()
    log.info("Storages opened successfully.")
    if options.incremental:
        # Incremental mode relies on RelStorage internals (_adapter) to
        # find the last committed tid in the destination.
        if not hasattr(destination, '_adapter'):
            msg = ("Error: no API is known for determining the last committed "
                   "transaction of the destination storage. Aborting "
                   "conversion.")
            sys.exit(msg)
        if not storage_has_data(destination):
            log.warning(
                "Destination empty, start conversion from the beginning.")
        else:
            last_tid = destination._adapter.txncontrol.get_tid(
                destination._load_cursor)
            # Resume at the transaction immediately after the last one copied.
            next_tid = p64(last_tid + 1)
            source = source.iterator(start=next_tid)
            log.info("Resuming ZODB copy from %s", readable_tid_repr(next_tid))
    if options.dry_run:
        # Only count and report transactions; don't touch the destination.
        log.info("Dry run mode: not changing the destination.")
        if storage_has_data(destination):
            log.warning("The destination storage has data.")
        count = 0
        for txn in source.iterator():
            log.info('%s user=%s description=%s' % (
                TimeStamp(txn.tid), txn.user, txn.description))
            count += 1
        log.info("Would copy %d transactions.", count)
    else:
        if options.clear:
            log.info("Clearing old data...")
            if hasattr(destination, 'zap_all'):
                destination.zap_all()
            else:
                msg = ("Error: no API is known for clearing this type "
                       "of storage. Use another method.")
                sys.exit(msg)
            log.info("Done clearing old data.")
        # Refuse to copy into a non-empty destination (would interleave data).
        if storage_has_data(destination):
            msg = "Error: the destination storage has data. Try --clear."
            sys.exit(msg)
        destination.copyTransactionsFrom(source)
        source.close()
        destination.close()
def _get_timestamp(self):
    """Return the current UTC time as a TimeStamp repr string.

    For this TimeStamp type the repr is the raw 8-byte serial.
    """
    t = time.time()
    t = TimeStamp(*time.gmtime(t)[:5] + (t % 60,))
    # repr() replaces the deprecated backquote syntax (removed in Py3).
    return repr(t)
def readable_tid_repr(tid):
    """Return tid_repr(tid), with a human-readable timestamp appended
    when `tid` is a raw 8-byte tid string."""
    text = tid_repr(tid)
    if isinstance(tid, str) and len(tid) == 8:
        text = "%s %s" % (text, TimeStamp(tid))
    return text
def recover(inp, outp, verbose=0, partial=False, force=False, pack=None):
    # Recover as much transaction data as possible from the FileStorage
    # at `inp` into a fresh FileStorage at `outp` (Python 2 variant).
    # Corrupt transaction headers are skipped by scanning forward for
    # the next plausible header; out-of-order timestamps are repaired
    # with laterThan().  NOTE(review): this copy of the function appears
    # truncated — the success path after copying a transaction (tpc_vote/
    # tpc_finish) and the final reporting seen in the Py3 variant are
    # missing here.
    print "Recovering", inp, "into", outp
    if os.path.exists(outp) and not force:
        die("%s exists" % outp)
    f = open(inp, "rb")
    if f.read(4) != ZODB.FileStorage.packed_version:
        die("input is not a file storage")
    f.seek(0,2)
    file_size = f.tell()
    ofs = ZODB.FileStorage.FileStorage(outp, create=1)
    _ts = None              # last good TimeStamp seen
    ok = 1                  # 1 while timestamps are monotonically increasing
    prog1 = 0               # progress-bar state
    undone = 0              # bytes of undone transaction data skipped
    pos = 4L                # current read offset (past the 4-byte magic)
    ltid = None             # last transaction id read
    while pos:
        try:
            npos, txn, tid = read_txn_header(f, pos, file_size, outp, ltid)
        except EOFError:
            break
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception, err:
            # Header unreadable: scan forward for the next valid header.
            print "error reading txn header:", err
            if not verbose:
                progress(prog1)
            pos = scan(f, pos)
            if verbose > 1:
                print "looking for valid txn header at", pos
            continue
        ltid = tid
        if txn is None:
            # Undone transaction: skip it, but account for its size.
            undone = undone + npos - pos
            pos = npos
            continue
        else:
            pos = npos
        tid = txn.tid
        if _ts is None:
            _ts = TimeStamp(tid)
        else:
            t = TimeStamp(tid)
            if t <= _ts:
                # Out-of-order timestamp: substitute one later than the
                # previous, so the output storage stays monotonic.
                if ok:
                    print ("Time stamps out of order %s, %s" % (_ts, t))
                ok = 0
                _ts = t.laterThan(_ts)
                tid = `_ts`
            else:
                _ts = t
                if not ok:
                    print ("Time stamps back in order %s" % (t))
                    ok = 1
        ofs.tpc_begin(txn, tid, txn.status)
        if verbose:
            print "begin", pos, _ts,
            if verbose > 1:
                print
            sys.stdout.flush()
        nrec = 0
        try:
            for r in txn:
                if verbose > 1:
                    if r.data is None:
                        l = "bp"    # back-pointer record
                    else:
                        l = len(r.data)
                    # NOTE(review): format has 3 specifiers but only 2
                    # arguments — raises TypeError if reached; confirm.
                    print "%7d %s %s" % (u64(r.oid), l)
                ofs.restore(r.oid, r.tid, r.data, '', r.data_txn, txn)
                nrec += 1
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception, err:
            if partial and nrec:
                # Keep the records copied so far as a partial transaction.
                ofs._status = "p"
                ofs.tpc_vote(txn)
                ofs.tpc_finish(txn)
                if verbose:
                    print "partial"
            else:
                ofs.tpc_abort(txn)
            print "error copying transaction:", err
            if not verbose:
                progress(prog1)
            pos = scan(f, pos)
            if verbose > 1:
                print "looking for valid txn header at", pos
def timestamp(minutes):
    """Return a TimeStamp for the current UTC time shifted by `minutes`."""
    import time
    from persistent.TimeStamp import TimeStamp
    future = time.time() + 60 * minutes
    parts = time.gmtime(future)[:5] + (future % 60,)
    return TimeStamp(*parts)
def main():
    """Scan a FileStorage and report the biggest objects by class.

    Prints, per Module.ClassName+oid, the total/min/max pickle sizes and
    the number of revisions, sorted by total size.
    """
    usage = "usage: %prog [options] filename"
    parser = OptionParser(usage=usage)
    parser.add_option("-n", "--number", dest="num",
                      help="display only the n biggest objects",
                      default=-1, type="int")
    parser.add_option("-f", "--output", dest="filename", action="store",
                      type="string", help="the FileStorage")
    parser.add_option("-v", "--verbose", dest="verbose", action="store_false",
                      help="show percentage and time remaining")
    (options, args) = parser.parse_args()
    VERBOSE = False
    if options.filename:
        fname = options.filename
    else:
        print "You have to enter the FileStorage filename, see --help for details"
        return 2
    if options.verbose != None:
        VERBOSE = True
    objectsToDisplay = options.num
    stats = {}                 # class_name+oid -> Stat accumulator
    start = time.time()
    size = os.stat(fname).st_size
    it = ZODB.FileStorage.FileIterator(fname)
    lastPercent = 0.0
    recordsCounter = 0
    interval = 0.005           # report progress every 0.005%
    now = datetime.date.today()
    try:
        for t in it:
            percent = float(it._file.tell()) / float(size) * 100
            #Show the percentage of the work completed and the remaining time
            if (percent - lastPercent > interval):
                spentTime = time.time() - start
                # Extrapolate remaining time from bytes processed so far.
                remainingTime = spentTime / float(
                    it._file.tell()) * (float(size)) - spentTime
                if VERBOSE:
                    sys.stderr.write(
                        "\r%f%% complete, time spent %s, remaining time: %s, recordsCounter %d"
                        % (percent, GetInHMS(time.time() - start),
                           GetInHMS(remainingTime), recordsCounter))
                    sys.stdout.flush()
                lastPercent = percent
            for r in t:
                #need to reduce the time of the dictionary stats from time to time
                ts = TimeStamp(t.tid)
                then = datetime.date(int(ts.year()), int(ts.month()),
                                     int(ts.day()))
                delta = timedelta(days=3)
                #don't reduce the size of the dictionary when analysing last 3 days transactions
                if recordsCounter % (objectsToDisplay * 100) == 0 and (
                        now - then > delta):
                    # Keep only the current top-N entries to bound memory.
                    tmp = {}
                    for class_name, s in sorted(
                            stats.items(), key=lambda (k, v): v.size,
                            reverse=True)[0:objectsToDisplay]:
                        tmp[class_name] = s
                    stats = tmp
                if r.data:
                    mod, klass = get_pickle_metadata(r.data)
                    l = len(r.data)
                    # Key includes the oid so distinct objects of the same
                    # class are tracked separately.
                    class_name = mod + "." + klass + " oid: " + oid_repr(
                        r.oid).strip()
                    stat = stats.get(class_name)
                    if stat is None:
                        stat = stats[class_name] = Stat()
                        stat.size = stat.min = stat.max = l
                        stat.oid = oid_repr(r.oid).strip()
                        stat.className = mod + "." + klass
                        stat.number = 1
                    else:
                        stat.min = min(stat.min, l)
                        stat.max = max(stat.max, l)
                        stat.number = stat.number + 1
                        stat.size = stat.size + l
                recordsCounter += 1
    except KeyboardInterrupt:
        # Allow Ctrl-C to stop scanning and still print partial results.
        pass
    print "\n"
    print "%-41s %9s %15s %15s %9s %9s %9s" % (
        "Module.ClassName", "Oid", "Percentage", "Total Size", "Min", "Max",
        "Copies")
    print "%s" % "_" * 114
    for class_name, s in sorted(stats.items(), key=lambda (k, v): v.size,
                                reverse=True)[0:objectsToDisplay]:
        class_name = s.className
        if len(class_name) > 40:
            # Truncate from the left, keeping the most specific suffix.
            class_name = class_name[::-1][0:35][::-1]
            class_name = "[..]" + class_name
        print "%-40s | %8s | %13f%% | %13s | %7s | %7s | %7s" % (
            class_name, s.oid, (s.size * 100.0 / size), pretty_size(s.size),
            pretty_size(s.min), pretty_size(s.max), s.number)
def get_timestamp(self):
    """Return the TimeStamp for this object's transaction id."""
    stamp = TimeStamp(self.tid)
    return stamp
def copy(source, dest, verbose=0):
    """Copy transactions from a source to a destination storage

    This is typically used for converting data from one storage to
    another.  `source` must have an .iterator() method.  Out-of-order
    source timestamps are repaired with laterThan() so the destination
    stays monotonic.
    """
    _ts = None          # last good TimeStamp seen
    ok = 1              # 1 while timestamps are monotonically increasing
    preindex = {}       # oid -> serial of previous revision (store() path)
    preget = preindex.get
    # restore() is a new storage API method which has an identical
    # signature to store() except that it does not return anything.
    # Semantically, restore() is also identical to store() except that it
    # doesn't do the ConflictError or VersionLockError consistency
    # checks.  The reason to use restore() over store() in this method is
    # that store() cannot be used to copy transactions spanning a version
    # commit or abort, or over transactional undos.
    #
    # We'll use restore() if it's available, otherwise we'll fall back to
    # using store().  However, if we use store, then
    # copyTransactionsFrom() may fail with VersionLockError or
    # ConflictError.
    restoring = hasattr(dest, 'restore')
    fiter = source.iterator()
    for transaction in fiter:
        tid = transaction.tid
        if _ts is None:
            _ts = TimeStamp(tid)
        else:
            t = TimeStamp(tid)
            if t <= _ts:
                # Substitute a tid later than the previous one.
                if ok:
                    print('Time stamps out of order %s, %s' % (_ts, t))
                ok = 0
                _ts = t.laterThan(_ts)
                # Backquote repr yields the raw tid string (Python 2 only).
                tid = ` _ts `
            else:
                _ts = t
                if not ok:
                    print('Time stamps back in order %s' % (t))
                    ok = 1
        if verbose:
            print _ts
        dest.tpc_begin(transaction, tid, transaction.status)
        for r in transaction:
            oid = r.oid
            if verbose:
                print oid_repr(oid), r.version, len(r.data)
            if restoring:
                dest.restore(oid, r.tid, r.data, r.version,
                             r.data_txn, transaction)
            else:
                # store() needs the previous serial for this oid.
                pre = preget(oid, None)
                s = dest.store(oid, pre, r.data, r.version, transaction)
                preindex[oid] = s
        dest.tpc_vote(transaction)
        dest.tpc_finish(transaction)
    fiter.close()
def main():
    """Scan a FileStorage and report per-day transaction activity.

    For each day prints the transaction count, records changed, and the
    average interval between transactions; optionally restricted to one
    date (-d) or the last N days (-a).
    """
    usage = "usage: %prog [options] filename"
    parser = OptionParser(usage=usage)
    parser.add_option("-n", "--number", dest="num",
                      help="display only the 'n' busiest days",
                      default=20, type="int")
    parser.add_option("-f", "--file", dest="filename", action="store",
                      type="string", help="your FileStorage")
    parser.add_option(
        "-d", "--date", dest="date", action="store", type="string",
        help="show the stats only for the date d (format dd-mm-yyyy)")
    parser.add_option("-a", "--days", dest="days", action="store",
                      default="0", type="string",
                      help="show the stats only for the last 'a' days")
    parser.add_option("-v", "--verbose", dest="verbose", action="store_false",
                      help="show percentage and time remaining")
    (options, args) = parser.parse_args()
    objectsToDisplay = options.num
    VERBOSE = False
    if options.filename:
        fname = options.filename
    else:
        print "You have to enter the filename, see --help for details"
        return 2
    if options.verbose != None:
        VERBOSE = True
    stats = {}                 # "dd-mm-yyyy" -> Stat accumulator
    start = time.time()
    size = os.stat(fname).st_size
    it = ZODB.FileStorage.FileIterator(fname)
    lastPercent = 0.0
    recordsCounter = 0
    interval = 0.005           # report progress every 0.005%
    dataFound = False          # True once the -d date has been reached
    now = datetime.date.today()
    try:
        for t in it:
            #Format the date of the current transaction following dd-mm-yyyy
            ts = TimeStamp(t.tid)
            then = datetime.date(int(ts.year()), int(ts.month()),
                                 int(ts.day()))
            delta = timedelta(days=int(options.days))
            # days == 0 means "no limit".
            if ((not int(options.days)) or (now - then < delta)):
                dateT = strftime("%d-%m-%Y", [
                    int(ts.year()), int(ts.month()), int(ts.day()),
                    1, 1, 1, 1, 1, 1])
                percent = float(it._file.tell()) / float(size) * 100
                #Check if we found the searched date
                if options.date:
                    if str(dateT) == str(options.date):
                        dataFound = True
                    elif dataFound:
                        # Past the requested date; stop scanning.
                        break
                #Show the percentage of the work completed and the remaining time
                if (percent - lastPercent > interval):
                    spentTime = time.time() - start
                    remainingTime = spentTime / float(
                        it._file.tell()) * (float(size)) - spentTime
                    if VERBOSE:
                        sys.stderr.write(
                            "\r%f%% complete, time spent %s, remaining time: %s, recordsCounter %d"
                            % (percent, GetInHMS(time.time() - start, True),
                               GetInHMS(remainingTime, False),
                               recordsCounter))
                    lastPercent = percent
                stat = stats.get(dateT)
                if stat is None:
                    stat = stats[dateT] = Stat()
                    stat.n = 1
                else:
                    stat.n += 1
                for r in t:
                    #need to reduce the time of the dictionary stats from time to time
                    if recordsCounter % (objectsToDisplay * 100) == 0:
                        # Keep only the busiest days (plus today's entry)
                        # to bound memory use.
                        tmp = {}
                        for date, s in sorted(
                                stats.items(), key=lambda (k, v): v.n,
                                reverse=True)[0:objectsToDisplay]:
                            tmp[date] = s
                        try:
                            tmp[dateT] = stats[dateT]
                        except KeyError:
                            pass
                        stats = tmp
                    if r.data:
                        mod, klass = get_pickle_metadata(r.data)
                        l = len(r.data)
                        stat = stats.get(dateT)
                        stat.records += 1
                    recordsCounter += 1
                stat = stats.get(dateT)
                if stat is not None:
                    # Collect transaction times to compute mean intervals.
                    stat.mean.append(TimeStamp(t.tid).timeTime())
    except KeyboardInterrupt:
        # Allow Ctrl-C to stop scanning and still print partial results.
        pass
    print "\n"
    print "%-15s %17s %17s %22s" % ("Date", "Transactions", "Records Changed",
                                    "Average interval")
    print "%s" % "_" * 74
    if options.date:
        for date, s in sorted(stats.items(), key=lambda (k, v): v.n,
                              reverse=True):
            meanTime = 0
            for i in range(1, len(s.mean)):
                meanTime += s.mean[i] - s.mean[i - 1]
            if str(date) == str(options.date):
                print "%-15s | %15d | %15d | %15f secs" % (
                    date, (s.n), s.records, meanTime / s.n)
    else:
        for date, s in sorted(stats.items(), key=lambda (k, v): v.n,
                              reverse=True)[0:objectsToDisplay]:
            meanTime = 0
            for i in range(1, len(s.mean)):
                meanTime += s.mean[i] - s.mean[i - 1]
            print "%-15s | %15d | %15d | %15f secs" % (
                date, (s.n), s.records, meanTime / s.n)
def checkCompare(self):
    """TimeStamps for later dates compare greater than earlier ones."""
    ts1 = TimeStamp(1972, 6, 27)
    ts2 = TimeStamp(1971, 12, 12)
    # assertTrue replaces the long-deprecated assert_ alias.
    self.assertTrue(ts1 > ts2)
    self.assertTrue(ts2 <= ts1)
def checkLaterThan(self):
    """laterThan() must return a strictly greater TimeStamp, even when
    called with the stamp itself."""
    t = time.gmtime()
    ts = TimeStamp(*t[:6])
    ts2 = ts.laterThan(ts)
    # assertTrue replaces the long-deprecated assert_ alias.
    self.assertTrue(ts2 > ts)
def recover(inp, outp, verbose=0, partial=False, force=False, pack=None):
    # Recover as much transaction data as possible from the FileStorage
    # at `inp` into a fresh FileStorage at `outp` (Python 3 variant).
    # Corrupt transaction headers are skipped by scanning forward for the
    # next plausible header; out-of-order timestamps are repaired with
    # laterThan().  Optionally packs the result when `pack` is given.
    print("Recovering", inp, "into", outp)
    if os.path.exists(outp) and not force:
        die("%s exists" % outp)
    f = open(inp, "rb")
    if f.read(4) != ZODB.FileStorage.packed_version:
        die("input is not a file storage")
    f.seek(0,2)
    file_size = f.tell()
    ofs = ZODB.FileStorage.FileStorage(outp, create=1)
    _ts = None              # last good TimeStamp seen
    ok = 1                  # 1 while timestamps are monotonically increasing
    prog1 = 0               # progress-bar state
    undone = 0              # bytes of undone transaction data skipped
    pos = 4                 # current read offset (past the 4-byte magic)
    ltid = None             # last transaction id read
    while pos:
        try:
            npos, txn, tid = read_txn_header(f, pos, file_size, outp, ltid)
        except EOFError:
            break
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as err:
            # Header unreadable: scan forward for the next valid header.
            print("error reading txn header:", err)
            if not verbose:
                progress(prog1)
            pos = scan(f, pos)
            if verbose > 1:
                print("looking for valid txn header at", pos)
            continue
        ltid = tid
        if txn is None:
            # Undone transaction: skip it, but account for its size.
            undone = undone + npos - pos
            pos = npos
            continue
        else:
            pos = npos
        tid = txn.tid
        if _ts is None:
            _ts = TimeStamp(tid)
        else:
            t = TimeStamp(tid)
            if t <= _ts:
                # Out-of-order timestamp: substitute one later than the
                # previous, so the output storage stays monotonic.
                if ok:
                    print(("Time stamps out of order %s, %s" % (_ts, t)))
                ok = 0
                _ts = t.laterThan(_ts)
                tid = _ts.raw()
            else:
                _ts = t
                if not ok:
                    print(("Time stamps back in order %s" % (t)))
                    ok = 1
        ofs.tpc_begin(txn, tid, txn.status)
        if verbose:
            print("begin", pos, _ts, end=' ')
            if verbose > 1:
                print()
            sys.stdout.flush()
        nrec = 0
        try:
            for r in txn:
                if verbose > 1:
                    if r.data is None:
                        l = "bp"    # back-pointer record
                    else:
                        l = len(r.data)
                    # NOTE(review): format has 3 specifiers but only 2
                    # arguments — raises TypeError if reached; confirm.
                    print("%7d %s %s" % (u64(r.oid), l))
                ofs.restore(r.oid, r.tid, r.data, '', r.data_txn, txn)
                nrec += 1
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as err:
            if partial and nrec:
                # Keep the records copied so far as a partial transaction.
                ofs._status = "p"
                ofs.tpc_vote(txn)
                ofs.tpc_finish(txn)
                if verbose:
                    print("partial")
            else:
                ofs.tpc_abort(txn)
            print("error copying transaction:", err)
            if not verbose:
                progress(prog1)
            pos = scan(f, pos)
            if verbose > 1:
                print("looking for valid txn header at", pos)
        else:
            # Transaction copied cleanly: commit it to the output storage.
            ofs.tpc_vote(txn)
            ofs.tpc_finish(txn)
            if verbose:
                print("finish")
                sys.stdout.flush()
        if not verbose:
            # Advance the 20-step progress bar proportionally to position.
            prog = pos * 20 / file_size
            while prog > prog1:
                prog1 = prog1 + 1
                iprogress(prog1)
    bad = file_size - undone - ofs._pos
    print("\n%s bytes removed during recovery" % bad)
    if undone:
        print("%s bytes of undone transaction data were skipped" % undone)
    if pack is not None:
        print("Packing ...")
        from ZODB.serialize import referencesf
        ofs.pack(pack, referencesf)
    ofs.close()
    f.close()
def pack(self, t, referencesf):
    # Pack the storage up to time `t`, discarding unreachable objects
    # and non-current revisions.  `referencesf` extracts oid references
    # from a pickle into a list.
    # Packing is hard, at least when undo is supported.
    # Even for a simple storage like this one, packing
    # is pretty complex.
    self._lock_acquire()
    try:
        # Backquote repr of the TimeStamp yields the raw stop-tid string
        # (Python 2 only syntax).
        stop=`TimeStamp(*time.gmtime(t)[:5]+(t%60,))`
        # Build indexes up to the pack time:
        index, vindex = self._build_indexes(stop)
        # TODO: This packing algorithm is flawed. It ignores
        # references from non-current records after the pack
        # time.
        # Now build an index of *only* those objects reachable
        # from the root.
        rootl = [z64]
        pindex = {}         # oid -> current record, for reachable objects
        while rootl:
            oid = rootl.pop()
            if oid in pindex:
                continue
            # Scan non-version pickle for references
            r = index.get(oid, None)
            if r is None:
                if self._base:
                    # Object lives only in the base storage; still follow
                    # its references to keep its referents alive.
                    p, s = self._base.load(oid, '')
                    referencesf(p, rootl)
            else:
                pindex[oid] = r
                oid, pre, vdata, p, tid = r
                referencesf(p, rootl)
                if vdata:
                    # Also follow references from the non-version data.
                    nv = vdata[1]
                    if nv:
                        oid, pre, vdata, p, tid = nv
                        referencesf(p, rootl)
        # Now we're ready to do the actual packing.
        # We'll simply edit the transaction data in place.
        # We'll defer deleting transactions till the end
        # to avoid messing up the BTree items.
        deleted = []
        for tid, (p, u, d, e, records) in self._data.items():
            if tid >= stop:
                break
            o = []      # records to keep for this transaction
            for r in records:
                c = pindex.get(r[0])
                if c is None:
                    # GC this record, no longer referenced
                    continue
                if c == r:
                    # This is the most recent revision.
                    o.append(r)
                else:
                    # This record is not the indexed record,
                    # so it may not be current. Let's see.
                    vdata = r[3]
                    if vdata:
                        # Version record are current *only* if they
                        # are indexed
                        continue
                    else:
                        # OK, this isn't a version record, so it may be the
                        # non-version record for the indexed record.
                        vdata = c[3]
                        if vdata:
                            if vdata[1] != r:
                                # This record is not the non-version
                                # record for the indexed record
                                continue
                        else:
                            # The indexed record is not a version record,
                            # so this record can not be the non-version
                            # record for it.
                            continue
                        o.append(r)
            if o:
                if len(o) != len(records):
                    self._data[tid] = 1, u, d, e, tuple(o) # Reset data
            else:
                deleted.append(tid)
        # Now delete empty transactions
        for tid in deleted:
            del self._data[tid]
        # Now reset previous pointers for "current" records:
        for r in pindex.values():
            r[1] = None # Previous record
            if r[2] and r[2][1]: # vdata
                # If this record contains version data and
                # non-version data, then clear it out.
                r[2][1][2] = None
        # Finally, rebuild indexes from transaction data:
        self._index, self._vindex = self._build_indexes()
    finally:
        self._lock_release()
    self.getSize()
def run(path, days, notPacked): f = open(path, "rb") f.seek(0, 2) size = os.path.getsize(path) now = datetime.date.today() notPackedDays = [] for day in range(notPacked): notPackedDays.append(str(now - timedelta(days=day + 1))) #day->size stats = {} th = prev_txn(f) bool = True while bool: ts = TimeStamp(th.tid) then = datetime.date(int(ts.year()), int(ts.month()), int(ts.day())) delta = timedelta(days=int(days)) if (now - then < delta): dateT = strftime("%Y-%m-%d", [ int(ts.year()), int(ts.month()), int(ts.day()), 1, 1, 1, 1, 1, 1 ]) try: stats[dateT] = stats[dateT] + th.length except KeyError: stats[dateT] = th.length else: bool = False th = th.prev_txn() f.close() total = 0 totalPacked = 0 daysPacked = 0 for (d, s) in sorted(stats.items(), key=lambda (k, v): v, reverse=True): print d, "size:", pretty_size(s), date = str(d) if (date in notPackedDays or date == str(now)): print "(not yet packed)" else: totalPacked = totalPacked + s daysPacked = daysPacked + 1 print total = total + s if int(totalPacked): average = totalPacked / int(daysPacked) else: average = 0 print "\n-- ALREADY PACKED DAYS--" print "The amount of data added in", daysPacked, "days is", pretty_size( totalPacked) print "Average", pretty_size(average), "per day" print "Following this trend, the size of the database will be:" print "\t", pretty_size(average * 365 + size), " in 1 year" print "\t", pretty_size(average * 365 * 2 + size), " in 2 years" print "\t", pretty_size(average * 365 * 10 + size), " in 10 years" print "\n-- ALL DAYS --" print "The amount of data added in", days, "days is", pretty_size(total) if int(total): print "Average", pretty_size(total / int(days)), "per day" else: print "Average 0bytes per day"