def run(path, days, notPacked):
    """Print how many object records of each type were written recently.

    Walks the storage file at ``path`` backwards from its last transaction
    to find the oldest transaction that is less than ``days`` days old,
    then iterates forward from there and counts the records per identity
    (as reported by ``Reader.getIdentity``).  The counts are printed in
    descending order, one ``"%6d: %s"`` line per identity.

    path      -- file name of the storage file to analyse.
    days      -- size of the window in days; converted with ``int()``.
    notPacked -- accepted for signature compatibility; not used here.

    Nothing is printed when no transaction falls inside the window.
    """
    window = timedelta(days=int(days))
    today = datetime.date.today()
    counts = {}

    with open(path, "rb") as f:
        f.seek(0, 2)

        # Walk backwards, remembering the oldest header still inside the
        # window.  prev_txn() yields a falsy value once the start of the
        # file is reached, so the walk also stops there instead of
        # dereferencing it.
        oldest_in_window = None
        th = prev_txn(f)
        while th:
            ts = TimeStamp(th.tid)
            written = datetime.date(
                int(ts.year()), int(ts.month()), int(ts.day()))
            if today - written >= window:
                break
            oldest_in_window = th
            th = th.prev_txn()

        if oldest_in_window is not None:
            reader = Reader()
            for txn in FileIterator(path, pos=oldest_in_window._pos):
                for record in txn:
                    identity = reader.getIdentity(record.data)
                    counts[identity] = counts.get(identity, 0) + 1

    by_count = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    for identity, count in by_count:
        print("%6d: %s" % (count, identity))
def run(path, tid):
    """Print one transaction and the attributes of the objects it stores.

    Walks the storage file at ``path`` backwards from its last transaction
    until it finds the one whose ``str(TimeStamp(...))`` equals ``tid``,
    prints a header line for it and then, for every record in it, the
    record's identity, its size and the non-callable entries of the
    unpickled object's ``__dict__``.

    path -- file name of the storage file to inspect.
    tid  -- transaction timestamp, as rendered by ``str(TimeStamp(tid))``.

    Raises ValueError when no transaction with that timestamp exists.
    """
    with open(path, "rb") as f:
        f.seek(0, 2)

        # prev_txn() yields a falsy value once the start of the file is
        # reached; stop there rather than dereferencing it.
        th = prev_txn(f)
        while th and str(TimeStamp(th.tid)) != tid:
            th = th.prev_txn()
        if not th:
            raise ValueError(
                "no transaction with timestamp %r found in %s" % (tid, path))

        reader = Reader()
        for txn in FileIterator(path, pos=th._pos):
            if str(TimeStamp(txn.tid)) != tid:
                continue

            # Two spaces after the colon and the trailing " \n" reproduce
            # the spacing of the former multi-argument print statement.
            print("\nTRANSACTION:  %s %s %s %s \n" % (
                TimeStamp(txn.tid), txn.user, txn.description,
                pretty_size(th.length)))

            for record in txn:
                print(" -  %s %s" % (reader.getIdentity(record.data),
                                     pretty_size(len(record.data))))

                # NOTE(security): unpickling can execute arbitrary code;
                # only run this against storage files you trust.
                ob = cPickle.loads(record.data)
                # Not sure why some objects are stored as tuple (object, ())
                if isinstance(ob, tuple) and len(ob) == 2:
                    ob = ob[0]

                if hasattr(ob, "__dict__"):
                    for name, value in ob.__dict__.items():
                        if str(name) == "__doc__":
                            print("\t('__doc__', %s )" % (value,))
                        elif not callable(value):
                            print("\t%s" % ((name, value),))
                else:
                    print("can't extract:" + str(ob))

            # Only the requested transaction is of interest.
            break
def run(path, tid):
    """Print one transaction and the attributes of the objects it stores.

    Walks the storage file at ``path`` backwards from its last transaction
    until it finds the one whose ``str(TimeStamp(...))`` equals ``tid``,
    prints a header line for it and then, for every record in it, the
    record's identity, its size and the non-callable entries of the
    unpickled object's ``__dict__``.

    path -- file name of the storage file to inspect.
    tid  -- transaction timestamp, as rendered by ``str(TimeStamp(tid))``.

    Raises ValueError when no transaction with that timestamp exists.
    """
    with open(path, "rb") as f:
        f.seek(0, 2)

        # prev_txn() yields a falsy value once the start of the file is
        # reached; stop there rather than dereferencing it.
        th = prev_txn(f)
        while th and str(TimeStamp(th.tid)) != tid:
            th = th.prev_txn()
        if not th:
            raise ValueError(
                "no transaction with timestamp %r found in %s" % (tid, path))

        reader = Reader()
        for txn in FileIterator(path, pos=th._pos):
            if str(TimeStamp(txn.tid)) != tid:
                continue

            # Two spaces after the colon and the trailing " \n" reproduce
            # the spacing of the former multi-argument print statement.
            print("\nTRANSACTION:  %s %s %s %s \n" % (
                TimeStamp(txn.tid), txn.user, txn.description,
                pretty_size(th.length)))

            for record in txn:
                print(" -  %s %s" % (reader.getIdentity(record.data),
                                     pretty_size(len(record.data))))

                # NOTE(security): unpickling can execute arbitrary code;
                # only run this against storage files you trust.
                ob = cPickle.loads(record.data)
                # Not sure why some objects are stored as tuple (object, ())
                if isinstance(ob, tuple) and len(ob) == 2:
                    ob = ob[0]

                if hasattr(ob, "__dict__"):
                    for name, value in ob.__dict__.items():
                        if str(name) == "__doc__":
                            print("\t('__doc__', %s )" % (value,))
                        elif not callable(value):
                            print("\t%s" % ((name, value),))
                else:
                    print("can't extract:" + str(ob))

            # Only the requested transaction is of interest.
            break
def run(path, ntxn, orderTransactions):
    """Print a per-type size breakdown of the last ``ntxn`` transactions.

    Steps back ``ntxn - 1`` transactions from the end of the storage file
    at ``path`` (stopping early at the first transaction if the file holds
    fewer), then iterates forward and, for each transaction, aggregates
    the record sizes and record counts per identity (as reported by
    ``Reader.getIdentity``).

    path              -- file name of the storage file to analyse.
    ntxn              -- number of trailing transactions to report.
    orderTransactions -- when true, list transactions largest first
                         (by header length) instead of in file order.

    For every transaction a header line is printed, followed by one line
    per identity (count, identity, aggregate size), largest size first.
    """
    transactions = []

    with open(path, "rb") as f:
        f.seek(0, 2)

        th = prev_txn(f)
        for _ in range(ntxn - 1):
            older = th.prev_txn()
            # A falsy result means the start of the file was reached.
            if not older:
                break
            th = older

        reader = Reader()
        header = TxnHeader(f, th._pos)
        for txn in FileIterator(path, pos=th._pos):
            # The header chain is advanced in lock-step with the iterator
            # because the length is read from the header.
            length = header.length
            header = header.next_txn()

            # identity -> (aggregate size, record count)
            per_type = {}
            for record in txn:
                identity = reader.getIdentity(record.data)
                size, count = per_type.get(identity, (0, 0))
                per_type[identity] = (size + len(record.data), count + 1)

            transactions.append({
                "tid": TimeStamp(txn.tid),
                "user": txn.user,
                "desc": txn.description,
                "len": length,
                "objs": per_type,
            })

    if orderTransactions:
        transactions = sorted(
            transactions, key=lambda tr: tr["len"], reverse=True)

    for tr in transactions:
        print("\n\nTRANSACTION:  %s %s %s %s" % (
            tr["tid"], tr["user"], tr["desc"], pretty_size(tr["len"])))
        per_type = tr["objs"]
        largest_first = sorted(
            per_type, key=lambda k: per_type[k][0], reverse=True)
        for identity in largest_first:
            size, count = per_type[identity]
            # count, class, size (aggregate)
            print(" -  %s %s %s" % (count, identity, pretty_size(size)))
def main(path, ntxn):
    """Print a SHA-1 digest and metadata for the last ``ntxn`` transactions.

    Walks the storage file at ``path`` backwards from its last
    transaction.  For each transaction visited it prints the timestamp
    with the hex SHA-1 of the raw transaction data, then the user,
    description, length and data offset, then an empty line.

    path -- file name of the storage file to inspect.
    ntxn -- maximum number of transactions to print; the walk also stops
            when ``prev_txn()`` returns a falsy header.
    """
    with open(path, "rb") as f:
        f.seek(0, 2)

        th = prev_txn(f)
        remaining = ntxn
        while th and remaining > 0:
            # hexdigest() yields the same text as hexlify(digest()).
            digest = sha1(th.get_raw_data()).hexdigest()
            th.read_meta()
            print("%s: hash=%s" % (th.get_timestamp(), digest))
            print("user=%r description=%r length=%d offset=%d"
                  % (th.user, th.descr, th.length, th.get_data_offset()))
            print("")
            th = th.prev_txn()
            remaining -= 1
def main(path, ntxn):
    """Print a SHA-1 digest and metadata for the last ``ntxn`` transactions.

    Walks the storage file at ``path`` backwards from its last
    transaction.  For each transaction visited it prints the timestamp
    with the hex SHA-1 of the raw transaction data, then the user,
    description, length and data offset, then an empty line.

    path -- file name of the storage file to inspect.
    ntxn -- maximum number of transactions to print; the walk also stops
            when ``prev_txn()`` returns a falsy header.
    """
    with open(path, "rb") as f:
        f.seek(0, 2)

        th = prev_txn(f)
        remaining = ntxn
        while th and remaining > 0:
            # hexdigest() yields the same text as hexlify(digest()).
            digest = sha1(th.get_raw_data()).hexdigest()
            th.read_meta()
            print("%s: hash=%s" % (th.get_timestamp(), digest))
            print("user=%r description=%r length=%d offset=%d"
                  % (th.user, th.descr, th.length, th.get_data_offset()))
            print("")
            th = th.prev_txn()
            remaining -= 1
def run(path, ntxn, orderTransactions):
    """Print a per-type size breakdown of the last ``ntxn`` transactions.

    Steps back ``ntxn - 1`` transactions from the end of the storage file
    at ``path`` (stopping early at the first transaction if the file holds
    fewer), then iterates forward and, for each transaction, aggregates
    the record sizes and record counts per identity (as reported by
    ``Reader.getIdentity``).

    path              -- file name of the storage file to analyse.
    ntxn              -- number of trailing transactions to report.
    orderTransactions -- when true, list transactions largest first
                         (by header length) instead of in file order.

    For every transaction a header line is printed, followed by one line
    per identity (count, identity, aggregate size), largest size first.
    """
    transactions = []

    with open(path, "rb") as f:
        f.seek(0, 2)

        th = prev_txn(f)
        for _ in range(ntxn - 1):
            older = th.prev_txn()
            # A falsy result means the start of the file was reached.
            if not older:
                break
            th = older

        reader = Reader()
        header = TxnHeader(f, th._pos)
        for txn in FileIterator(path, pos=th._pos):
            # The header chain is advanced in lock-step with the iterator
            # because the length is read from the header.
            length = header.length
            header = header.next_txn()

            # identity -> (aggregate size, record count)
            per_type = {}
            for record in txn:
                identity = reader.getIdentity(record.data)
                size, count = per_type.get(identity, (0, 0))
                per_type[identity] = (size + len(record.data), count + 1)

            transactions.append({
                "tid": TimeStamp(txn.tid),
                "user": txn.user,
                "desc": txn.description,
                "len": length,
                "objs": per_type,
            })

    if orderTransactions:
        transactions = sorted(
            transactions, key=lambda tr: tr["len"], reverse=True)

    for tr in transactions:
        print("\n\nTRANSACTION:  %s %s %s %s" % (
            tr["tid"], tr["user"], tr["desc"], pretty_size(tr["len"])))
        per_type = tr["objs"]
        largest_first = sorted(
            per_type, key=lambda k: per_type[k][0], reverse=True)
        for identity in largest_first:
            size, count = per_type[identity]
            # count, class, size (aggregate)
            print(" -  %s %s %s" % (count, identity, pretty_size(size)))
def run(path, days, notPacked):
    """Print per-day growth of the storage file and a size projection.

    Walks the storage file at ``path`` backwards from its last
    transaction, summing the transaction lengths per calendar day, until
    it meets a transaction that is ``days`` or more days old or reaches
    the start of the file.  Days are then listed largest first, followed
    by two summaries: one over the days treated as already packed, with a
    1/2/10-year projection added to the current file size, and one over
    all days seen.

    path      -- file name of the storage file to analyse.
    days      -- size of the window in days; converted with ``int()``.
    notPacked -- number of days before today that are treated as not yet
                 packed; today is always treated as not yet packed.
    """
    size = os.path.getsize(path)
    now = datetime.date.today()
    window = timedelta(days=int(days))

    # ISO dates ("YYYY-MM-DD") of the days excluded from the packed totals.
    not_packed_days = set([str(now)])
    for back in range(notPacked):
        not_packed_days.add(str(now - timedelta(days=back + 1)))

    # day -> size
    stats = {}
    with open(path, "rb") as f:
        f.seek(0, 2)
        th = prev_txn(f)
        # prev_txn() yields a falsy value once the start of the file is
        # reached; stop there rather than dereferencing it.
        while th:
            ts = TimeStamp(th.tid)
            then = datetime.date(
                int(ts.year()), int(ts.month()), int(ts.day()))
            if now - then >= window:
                break
            day = str(then)
            stats[day] = stats.get(day, 0) + th.length
            th = th.prev_txn()

    total = 0
    total_packed = 0
    days_packed = 0
    by_size = sorted(stats.items(), key=lambda item: item[1], reverse=True)
    for day, added in by_size:
        line = "%s size: %s" % (day, pretty_size(added))
        if day in not_packed_days:
            line += " (not yet packed)"
        else:
            total_packed += added
            days_packed += 1
        print(line)
        total += added

    # Floor division keeps the averages integral on Python 2 and 3 alike.
    if total_packed:
        average = total_packed // days_packed
    else:
        average = 0

    print("\n-- ALREADY PACKED DAYS--")
    print("The amount of data added in %s days is %s"
          % (days_packed, pretty_size(total_packed)))
    print("Average %s per day" % (pretty_size(average),))
    print("Following this trend, the size of the database will be:")
    for years, label in ((1, "1 year"), (2, "2 years"), (10, "10 years")):
        projected = average * 365 * years + size
        # The doubled space reproduces the former print-statement output.
        print("\t%s  in %s" % (pretty_size(projected), label))

    print("\n-- ALL DAYS --")
    print("The amount of data added in %s days is %s"
          % (days, pretty_size(total)))
    if total:
        print("Average %s per day" % (pretty_size(total // int(days)),))
    else:
        print("Average 0bytes per day")
def run(path, days, notPacked):
    """Print per-day growth of the storage file and a size projection.

    Walks the storage file at ``path`` backwards from its last
    transaction, summing the transaction lengths per calendar day, until
    it meets a transaction that is ``days`` or more days old or reaches
    the start of the file.  Days are then listed largest first, followed
    by two summaries: one over the days treated as already packed, with a
    1/2/10-year projection added to the current file size, and one over
    all days seen.

    path      -- file name of the storage file to analyse.
    days      -- size of the window in days; converted with ``int()``.
    notPacked -- number of days before today that are treated as not yet
                 packed; today is always treated as not yet packed.
    """
    size = os.path.getsize(path)
    now = datetime.date.today()
    window = timedelta(days=int(days))

    # ISO dates ("YYYY-MM-DD") of the days excluded from the packed totals.
    not_packed_days = set([str(now)])
    for back in range(notPacked):
        not_packed_days.add(str(now - timedelta(days=back + 1)))

    # day -> size
    stats = {}
    with open(path, "rb") as f:
        f.seek(0, 2)
        th = prev_txn(f)
        # prev_txn() yields a falsy value once the start of the file is
        # reached; stop there rather than dereferencing it.
        while th:
            ts = TimeStamp(th.tid)
            then = datetime.date(
                int(ts.year()), int(ts.month()), int(ts.day()))
            if now - then >= window:
                break
            day = str(then)
            stats[day] = stats.get(day, 0) + th.length
            th = th.prev_txn()

    total = 0
    total_packed = 0
    days_packed = 0
    by_size = sorted(stats.items(), key=lambda item: item[1], reverse=True)
    for day, added in by_size:
        line = "%s size: %s" % (day, pretty_size(added))
        if day in not_packed_days:
            line += " (not yet packed)"
        else:
            total_packed += added
            days_packed += 1
        print(line)
        total += added

    # Floor division keeps the averages integral on Python 2 and 3 alike.
    if total_packed:
        average = total_packed // days_packed
    else:
        average = 0

    print("\n-- ALREADY PACKED DAYS--")
    print("The amount of data added in %s days is %s"
          % (days_packed, pretty_size(total_packed)))
    print("Average %s per day" % (pretty_size(average),))
    print("Following this trend, the size of the database will be:")
    for years, label in ((1, "1 year"), (2, "2 years"), (10, "10 years")):
        projected = average * 365 * years + size
        # The doubled space reproduces the former print-statement output.
        print("\t%s  in %s" % (pretty_size(projected), label))

    print("\n-- ALL DAYS --")
    print("The amount of data added in %s days is %s"
          % (days, pretty_size(total)))
    if total:
        print("Average %s per day" % (pretty_size(total // int(days)),))
    else:
        print("Average 0bytes per day")