def mongo_sum2(app, args):
    """
    Print summed file size and record count grouped by two fields.

    Runs a ``$group`` aggregation on the collection returned by
    ``get_mongo_db(app)``.  Records are grouped on the pair of fields named
    by ``args.group_by_1`` and ``args.group_by_2``; per pair the
    ``filesize`` values are summed and the records counted.  One line is
    printed per pair, followed by a grand total line.

    :param app: application object, passed through to ``get_mongo_db``
    :param args: parsed arguments.  Reads ``group_by_1``, ``group_by_2``,
        ``sort_on_field`` (truthy: sort ascending on the group key,
        otherwise descending on the summed size) and ``human`` (truthy:
        aligned columns with sizes passed through ``humansize``, otherwise
        tab separated raw numbers).
    """
    gb1_field = "${}".format(args.group_by_1)
    gb2_field = "${}".format(args.group_by_2)

    MONGO_mad = get_mongo_db(app)

    if args.sort_on_field:
        sort_field, sort_order = '_id', 1
    else:
        sort_field, sort_order = 'total', -1

    res = MONGO_mad.aggregate([
        {'$group': {
            "_id": {"group1": gb1_field, "group2": gb2_field},
            "total": {"$sum": "$filesize"},
            "count": {"$sum": 1}}},
        # Use the selected field name as key; the literal string
        # "sort_field" names no field of the grouped documents.
        {"$sort": {sort_field: sort_order}}])

    # pymongo < 3.0 returns a dict with a 'result' list, later versions
    # return a cursor - accept both.
    rows = res['result'] if isinstance(res, dict) else list(res)

    # Resolve the labels once so the column widths are computed from the
    # very strings that get printed.
    table = []
    for r in rows:
        key = r['_id']
        table.append((str(key.get('group1', '-')),
                      str(key.get('group2', '-')),
                      r['total'], r['count']))

    gl1 = max([len("Total")] + [len(row[0]) for row in table])
    gl2 = max([len("Total")] + [len(row[1]) for row in table])
    fms = "{:" + str(gl1) + "}  {:" + str(gl2) + "}  {:>10}  {:>9}"

    total_size = 0
    total_count = 0
    for g1, g2, total, count in table:
        total_size += total
        total_count += count
        if args.human:
            print(fms.format(g1, g2, humansize(total), count))
        else:
            print("{}\t{}\t{}\t{}".format(g1, g2, total, count))

    # The summary line reports the accumulated totals, not the values of
    # the last group.
    if args.human:
        print(fms.format("Total", "", humansize(total_size), total_count))
    else:
        print("Total\t\t{}\t{}".format(total_size, total_count))
def waste(app, args):
    """
    Print the top of the waste overview, or store the overview in mongodb.

    The overview is whatever ``_run_waste_command(app, 'waste_pipeline')``
    returns; it is iterated as a sequence of records with the keys
    ``_id`` (a sha1sum), ``waste`` and ``filesize``.

    With ``args.todb`` set, the overview is inserted together with the
    current UTC time into the ``waste`` collection of the database
    returned by ``mad2.util.get_mongo_db(app)`` and nothing is printed.

    Otherwise one line is printed per record: the sha1sum, the two sizes
    (via ``humansize``), the number of copies per host and the owners.
    Only the first ``args.no_records`` records are looked at; records
    with an empty sha1sum are skipped but still count towards that limit.

    :param app: application object, passed through to the db helpers
    :param args: parsed arguments; reads ``force``, ``todb`` and
        ``no_records``
    """
    db = get_mongo_transient_db(app)
    res = _run_waste_command(app, 'waste_pipeline', force=args.force)

    if args.todb:
        dbrec = {'time': datetime.datetime.utcnow(), 'data': res}
        db = mad2.util.get_mongo_db(app)
        db.waste.insert_one(dbrec)
        return

    for i, r in enumerate(res):
        if i >= args.no_records:
            break

        sha1sum = r['_id']
        # The group key may be None as well as blank (presumably for
        # records stored without a sha1sum) - skip both.
        if not sha1sum or not sha1sum.strip():
            continue

        # NOTE(review): " sz " precedes the waste and " w " the filesize;
        # the labels look swapped - confirm before changing the output.
        cprint(sha1sum, 'yellow', end='')
        cprint(" sz ", "grey", end="")
        cprint("{:>9}".format(humansize(r['waste'])), end='')
        cprint(" w ", "grey", end="")
        cprint("{:>9}".format(humansize(r['filesize'])), end='')

        # Only the number of copies per host is reported here, so no
        # per-host size is computed.
        hostcount = collections.Counter()
        owners = set()
        for f in db.find({'sha1sum': sha1sum}):
            owners.add(f['username'])
            hostcount[f['host']] += 1

        for h in hostcount:
            print(' ', end='')
            cprint(h, 'green', end=':')
            cprint(hostcount[h], 'cyan', end="")

        cprint(" ", end="")
        # sorted: keeps the owner list stable between runs
        cprint(", ".join(sorted(owners)), 'red')
def mongo_sum(app, args):
    """
    Print summed file size and record count per group.

    The per group sums are obtained from ``_single_sum``; each record is
    read for the keys ``_id`` (the group key, possibly ``None``),
    ``total`` and ``count``.  One line is printed per group, followed by
    a grand total line.

    :param app: application object, passed through to ``_single_sum``
    :param args: parsed arguments.  Reads ``group_by`` and ``force``
        (both passed through to ``_single_sum``) and ``human`` (truthy:
        aligned columns with sizes passed through ``humansize`` and a
        ``None`` key shown as ``<undefined>``; otherwise tab separated
        raw values).
    """
    # Materialise the result: it is walked more than once, which would
    # silently yield nothing the second time for a cursor or generator.
    res = list(_single_sum(app, group_by=args.group_by, force=args.force))

    undefined = "<undefined>"

    total_size = 0
    total_count = 0
    rows = []
    for reshost in res:
        gid = reshost['_id']
        # Render the key as text up front.  Passing the raw key to a
        # "{:N}" format spec prints booleans as 1/0 and fails for types
        # that do not support format specs.
        label = undefined if gid is None else str(gid)
        rows.append((gid, label, reshost['total'], reshost['count']))
        total_size += int(reshost['total'])
        total_count += reshost['count']

    if not args.human:
        for gid, _, total, count in rows:
            print("{}\t{}\t{}".format(gid, total, count))
        print("Total\t{}\t{}".format(total_size, total_count))
        return

    # Width of the first column: the longest label actually printed,
    # including the "<undefined>" placeholder.
    mgn = max([len("Total")] + [len(row[1]) for row in rows])
    fms = "{:" + str(mgn) + "}\t{:>10}\t{:>9}"

    for _, label, total, count in rows:
        print(fms.format(label, humansize(total), count))

    # The summary line reports the accumulated count, not the count of
    # the last group.
    print(fms.format("Total", humansize(total_size), total_count))
def mongo_sum(app, args):
    """
    Print summed file size and record count per group.

    The per group sums are obtained from ``_single_sum``; each record is
    read for the keys ``_id`` (the group key, possibly ``None``),
    ``total`` and ``count``.  One line is printed per group, followed by
    a grand total line.

    :param app: application object, passed through to ``_single_sum``
    :param args: parsed arguments.  Reads ``group_by`` and ``force``
        (both passed through to ``_single_sum``) and ``human`` (truthy:
        aligned columns with sizes passed through ``humansize`` and a
        ``None`` key shown as ``<undefined>``; otherwise tab separated
        raw values).
    """
    # Materialise the result: it is walked more than once, which would
    # silently yield nothing the second time for a cursor or generator.
    res = list(_single_sum(app, group_by=args.group_by, force=args.force))

    undefined = "<undefined>"

    total_size = 0
    total_count = 0
    rows = []
    for reshost in res:
        gid = reshost['_id']
        # Render the key as text up front.  Passing the raw key to a
        # "{:N}" format spec prints booleans as 1/0 and fails for types
        # that do not support format specs.
        label = undefined if gid is None else str(gid)
        rows.append((gid, label, reshost['total'], reshost['count']))
        total_size += int(reshost['total'])
        total_count += reshost['count']

    if not args.human:
        for gid, _, total, count in rows:
            print("{}\t{}\t{}".format(gid, total, count))
        print("Total\t{}\t{}".format(total_size, total_count))
        return

    # Width of the first column: the longest label actually printed,
    # including the "<undefined>" placeholder.
    mgn = max([len("Total")] + [len(row[1]) for row in rows])
    fms = "{:" + str(mgn) + "}\t{:>10}\t{:>9}"

    for _, label, total, count in rows:
        print(fms.format(label, humansize(total), count))

    # The summary line reports the accumulated count, not the count of
    # the last group.
    print(fms.format("Total", humansize(total_size), total_count))
def waste(app, args):
    """
    Print the top of the waste overview.

    The overview is the ``'result'`` entry of what
    ``_run_waste_command(app, 'waste_pipeline')`` returns; it is iterated
    as a sequence of records with the keys ``_id`` (a sha1sum), ``waste``
    and ``filesize``.

    One line is printed per record: the sha1sum, the two sizes (via
    ``humansize``), the number of copies per host and the owners.  Only
    the first ``args.no_records`` records are looked at; records with an
    empty sha1sum are skipped but still count towards that limit.

    :param app: application object, passed through to the helpers
    :param args: parsed arguments; reads ``force`` and ``no_records``
    """
    db = get_mongo_transient_db(app)
    res = _run_waste_command(app, 'waste_pipeline',
                             force=args.force)['result']

    for i, r in enumerate(res):
        if i >= args.no_records:
            break

        sha1sum = r['_id']
        # The group key may be None as well as blank (presumably for
        # records stored without a sha1sum) - skip both.
        if not sha1sum or not sha1sum.strip():
            continue

        # NOTE(review): " sz " precedes the waste and " w " the filesize;
        # the labels look swapped - confirm before changing the output.
        cprint(sha1sum, 'grey', end='')
        cprint(" sz ", "grey", end="")
        cprint("{:>9}".format(humansize(r['waste'])), end='')
        cprint(" w ", "grey", end="")
        cprint("{:>9}".format(humansize(r['filesize'])), end='')

        # Only the number of copies per host is reported here, so no
        # per-host size is computed.
        hostcount = collections.Counter()
        owners = set()
        for f in db.find({'sha1sum': sha1sum}):
            owners.add(f['username'])
            hostcount[f['host']] += 1

        for h in hostcount:
            print(' ', end='')
            cprint(h, 'green', end=':')
            cprint(hostcount[h], 'cyan', end="")

        cprint(" ", end="")
        # sorted: keeps the owner list stable between runs
        cprint(", ".join(sorted(owners)), 'red')
def _process_query(query, madfile_in):
    """
    Print the records in ``MONGO_mad`` that match ``query``.

    ``args``, ``MONGO_mad`` and ``backup_hosts`` are taken from the
    enclosing scope.  Records are dropped when ``args.volume`` or
    ``args.path_fragment`` is set and does not match.  In ``args.echo``
    mode the first record that passes those filters ends the loop, after
    printing ``madfile_in['inputfile']`` if the query matched more than
    one record.  Otherwise each record is printed either as tab separated
    values (``args.raw_output``) or as a coloured line.
    """
    matches = list(MONGO_mad.find(query))

    for record in matches:
        # optional filters
        if args.volume and record['volume'] != args.volume:
            continue
        if args.path_fragment \
                and args.path_fragment not in record['fullpath']:
            continue

        if args.echo:
            if len(matches) > 1:
                print(madfile_in['inputfile'])
            break

        days = (arrow.now() - arrow.get(record['save_time'])).days
        stag = 'S' if record.get('is_symlink', False) else '.'

        if args.raw_output:
            fields = [
                record['nlink'],
                stag,
                (arrow.now() - arrow.get(record['save_time'])),
                record['filesize'],
                record['host'],
                record['fullpath'],
            ]
            print("\t".join(str(field) for field in fields))
            continue

        cprint('%1d%s' % (record['nlink'], stag), 'yellow', end=" ")
        cprint('%3d' % days, 'green', end="d ")
        cprint('%6s' % humansize(record['filesize']), 'white', end=" ")
        # hosts listed in backup_hosts are shown in bold green
        if record['host'] not in backup_hosts:
            cprint(record['host'], 'cyan', end=':')
        else:
            cprint(record['host'], 'green', attrs=['bold'], end=':')
        cprint(record['fullpath'])
def _process_query(query, madfile_in):
    """
    Print the records in ``MONGO_mad`` that match ``query``.

    ``args``, ``MONGO_mad`` and ``backup_hosts`` are taken from the
    enclosing scope.  Records are dropped when ``args.volume`` or
    ``args.path_fragment`` is set and does not match.  In ``args.echo``
    mode the first record that passes those filters ends the loop, after
    printing ``madfile_in['inputfile']`` if the query matched more than
    one record.  Otherwise each record is printed either as tab separated
    values (``args.raw_output``) or as a coloured line.
    """
    matches = list(MONGO_mad.find(query))

    for record in matches:
        # optional filters
        if args.volume and record['volume'] != args.volume:
            continue
        if args.path_fragment \
                and args.path_fragment not in record['fullpath']:
            continue

        if args.echo:
            if len(matches) > 1:
                print(madfile_in['inputfile'])
            break

        days = (arrow.now() - arrow.get(record['save_time'])).days
        stag = 'S' if record.get('is_symlink', False) else '.'

        if args.raw_output:
            fields = [
                record['nlink'],
                stag,
                (arrow.now() - arrow.get(record['save_time'])),
                record['filesize'],
                record['host'],
                record['fullpath'],
            ]
            print("\t".join(str(field) for field in fields))
            continue

        cprint('%1d%s' % (record['nlink'], stag), 'yellow', end=" ")
        cprint('%3d' % days, 'green', end="d ")
        cprint('%6s' % humansize(record['filesize']), 'white', end=" ")
        # hosts listed in backup_hosts are shown in bold green
        if record['host'] not in backup_hosts:
            cprint(record['host'], 'cyan', end=':')
        else:
            cprint(record['host'], 'green', attrs=['bold'], end=':')
        cprint(record['fullpath'])
def mongo_sum2(app, args):
    """
    Print summed file size and record count grouped by two fields.

    Runs an aggregation on the collection returned by
    ``get_mongo_transient_db(app)``.  Only records with ``orphan`` equal
    to ``False`` are considered.  They are grouped on the pair of fields
    named by ``args.group_by_1`` and ``args.group_by_2``; per pair the
    ``filesize`` values are summed and the records counted.  One line is
    printed per pair, followed by a grand total line.

    :param app: application object, passed through to
        ``get_mongo_transient_db``
    :param args: parsed arguments.  Reads ``group_by_1``, ``group_by_2``,
        ``sort_on_field`` (truthy: sort ascending on the group key,
        otherwise descending on the summed size) and ``human`` (truthy:
        aligned columns with sizes passed through ``humansize``, otherwise
        tab separated raw numbers).
    """
    gb1_field = "${}".format(args.group_by_1)
    gb2_field = "${}".format(args.group_by_2)

    MONGO_mad = get_mongo_transient_db(app)

    if args.sort_on_field:
        sort_field, sort_order = '_id', 1
    else:
        sort_field, sort_order = 'total', -1

    query = [
        {"$match": {"orphan": False}},
        {'$group': {
            "_id": {"group1": gb1_field, "group2": gb2_field},
            "total": {"$sum": "$filesize"},
            "count": {"$sum": 1}}},
        # Use the selected field name as key; the literal string
        # "sort_field" names no field of the grouped documents.
        {"$sort": {sort_field: sort_order}},
    ]
    res = list(MONGO_mad.aggregate(query))

    # Resolve the labels once so the column widths are computed from the
    # very strings that get printed.
    table = []
    for r in res:
        key = r['_id']
        table.append((str(key.get('group1', '-')),
                      str(key.get('group2', '-')),
                      r['total'], r['count']))

    gl1 = max([len("Total")] + [len(row[0]) for row in table])
    gl2 = max([len("Total")] + [len(row[1]) for row in table])
    fms = "{:" + str(gl1) + "}  {:" + str(gl2) + "}  {:>10}  {:>9}"

    total_size = 0
    total_count = 0
    for g1, g2, total, count in table:
        total_size += total
        total_count += count
        if args.human:
            print(fms.format(g1, g2, humansize(total), count))
        else:
            print("{}\t{}\t{}\t{}".format(g1, g2, total, count))

    # The summary line reports the accumulated totals, not the values of
    # the last group.
    if args.human:
        print(fms.format("Total", "", humansize(total_size), total_count))
    else:
        print("Total\t\t{}\t{}".format(total_size, total_count))
def waste_text_report(app, args):
    """
    Print a plain text report on the files wasting the most space.

    The overview is the ``'result'`` entry of what
    ``_run_waste_command(app, 'waste_pipeline')`` returns, read as a
    sequence of records with the keys ``_id`` (a sha1sum), ``waste`` and
    ``filesize``.  The individual copies of a file are looked up by
    sha1sum in the collection from ``get_mongo_transient_db(app)``.

    The report has three parts:

    * the "winner": the first record with a non-empty sha1sum, with its
      owners and the number of locations and servers;
    * an overview table of the records among the first 20;
    * per record the copies found, grouped per host.

    Records with an empty sha1sum are skipped but keep their position
    number.  If there is no record with a sha1sum only a short notice is
    printed.

    :param app: application object, passed through to the helpers
    :param args: parsed arguments; reads ``force`` and ``subject`` (when
        set, printed first as a ``Subject:`` line)
    """
    db = get_mongo_transient_db(app)
    res = _run_waste_command(app, 'waste_pipeline',
                             force=args.force)['result']

    if args.subject:
        print("Subject: {}".format(args.subject))

    # This week's winner: the first record that has a sha1sum.  The group
    # key may be None or blank, and the result may be empty altogether.
    top = None
    for r in res:
        if r['_id'] and r['_id'].strip():
            top = r
            break
    if top is None:
        print("No waste found.")
        return

    sha1sum = top['_id']
    owners = set()
    hostcount = collections.Counter()
    total = 0
    for rec in db.find({'sha1sum': sha1sum}):
        total += 1
        hostcount[rec['host']] += 1
        owners.add(rec['username'])

    # sorted: keeps the owner list stable between runs
    print("This week's winner: {}".format(", ".join(sorted(owners))))
    # The plural "s" goes before the comma ("2 servers,").
    print("One file, {} location{}, {} server{}, wasting {}.".format(
        total, "s" if total > 1 else "",
        len(hostcount), "s" if len(hostcount) > 1 else "",
        humansize(top['waste'])))
    print("try:\n mad repl {}\n\n".format(sha1sum))

    no_to_print = 20

    print("Waste overview: (no / sha1sum / waste / filesize)")
    print("=================================================\n")
    for i, r in enumerate(res):
        if i >= no_to_print:
            break
        sha1sum = r['_id']
        if not sha1sum or not sha1sum.strip():
            continue
        print("{:2d} {} {:>10} {:>10}".format(
            i, sha1sum, humansize(r['waste']), humansize(r['filesize'])))

    print("\n\nDetails: (nlink/symlink/size/owner)")
    print("===================================")
    for i, r in enumerate(res):
        if i >= no_to_print:
            break
        sha1sum = r['_id']
        if not sha1sum or not sha1sum.strip():
            continue
        print("# {:2d} {} {:>10} {:>10}".format(
            i, sha1sum, humansize(r['waste']), humansize(r['filesize'])))

        records = collections.defaultdict(list)
        hostsize = collections.defaultdict(lambda: 0)
        for rec in db.find({'sha1sum': sha1sum}):
            host = rec['host']
            records[host].append(rec)
            # Each hard link carries its share of the file size.
            # NOTE(review): a missing or zero nlink is counted as 1 here
            # (the listing below already tolerates a missing nlink) -
            # confirm this is the wanted accounting.
            hostsize[host] += (float(rec['filesize'])
                               / float(rec.get('nlink') or 1))

        for h, host_records in records.items():
            print("# Host: {}, copies: {}, total use: {}".format(
                h, len(host_records), humansize(hostsize[h])))
            # List the copies of *this* host, not those of whichever
            # host the lookup loop happened to end on.
            for rec in host_records:
                smarker = 'S' if rec.get('is_symlink') else '.'
                print(" {} {}".format(rec.get('nlink', '?'), smarker),
                      end=' ')
                print(humansize(rec['filesize']), end=' ')
                print(rec['username'])
                print(" " + rec['fullpath'])
        print("")
def waste_text_report(app, args):
    """
    Print a plain text report on the files wasting the most space.

    The overview is the ``'result'`` entry of what
    ``_run_waste_command(app, 'waste_pipeline')`` returns, read as a
    sequence of records with the keys ``_id`` (a sha1sum), ``waste`` and
    ``filesize``.  The individual copies of a file are looked up by
    sha1sum in the collection from ``get_mongo_transient_db(app)``.

    The report has three parts:

    * the "winner": the first record with a non-empty sha1sum, with its
      owners and the number of locations and servers;
    * an overview table of the records among the first 20;
    * per record the copies found, grouped per host.

    Records with an empty sha1sum are skipped but keep their position
    number.  If there is no record with a sha1sum only a short notice is
    printed.

    :param app: application object, passed through to the helpers
    :param args: parsed arguments; reads ``force`` and ``subject`` (when
        set, printed first as a ``Subject:`` line)
    """
    db = get_mongo_transient_db(app)
    res = _run_waste_command(app, 'waste_pipeline',
                             force=args.force)['result']

    if args.subject:
        print("Subject: {}".format(args.subject))

    # This week's winner: the first record that has a sha1sum.  The group
    # key may be None or blank, and the result may be empty altogether.
    top = None
    for r in res:
        if r['_id'] and r['_id'].strip():
            top = r
            break
    if top is None:
        print("No waste found.")
        return

    sha1sum = top['_id']
    owners = set()
    hostcount = collections.Counter()
    total = 0
    for rec in db.find({'sha1sum': sha1sum}):
        total += 1
        hostcount[rec['host']] += 1
        owners.add(rec['username'])

    # sorted: keeps the owner list stable between runs
    print("This week's winner: {}".format(", ".join(sorted(owners))))
    # The plural "s" goes before the comma ("2 servers,").
    print("One file, {} location{}, {} server{}, wasting {}.".format(
        total, "s" if total > 1 else "",
        len(hostcount), "s" if len(hostcount) > 1 else "",
        humansize(top['waste'])))
    print("try:\n mad repl {}\n\n".format(sha1sum))

    no_to_print = 20

    print("Waste overview: (no / sha1sum / waste / filesize)")
    print("=================================================\n")
    for i, r in enumerate(res):
        if i >= no_to_print:
            break
        sha1sum = r['_id']
        if not sha1sum or not sha1sum.strip():
            continue
        print("{:2d} {} {:>10} {:>10}".format(
            i, sha1sum, humansize(r['waste']), humansize(r['filesize'])))

    print("\n\nDetails: (nlink/symlink/size/owner)")
    print("===================================")
    for i, r in enumerate(res):
        if i >= no_to_print:
            break
        sha1sum = r['_id']
        if not sha1sum or not sha1sum.strip():
            continue
        print("# {:2d} {} {:>10} {:>10}".format(
            i, sha1sum, humansize(r['waste']), humansize(r['filesize'])))

        records = collections.defaultdict(list)
        hostsize = collections.defaultdict(lambda: 0)
        for rec in db.find({'sha1sum': sha1sum}):
            host = rec['host']
            records[host].append(rec)
            # Each hard link carries its share of the file size.
            # NOTE(review): a missing or zero nlink is counted as 1 here
            # (the listing below already tolerates a missing nlink) -
            # confirm this is the wanted accounting.
            hostsize[host] += (float(rec['filesize'])
                               / float(rec.get('nlink') or 1))

        for h, host_records in records.items():
            print("# Host: {}, copies: {}, total use: {}".format(
                h, len(host_records), humansize(hostsize[h])))
            # List the copies of *this* host, not those of whichever
            # host the lookup loop happened to end on.
            for rec in host_records:
                smarker = 'S' if rec.get('is_symlink') else '.'
                print(" {} {}".format(rec.get('nlink', '?'), smarker),
                      end=' ')
                print(humansize(rec['filesize']), end=' ')
                print(rec['username'])
                print(" " + rec['fullpath'])
        print("")