Example #1
0
File: mongo.py Project: dimlev/mad2
def mongo_sum2(app, args):
    """
    Print file size and record count totals grouped by two fields.

    Groups the records on ``args.group_by_1`` and ``args.group_by_2``,
    sums ``filesize`` and counts the records per group, prints one line
    per group and finishes with a grand total line.

    :param app: application object, passed on to ``get_mongo_db``
    :param args: parsed arguments; ``group_by_1``, ``group_by_2``,
        ``sort_on_field`` and ``human`` are read
    """
    gb1_field = "${}".format(args.group_by_1)
    gb2_field = "${}".format(args.group_by_2)

    MONGO_mad = get_mongo_db(app)

    # Sort on the group key (ascending) or on the summed size (descending).
    if args.sort_on_field:
        sort_field, sort_order = '_id', 1
    else:
        sort_field, sort_order = 'total', -1

    res = MONGO_mad.aggregate([
        {'$group': {
            "_id": {
                "group1": gb1_field,
                "group2": gb2_field},
            "total": {"$sum": "$filesize"},
            "count": {"$sum": 1}}},
        # The key must be the selected field name itself; the literal
        # string "sort_field" is not a field of the grouped documents.
        {"$sort": {sort_field: sort_order}},
    ])

    # Extract the printable values once; they are needed for both the
    # column widths and the output lines.
    rows = [(str(r['_id'].get('group1', '-')),
             str(r['_id'].get('group2', '-')),
             r['total'],
             r['count'])
            for r in res['result']]

    # Columns are at least as wide as the "Total" label of the last line.
    gl1 = gl2 = len("Total")
    for g1, g2, _, _ in rows:
        gl1 = max(gl1, len(g1))
        gl2 = max(gl2, len(g2))
    fms = "{:" + str(gl1) + "}  {:" + str(gl2) + "}  {:>10}  {:>9}"

    total_size = 0
    total_count = 0
    for g1, g2, total, count in rows:
        total_size += total
        total_count += count
        if args.human:
            print(fms.format(g1, g2, humansize(total), count))
        else:
            print("{}\t{}\t{}\t{}".format(g1, g2, total, count))

    # The summary line reports the accumulated totals, not the values of
    # the last group that happened to be printed.
    if args.human:
        print(fms.format("Total", "", humansize(total_size), total_count))
    else:
        print("Total\t\t{}\t{}".format(total_size, total_count))
Example #2
0
File: mongo.py Project: mfiers/mad2
def waste(app, args):
    """
    Show the files wasting the most space, or store the waste report.

    With ``args.todb`` set, the output of the ``waste_pipeline`` command
    is inserted, together with the current UTC time, into the ``waste``
    collection and nothing is printed. Otherwise at most
    ``args.no_records`` entries are printed, one line each: sha1sum, two
    sizes, the number of copies per host and the owners.

    :param app: application object, passed on to the database helpers
    :param args: parsed arguments; ``force``, ``todb`` and ``no_records``
        are read
    """
    db = get_mongo_transient_db(app)

    res = _run_waste_command(app, 'waste_pipeline',
                             force=args.force)

    if args.todb:
        dbrec = {'time': datetime.datetime.utcnow(),
                 'data': res}
        db = mad2.util.get_mongo_db(app)
        db.waste.insert_one(dbrec)
        return

    for i, r in enumerate(res):
        if i >= args.no_records:
            break

        sha1sum = r['_id']
        if not sha1sum.strip():
            # Entries without a checksum cannot be looked up.
            continue

        # NOTE(review): the "sz" label precedes the waste figure and "w"
        # precedes the file size -- possibly swapped; confirm intent.
        cprint(sha1sum, 'yellow', end='')
        cprint(" sz ", "grey", end="")
        cprint("{:>9}".format(humansize(r['waste'])), end='')
        cprint(" w ", "grey", end="")
        cprint("{:>9}".format(humansize(r['filesize'])), end='')

        # Count the copies per host and collect the owners of the file.
        hostcount = collections.defaultdict(int)
        owners = set()
        for f in db.find({'sha1sum': sha1sum}):
            owners.add(f['username'])
            hostcount[f['host']] += 1

        for h in hostcount:
            print(' ', end='')
            cprint(h, 'green', end=':')
            cprint(hostcount[h], 'cyan', end="")

        cprint(" ", end="")
        cprint(", ".join(owners), 'red')
Example #3
0
def waste(app, args):
    """
    Show the files wasting the most space, or store the waste report.

    With ``args.todb`` set, the output of the ``waste_pipeline`` command
    is inserted, together with the current UTC time, into the ``waste``
    collection and nothing is printed. Otherwise at most
    ``args.no_records`` entries are printed, one line each: sha1sum, two
    sizes, the number of copies per host and the owners.

    :param app: application object, passed on to the database helpers
    :param args: parsed arguments; ``force``, ``todb`` and ``no_records``
        are read
    """
    db = get_mongo_transient_db(app)

    res = _run_waste_command(app, 'waste_pipeline', force=args.force)

    if args.todb:
        dbrec = {'time': datetime.datetime.utcnow(), 'data': res}
        db = mad2.util.get_mongo_db(app)
        db.waste.insert_one(dbrec)
        return

    for i, r in enumerate(res):
        if i >= args.no_records:
            break

        sha1sum = r['_id']
        if not sha1sum.strip():
            # Entries without a checksum cannot be looked up.
            continue

        # NOTE(review): the "sz" label precedes the waste figure and "w"
        # precedes the file size -- possibly swapped; confirm intent.
        cprint(sha1sum, 'yellow', end='')
        cprint(" sz ", "grey", end="")
        cprint("{:>9}".format(humansize(r['waste'])), end='')
        cprint(" w ", "grey", end="")
        cprint("{:>9}".format(humansize(r['filesize'])), end='')

        # Only the per-host copy count and the owners are displayed, so
        # nothing else is derived from the matching records.
        owners = set()
        hostcount = collections.defaultdict(int)
        for f in db.find({'sha1sum': sha1sum}):
            owners.add(f['username'])
            hostcount[f['host']] += 1

        for h in hostcount:
            print(' ', end='')
            cprint(h, 'green', end=':')
            cprint(hostcount[h], 'cyan', end="")

        cprint(" ", end="")
        cprint(", ".join(owners), 'red')
Example #4
0
def mongo_sum(app, args):
    """
    Print total file size and record count per group, plus a grand total.

    The groups come from ``_single_sum``; each one provides ``_id``,
    ``total`` and ``count``. With ``args.human`` set the sizes are passed
    through ``humansize`` and the columns are aligned, otherwise plain
    tab-separated values are printed.

    :param app: application object, passed on to ``_single_sum``
    :param args: parsed arguments; ``group_by``, ``force`` and ``human``
        are read
    """
    # Materialise the result: it is walked twice (column width, output),
    # which would silently yield nothing the second time for a one-shot
    # iterator.
    res = list(_single_sum(app, group_by=args.group_by, force=args.force))

    def label(gid):
        # Text shown in the first column of the human-readable output.
        return "<undefined>" if gid is None else str(gid)

    # The first column must fit the longest label that is really printed,
    # including "<undefined>" and the closing "Total".
    mgn = len("Total")
    for reshost in res:
        mgn = max(mgn, len(label(reshost['_id'])))
    fms = "{:" + str(mgn) + "}\t{:>10}\t{:>9}"

    total_size = 0
    total_count = 0
    for reshost in res:
        total = reshost['total']
        count = reshost['count']
        total_size += int(total)
        total_count += count
        if args.human:
            print(fms.format(
                label(reshost['_id']), humansize(total), count))
        else:
            print("{}\t{}\t{}".format(
                reshost['_id'], total, count))

    # The summary line reports the accumulated record count, not the
    # count of the last group.
    if args.human:
        print(fms.format(
            "Total", humansize(total_size), total_count))
    else:
        print("Total\t{}\t{}".format(total_size, total_count))
Example #5
0
def mongo_sum(app, args):
    """
    Print total file size and record count per group, plus a grand total.

    The groups come from ``_single_sum``; each one provides ``_id``,
    ``total`` and ``count``. With ``args.human`` set the sizes are passed
    through ``humansize`` and the columns are aligned, otherwise plain
    tab-separated values are printed.

    :param app: application object, passed on to ``_single_sum``
    :param args: parsed arguments; ``group_by``, ``force`` and ``human``
        are read
    """
    # Keep the groups in a list so that both passes below see all of
    # them, even when the helper hands back a one-shot iterator.
    groups = list(_single_sum(app, group_by=args.group_by, force=args.force))

    # Human-readable first-column text for every group, in order.
    labels = ["<undefined>" if g['_id'] is None else str(g['_id'])
              for g in groups]

    # Width of the first column: the longest printed label, and never
    # narrower than the "Total" label of the summary line.
    mgn = max([len("Total")] + [len(text) for text in labels])
    fms = "{:" + str(mgn) + "}\t{:>10}\t{:>9}"

    total_size = 0
    total_count = 0
    for text, group in zip(labels, groups):
        total = group['total']
        count = group['count']
        total_size += int(total)
        total_count += count
        if args.human:
            print(fms.format(text, humansize(total), count))
        else:
            print("{}\t{}\t{}".format(group['_id'], total, count))

    # Report the accumulated totals; the per-group ``count`` of the last
    # iteration is not the overall record count.
    if args.human:
        print(fms.format("Total", humansize(total_size), total_count))
    else:
        print("Total\t{}\t{}".format(total_size, total_count))
Example #6
0
def waste(app, args):
    """
    Show the files wasting the most space.

    Prints at most ``args.no_records`` entries of the ``waste_pipeline``
    result, one line each: sha1sum, two sizes, the number of copies per
    host and the owners.

    :param app: application object, passed on to the database helpers
    :param args: parsed arguments; ``force`` and ``no_records`` are read
    """
    db = get_mongo_transient_db(app)

    res = _run_waste_command(app, 'waste_pipeline',
                             force=args.force)['result']

    for i, r in enumerate(res):
        if i >= args.no_records:
            break

        sha1sum = r['_id']
        if not sha1sum.strip():
            # Entries without a checksum cannot be looked up.
            continue

        # NOTE(review): the "sz" label precedes the waste figure and "w"
        # precedes the file size -- possibly swapped; confirm intent.
        cprint(sha1sum, 'grey', end='')
        cprint(" sz ", "grey", end="")
        cprint("{:>9}".format(humansize(r['waste'])), end='')
        cprint(" w ", "grey", end="")
        cprint("{:>9}".format(humansize(r['filesize'])), end='')

        # Count the copies per host and collect the owners of the file.
        hostcount = collections.defaultdict(int)
        owners = set()
        for f in db.find({'sha1sum': sha1sum}):
            owners.add(f['username'])
            hostcount[f['host']] += 1

        for h in hostcount:
            print(' ', end='')
            cprint(h, 'green', end=':')
            cprint(hostcount[h], 'cyan', end="")

        cprint(" ", end="")
        cprint(", ".join(owners), 'red')
Example #7
0
    def _process_query(query, madfile_in):
        """
        Print the records matching `query`, one line per record.

        Records are skipped when they do not match the optional
        `args.volume` / `args.path_fragment` filters. In echo mode the
        loop stops at the first record that passes the filters, after
        printing the input file name if the query returned more than one
        record.
        """
        records = list(MONGO_mad.find(query))
        for record in records:
            if args.volume and record['volume'] != args.volume:
                continue

            if args.path_fragment and \
               args.path_fragment not in record['fullpath']:
                continue

            if args.echo:
                if len(records) > 1:
                    print(madfile_in['inputfile'])
                break

            # Whole days elapsed since the record was saved.
            days = (arrow.now() - arrow.get(record['save_time'])).days
            stag = 'S' if record.get('is_symlink', False) else '.'

            if args.raw_output:
                # Tab separated; the age is printed as the full time
                # difference rather than as a number of days.
                columns = [
                    record['nlink'], stag,
                    (arrow.now() - arrow.get(record['save_time'])),
                    record['filesize'], record['host'], record['fullpath']
                ]
                print("\t".join(str(column) for column in columns))
                continue

            cprint('%1d%s' % (record['nlink'], stag), 'yellow', end=" ")
            cprint('%3d' % days, 'green', end="d ")
            cprint('%6s' % humansize(record['filesize']), 'white', end=" ")
            # Hosts listed in backup_hosts are highlighted.
            if record['host'] in backup_hosts:
                cprint(record['host'], 'green', attrs=['bold'], end=':')
            else:
                cprint(record['host'], 'cyan', end=':')
            cprint(record['fullpath'])
Example #8
0
File: mongo.py Project: mfiers/mad2
    def _process_query(query, madfile_in):
        """
        Run `query` and print one line for every record that is kept.

        The optional `args.volume` and `args.path_fragment` filters drop
        records. With `args.echo` set, the first record passing the
        filters ends the loop; the input file name is printed first when
        the query returned more than one record.
        """
        found = list(MONGO_mad.find(query))
        for doc in found:
            wrong_volume = args.volume and doc['volume'] != args.volume
            if wrong_volume:
                continue

            fragment_missing = args.path_fragment and \
                args.path_fragment not in doc['fullpath']
            if fragment_missing:
                continue

            if args.echo:
                if len(found) > 1:
                    print(madfile_in['inputfile'])
                break

            # Age of the record in whole days.
            age = arrow.now() - arrow.get(doc['save_time'])
            days = age.days

            # 'S' marks a symlink, '.' anything else.
            if doc.get('is_symlink', False):
                stag = 'S'
            else:
                stag = '.'

            if not args.raw_output:
                cprint('%1d%s' % (doc['nlink'], stag), 'yellow', end=" ")
                cprint('%3d' % days, 'green', end="d ")
                cprint('%6s' % humansize(doc['filesize']), 'white', end=" ")
                # Hosts listed in backup_hosts are highlighted.
                if doc['host'] in backup_hosts:
                    cprint(doc['host'], 'green', attrs=['bold'], end=':')
                else:
                    cprint(doc['host'], 'cyan', end=':')
                cprint(doc['fullpath'])
            else:
                # Raw mode: tab separated values with the full time
                # difference, recomputed at print time.
                raw_values = [
                    doc['nlink'], stag,
                    (arrow.now() - arrow.get(doc['save_time'])),
                    doc['filesize'], doc['host'], doc['fullpath']
                ]
                print("\t".join(map(str, raw_values)))
Example #9
0
def mongo_sum2(app, args):
    """
    Print file size and record count totals grouped by two fields.

    Only records with ``orphan`` set to ``False`` are considered. They
    are grouped on ``args.group_by_1`` and ``args.group_by_2``; per group
    the ``filesize`` values are summed and the records counted. One line
    is printed per group, followed by a grand total line.

    :param app: application object, passed on to
        ``get_mongo_transient_db``
    :param args: parsed arguments; ``group_by_1``, ``group_by_2``,
        ``sort_on_field`` and ``human`` are read
    """
    gb1_field = "${}".format(args.group_by_1)
    gb2_field = "${}".format(args.group_by_2)

    MONGO_mad = get_mongo_transient_db(app)

    # Sort on the group key (ascending) or on the summed size (descending).
    if args.sort_on_field:
        sort_field, sort_order = '_id', 1
    else:
        sort_field, sort_order = 'total', -1

    query = [{
        "$match": {
            "orphan": False
        }
    }, {
        '$group': {
            "_id": {
                "group1": gb1_field,
                "group2": gb2_field
            },
            "total": {
                "$sum": "$filesize"
            },
            "count": {
                "$sum": 1
            }
        }
    }, {
        # The key must be the selected field name itself; the literal
        # string "sort_field" is not a field of the grouped documents.
        "$sort": {
            sort_field: sort_order
        }
    }]

    # Extract the printable values once; they are needed for both the
    # column widths and the output lines.
    rows = [(str(r['_id'].get('group1', '-')),
             str(r['_id'].get('group2', '-')),
             r['total'],
             r['count'])
            for r in MONGO_mad.aggregate(query)]

    # Columns are at least as wide as the "Total" label of the last line.
    gl1 = gl2 = len("Total")
    for g1, g2, _, _ in rows:
        gl1 = max(gl1, len(g1))
        gl2 = max(gl2, len(g2))
    fms = "{:" + str(gl1) + "}  {:" + str(gl2) + "}  {:>10}  {:>9}"

    total_size = 0
    total_count = 0
    for g1, g2, total, count in rows:
        total_size += total
        total_count += count
        if args.human:
            print(fms.format(g1, g2, humansize(total), count))
        else:
            print("{}\t{}\t{}\t{}".format(g1, g2, total, count))

    # The summary line reports the accumulated totals, not the values of
    # the last group that happened to be printed.
    if args.human:
        print(fms.format("Total", "", humansize(total_size), total_count))
    else:
        print("Total\t\t{}\t{}".format(total_size, total_count))
Example #10
0
def waste_text_report(app, args):
    """
    Print a plain-text report of the files wasting the most space.

    The report consists of an optional ``Subject:`` line, a summary of
    the first entry of the ``waste_pipeline`` result, an overview table
    and a per-host breakdown of the first 20 entries.

    :param app: application object, passed on to the database helpers
    :param args: parsed arguments; ``force`` and ``subject`` are read
    """
    db = get_mongo_transient_db(app)

    res = _run_waste_command(
        app, 'waste_pipeline', force=args.force)['result']

    if args.subject:
        print("Subject: {}".format(args.subject))

    if not res:
        # Without entries there is no winner to look up and nothing to
        # list; res[0] below would raise an IndexError.
        print("No waste found.")
        return

    # This week's winner
    top = res[0]
    sha1sum = top['_id']
    owners = set()
    hosts = set()

    total = 0
    for rec in db.find({'sha1sum': sha1sum}):
        total += 1
        hosts.add(rec['host'])
        owners.add(rec['username'])

    print("This week's winner: {}".format(", ".join(owners)))
    # The plural "s" goes directly after the noun, before the comma.
    print("One file, {} location{}, {} server{}, wasting {}.".format(
        total, "s" if total > 1 else "",
        len(hosts), "s" if len(hosts) > 1 else "",
        humansize(top['waste'])))
    print("try:\n   mad repl {}\n\n".format(sha1sum))

    no_to_print = 20
    print("Waste overview: (no / sha1sum / waste / filesize)")
    print("=================================================\n")
    for i, r in enumerate(res):
        if i >= no_to_print:
            break

        sha1sum = r['_id']
        if not sha1sum.strip():
            continue
        print("{:2d} {} {:>10} {:>10}".format(i, sha1sum,
                                              humansize(r['waste']),
                                              humansize(r['filesize'])))

    print("\n\nDetails: (nlink/symlink/size/owner)")
    print("===================================")
    for i, r in enumerate(res):
        if i >= no_to_print:
            break

        sha1sum = r['_id']
        if not sha1sum.strip():
            continue

        print("# {:2d} {} {:>10} {:>10}".format(i, sha1sum,
                                                humansize(r['waste']),
                                                humansize(r['filesize'])))

        # Group the copies of this file per host; every copy contributes
        # its size divided by its link count to the host's usage.
        records = collections.defaultdict(list)
        hostsize = collections.defaultdict(float)

        for rec in db.find({'sha1sum': sha1sum}):
            host = rec['host']
            records[host].append(rec)
            hostsize[host] += float(rec['filesize']) / float(rec['nlink'])

        for h, host_records in records.items():
            print("# Host: {}, copies: {}, total use: {}".format(
                h, len(host_records), humansize(hostsize[h])))
            # List the records of the host just announced, not those of
            # whichever host was seen last in the loop above.
            for rec in host_records:
                smarker = '.'
                if rec.get('is_symlink'):
                    smarker = 'S'
                print("  {} {}".format(rec.get('nlink', '?'), smarker),
                      end=' ')
                print(humansize(rec['filesize']), end=' ')
                print(rec['username'])
                print("   " + rec['fullpath'])

        print("")
Example #11
0
def waste_text_report(app, args):
    """
    Print a plain-text report of the files wasting the most space.

    The report consists of an optional ``Subject:`` line, a summary of
    the first entry of the ``waste_pipeline`` result, an overview table
    and a per-host breakdown of the first 20 entries.

    :param app: application object, passed on to the database helpers
    :param args: parsed arguments; ``force`` and ``subject`` are read
    """
    db = get_mongo_transient_db(app)

    res = _run_waste_command(app, 'waste_pipeline',
                             force=args.force)['result']

    if args.subject:
        print("Subject: {}".format(args.subject))

    if not res:
        # Without entries there is no winner to look up and nothing to
        # list; res[0] below would raise an IndexError.
        print("No waste found.")
        return

    # This week's winner
    top = res[0]
    sha1sum = top['_id']
    owners = set()
    hosts = set()

    total = 0
    for rec in db.find({'sha1sum': sha1sum}):
        total += 1
        hosts.add(rec['host'])
        owners.add(rec['username'])

    print("This week's winner: {}".format(", ".join(owners)))
    print("One file, ", end="")
    print("{} location".format(total), end="")
    if total > 1:
        print("s", end="")
    # Emit the plural "s" before the comma instead of after it.
    print(", {} server".format(len(hosts)), end="")
    if len(hosts) > 1:
        print("s", end="")
    print(", wasting {}.".format(humansize(top['waste'])))
    print("try:\n   mad repl {}\n\n".format(sha1sum))

    no_to_print = 20
    print("Waste overview: (no / sha1sum / waste / filesize)")
    print("=================================================\n")
    for i, r in enumerate(res):
        if i >= no_to_print:
            break

        sha1sum = r['_id']
        if not sha1sum.strip():
            continue
        print("{:2d} {} {:>10} {:>10}"
              .format(i, sha1sum, humansize(r['waste']),
                      humansize(r['filesize'])))

    print("\n\nDetails: (nlink/symlink/size/owner)")
    print("===================================")
    for i, r in enumerate(res):
        if i >= no_to_print:
            break

        sha1sum = r['_id']
        if not sha1sum.strip():
            continue

        print("# {:2d} {} {:>10} {:>10}"
              .format(i, sha1sum, humansize(r['waste']),
                      humansize(r['filesize'])))

        # Group the copies of this file per host; every copy contributes
        # its size divided by its link count to the host's usage.
        records = collections.defaultdict(list)
        hostcount = collections.defaultdict(int)
        hostsize = collections.defaultdict(float)

        for rec in db.find({'sha1sum': sha1sum}):
            host = rec['host']
            records[host].append(rec)
            hostcount[host] += 1
            hostsize[host] += float(rec['filesize']) / float(rec['nlink'])

        for h in hostcount:
            print("# Host: {}, copies: {}, total use: {}".format(
                h, hostcount[h], humansize(hostsize[h])))
            # Index with the host of this iteration (h); the variable
            # `host` still holds the last host of the loop above.
            for rec in records[h]:
                smarker = '.'
                if rec.get('is_symlink'):
                    smarker = 'S'
                print("  {} {}".format(rec.get('nlink', '?'), smarker),
                      end=' ')
                print(humansize(rec['filesize']), end=' ')
                print(rec['username'])
                print("   " + rec['fullpath'])

        print("")