Exemple #1
0
    def reduce_report(self, buf):
        """Print a one-line progress report from a reduction buffer.

        Reads ``reduce_items``, ``mem_snapshot`` and ``work_qsize`` from
        ``buf``, derives the processing rate since the previous report, and
        then stores the current item count and timestamp on ``self`` as the
        baseline for the next call.
        """
        # Sample the clock once so the rate printed here and the baseline
        # saved for the next report refer to the same instant.
        now = MPI.Wtime()
        elapsed = now - self.last_reduce_time
        processed = buf['reduce_items'] - self.last_cnt
        # No measurable time since the previous report: show 0/s instead of
        # aborting the run with ZeroDivisionError.
        rate = processed / elapsed if elapsed > 0 else 0

        if py_version() == "py26":
            # presumably Python 2.6, whose str.format() needs explicit field
            # indices and has no ',' thousands separator -- TODO confirm
            fmt_msg = "Scanned files: {0:<12}   Processing rate: {1:<6}/s   HWM mem: {2:<12}   Work Queue: {3:<12}"
        else:
            fmt_msg = "Scanned files: {:<12,}   Processing rate: {:<6,}/s   HWM mem: {:<12}   Work Queue: {:<12,}"
        print(fmt_msg.format(
            buf['reduce_items'],
            int(rate),
            bytes_fmt(buf['mem_snapshot']),
            buf['work_qsize']))

        self.last_cnt = buf['reduce_items']
        self.last_reduce_time = now
Exemple #2
0
# Number of FileItem instances the benchmark allocates.
NUM_FILES = 10 ** 7

# Exactly one argument is expected: the FileItem implementation to measure.
if len(sys.argv) != 2:
    # The usage text lists the selectors the dispatch below really accepts,
    # and a usage error exits non-zero like the "Wrong version" branch.
    print("memtest [v0|v1]")
    sys.exit(1)

# Bind the selected implementation to the common name FileItem.
if sys.argv[1] == "v0":
    from pcircle.fdef import FileItem0 as FileItem
elif sys.argv[1] == "v1":
    from pcircle.fdef import FileItem1 as FileItem
else:
    print("Wrong version")
    sys.exit(1)


def randstr(size=None):
    """Return a pseudo-random, path-like string.

    The result is the fixed prefix ``/the/common/path/we/have/`` followed by
    ``size`` random ASCII letters.

    Args:
        size: number of random letters to append. When omitted (or ``None``)
            a length between 16 and 32 inclusive is drawn at random, which is
            what every call did while the argument was being ignored.

    Returns:
        The prefix plus the random suffix.
    """
    if size is None:
        size = random.randint(16, 32)
    suffix = "".join(random.choice(string.ascii_letters) for _ in range(size))
    return "/the/common/path/we/have/" + suffix


# Read the process's ru_maxrss before any FileItem is allocated.
# NOTE(review): ru_maxrss units are platform-dependent (kilobytes on Linux,
# bytes on macOS) -- confirm bytes_fmt() receives the unit it expects.
mem_init = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
print("Creating {:,} FileItem instances".format(NUM_FILES))
# Every instance is kept in a list, so all of them are still referenced when
# the second reading is taken.
# NOTE(review): xrange exists only in Python 2; this line needs range on 3.
alist = [FileItem(randstr()) for i in xrange(NUM_FILES)]
mem_final = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

# Report both readings and their difference.
print("Initial RAM usage: {:14}".format(bytes_fmt(mem_init)))
print("  Final RAM usage: {:14}".format(bytes_fmt(mem_final)))
print("  Delta RAM usage: {:14}".format(bytes_fmt(mem_final - mem_init)))
Exemple #3
0
# Number of FileItem instances the benchmark allocates.
NUM_FILES = 10**7

# The script takes exactly one argument: which FileItem variant to measure.
if len(sys.argv) != 2:
    # Advertise the selectors that are actually handled below ('v0'/'v1')
    # and signal the usage error with a non-zero status, matching the
    # "Wrong version" branch.
    print("memtest [v0|v1]")
    sys.exit(1)

# Import the chosen variant under the shared name FileItem.
if sys.argv[1] == 'v0':
    from pcircle.fdef import FileItem0 as FileItem
elif sys.argv[1] == 'v1':
    from pcircle.fdef import FileItem1 as FileItem
else:
    print("Wrong version")
    sys.exit(1)


def randstr(size=None):
    """Build a random path-like string for the benchmark.

    Args:
        size: how many random ASCII letters follow the fixed prefix
            ``/the/common/path/we/have/``. If left as ``None`` the count is
            picked at random from 16..32 inclusive, which matches the old
            behaviour where the argument was always overwritten.

    Returns:
        The fixed prefix followed by ``size`` random letters.
    """
    if size is None:
        size = random.randint(16, 32)
    letters = ''.join(random.choice(string.ascii_letters)
                      for _ in range(size))
    return "/the/common/path/we/have/" + letters


# First ru_maxrss reading, taken before the FileItem list is built.
# NOTE(review): the unit of ru_maxrss differs by platform (kilobytes on
# Linux, bytes on macOS) -- check that it matches what bytes_fmt() assumes.
mem_init = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
print('Creating {:,} FileItem instances'.format(NUM_FILES))
# The list holds a reference to every instance, so none can be freed before
# the second reading.
# NOTE(review): xrange is Python 2 only; under Python 3 this is a NameError.
alist = [FileItem(randstr()) for i in xrange(NUM_FILES)]
mem_final = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

# Print the two readings and the growth between them.
print('Initial RAM usage: {:14}'.format(bytes_fmt(mem_init)))
print('  Final RAM usage: {:14}'.format(bytes_fmt(mem_final)))
print('  Delta RAM usage: {:14}'.format(bytes_fmt(mem_final - mem_init)))
Exemple #4
0
def gen_histogram(total_file_size, dist_file=None):
    """Print the fileset histogram and build its two syslog summaries.

    Every rank calls ``gather_histogram()``; only rank 0 prints the table,
    optionally writes the distribution file, and assembles the summaries.

    Args:
        total_file_size: denominator for the per-bucket size percentage.
        dist_file: when not ``None``, passed to ``gen_dist_file`` together
            with the bucket labels and the file-count histogram.

    Returns:
        A ``(file_count_summary, size_percent_summary)`` pair of strings.
        Both are empty on ranks other than 0.
    """

    def share(part, whole):
        # percentage of ``whole`` represented by ``part``; 0 if whole is 0
        return 100 * part / float(whole) if whole != 0 else 0

    labels = utils.bins_strs(G.bins)
    gather_histogram()

    if comm.rank != 0:
        # non-root ranks have nothing to report
        return "", ""

    file_total = hist.sum()
    if file_total == 0:
        err_and_exit("No histogram generated.\n")

    print("Fileset Histogram\n")

    # row_fmt is for data rows, header_fmt for the column titles; they only
    # differ outside py26, where the file-count column gets a ',' separator
    if py_version() == "py26":
        row_fmt = "\t{0:<3}{1:<15}{2:<15}{3:>10}{4:>15}{5:>15}"
        header_fmt = "\t{0:<3}{1:<15}{2:<15}{3:>10}{4:>15}{5:>15}"
    else:
        row_fmt = "\t{:<3}{:<15}{:<15,}{:>10}{:>15}{:>15}"
        header_fmt = "\t{:<3}{:<15}{:<15}{:>10}{:>15}{:>15}"

    print(header_fmt.format("", "Buckets", "Num of Files",
                            "Size", "%(Files)", "%(Size)"))
    print("")

    count_pieces = []
    size_pieces = []
    for pos, upper in enumerate(G.bins):
        files_pct = share(hist[pos], file_total)
        size_pct = share(fsize[pos], total_file_size)

        print(row_fmt.format("<= ",
                             utils.bytes_fmt(upper),
                             hist[pos],
                             utils.bytes_fmt(fsize[pos]),
                             "%0.2f%%" % files_pct,
                             "%0.2f%%" % size_pct))

        count_pieces.append("%s = %s, " % (labels[pos], hist[pos]))
        size_pieces.append("%s = %s, " % (labels[pos], size_pct))

    # The final row uses the last element of hist/fsize and is labelled with
    # the last bin bound, which ``upper`` still holds from the loop above.
    files_pct = share(hist[-1], file_total)
    size_pct = share(fsize[-1], total_file_size)
    print(row_fmt.format("> ",
                         utils.bytes_fmt(upper),
                         hist[-1],
                         utils.bytes_fmt(fsize[-1]),
                         "%0.2f%%" % files_pct,
                         "%0.2f%%" % size_pct))

    count_pieces.append("%s = %s" % (labels[-1], hist[-1]))
    size_pieces.append("%s = %s" % (labels[-1], size_pct))

    if dist_file is not None:
        gen_dist_file(labels, hist, dist_file)

    return "".join(count_pieces), "".join(size_pieces)
Exemple #5
0
def main():
    """Run one profiling session: parse options, walk the tree, report.

    Steps, in order: start ``fpipe``, parse and broadcast the arguments,
    validate the source path, set up the optional directory-bin and Lustre
    stripe state, print the run parameters on rank 0, run the tree walk,
    then emit the histogram and whichever optional reports were requested
    (directory histogram, syslog, top-N files/dirs, GPFS block allocation)
    before cleaning up.

    Rebinds the module-level ``args``, ``stripe_out``, ``DIR_BINS`` and
    ``DIR_HIST``.
    """
    global comm, args, stripe_out, DIR_BINS, DIR_HIST

    fpipe.listen()

    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.memitem_threshold = args.item
    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    # doing directory profiling?
    if args.dirprof:
        if args.dirbins is None:
            # no bins supplied: fall back to the built-in defaults
            args.dirbins = [0, 10, 100, 1000, 10**4,
                            10**5, 10**6, 10**7, 10**8]
        elif sorted(set(args.dirbins)) != args.dirbins:
            # user-supplied bins must already be strictly increasing
            err_and_exit(
                "Error: duplicated, or unsorted bins: %s\n" % args.dirbins)

        DIR_BINS = args.dirbins
        # one more counter than there are bin bounds
        DIR_HIST = [0] * (len(DIR_BINS) + 1)

    # Doing stripe analysis? lfs is not really bullet-proof way
    # we might need a better way of doing fstype check.

    if args.lustre_stripe:
        G.lfs_bin = lfs.check_lfs()
        G.stripe_threshold = utils.conv_unit(args.stripe_threshold)
        try:
            stripe_out = os.open(args.stripe_output,
                                 os.O_CREAT | os.O_WRONLY | os.O_APPEND)
        except (OSError, TypeError):
            # OSError: the file cannot be created/opened.
            # TypeError: the path is not a usable value (e.g. None).
            # A bare ``except`` here would also trap SystemExit and
            # KeyboardInterrupt, which must propagate.
            err_and_exit("Error: can't create stripe output: %s" %
                         args.stripe_output)

    if args.exclude:
        process_exclude_file()

    if comm.rank == 0:
        param_fmt = "\t{0:<20}{1:<20}"

        print("Running Parameters:\n")
        print(param_fmt.format("fprof version:", __version__))
        print(param_fmt.format("Full rev id:", __revid__))
        print(param_fmt.format("Num of hosts:", hosts_cnt))
        print(param_fmt.format(
            "Num of processes:", MPI.COMM_WORLD.Get_size()))

        print(param_fmt.format(
            "Syslog report: ", "yes" if args.syslog else "no"))

        if args.dirprof:
            # A list does not accept a non-empty format spec on Python 3
            # (TypeError), so render it to text first -- same approach as
            # the "Root path:" line below.
            print(param_fmt.format("Dir bins: ", str(args.dirbins)))

        if args.lustre_stripe:
            print(param_fmt.format("Stripe analysis: ", "yes"))
            print(param_fmt.format(
                "Stripe threshold: ", args.stripe_threshold))
        else:
            print(param_fmt.format("Stripe analysis: ", "no"))

        print(param_fmt.format("Root path:", str(G.src)))

        if args.exclude:
            print("\nExclusions:\n")
            for ele in EXCLUDE:
                print("\t %s" % ele)

    circle = Circle()
    circle.report_enabled = bool(args.perprocess)

    if args.progress:
        # progress output comes from the reduce path, not per-process reports
        circle.report_enabled = False
        circle.reduce_enabled = True

    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    msg1, msg2 = gen_histogram(total_file_size, args.dist_file)

    if args.dirprof:
        gen_directory_histogram()

    if comm.rank == 0 and args.syslog:
        sendto_syslog("fprof.filecount.hist", msg1)
        sendto_syslog("fprof.fsize_perc.hist", msg2)

    if args.topn_files:
        topfiles = gather_topfiles()
        if comm.rank == 0:
            print("\nTop N File Report:\n")
            # edge case: fewer files gathered than were asked for
            totaln = min(args.topn_files, len(topfiles))
            for index in range(totaln):
                size, path = topfiles[index]
                print("\t%s: %s (%s)" % (index + 1,
                                         path,
                                         utils.bytes_fmt(size)))
            print("")

    if args.topn_dirs:
        topdirs = gather_topdirs()
        if comm.rank == 0:
            print("\nTop N Directory Report:\n")
            totaln = min(args.topn_dirs, len(topdirs))
            # range, as in the file report above: xrange is Python 2 only
            for index in range(totaln):
                size, path = topdirs[index]
                print("\t{0:}: {1:}  ({2:,} items)".format(
                    index + 1, path, size))

            print("")

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        gather_gpfs_dii()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\t{0:<15}{1:<4}".format("inode size:", args.inodesz))
            print("\t{0:<25}{1:>15,}".format(
                "DII (data-in-inode) count:", DII_COUNT))
            print("\tSubblocks: %s\n" % gpfs_blocks)
            fmt_msg = "\tBlocksize: {0:<6}   Estimated Space: {1:<20s}   Efficiency: {2:>6.2%}"
            for idx, bsz in enumerate(G.gpfs_block_size):
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]

                if gpfs_file_size != 0:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size),
                                         total_file_size / float(gpfs_file_size)))
                else:
                    # nothing allocated at this block size: report 0%
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()

    if args.lustre_stripe and stripe_out:
        os.close(stripe_out)

        # NOTE(review): this gather only runs on ranks where the condition
        # above is true -- confirm that is the case on every rank.
        sp_workload = comm.gather(Tally.spcnt)
        if comm.rank == 0:
            print("Stripe workload total: %s, distribution: %s" %
                  (sum(sp_workload), sp_workload))
Exemple #6
0
    def epilogue(self):
        """Finish the walk: total the tallies, print and log the summary.

        Calls ``self.total_tally()`` and records ``self.time_ended``. On rank
        0 it prints the "Fprof epilogue" table from the ``Tally`` counters
        and, when ``args.syslog`` is set, sends a subset of them to syslog.

        Returns:
            ``Tally.total_filesize``.
        """
        self.total_tally()
        self.time_ended = MPI.Wtime()

        if self.circle.rank == 0:
            print("\nFprof epilogue:\n")
            if py_version() != "py26":
                fmt_msg1 = "\t{0:<25}{1:<20,}"    # numeric
            else:  # 2.6 compat
                fmt_msg1 = "\t{0:<25}{1:<20}"    # numeric

            fmt_msg2 = "\t{0:<25}{1:<20}"     # string
            fmt_msg3 = "\t{0:<25}{1:<20.2f}"  # float
            print(fmt_msg1.format("Directory count:", Tally.total_dirs))
            print(fmt_msg1.format("Sym links count:", Tally.total_symlinks))
            print(fmt_msg1.format("pipes count:", Tally.total_pipes))
            print(fmt_msg1.format("sockets count:", Tally.total_sockets))
            print(fmt_msg1.format("Hard linked files:", Tally.total_nlinked_files))
            print(fmt_msg1.format("File count:", Tally.total_files))
            print(fmt_msg1.format("Zero byte files:", Tally.total_0byte_files))
            print(fmt_msg1.format("Sparse files:", Tally.total_sparse))

            if args.profdev:
                print(fmt_msg1.format("Dev file count:", Tally.devfile_cnt))
                print(fmt_msg2.format("Dev file size:",
                                      bytes_fmt(Tally.devfile_sz)))
            print(fmt_msg1.format("Skipped count:", Tally.total_skipped))
            print(fmt_msg2.format("Total file size:",
                                  bytes_fmt(Tally.total_filesize)))

            if args.cpr:
                # presumably 512 is the byte size of one counted block --
                # TODO confirm against where total_blocks is accumulated
                compressed = float(Tally.total_blocks * 512)
                uncompressed = float(Tally.total_stat_filesize)
                # Either total can be zero (e.g. nothing was scanned);
                # report 0 rather than raising ZeroDivisionError.
                ratio = (uncompressed / compressed) if compressed else 0.0
                saving = (1 - compressed / uncompressed) if uncompressed else 0.0
                print(fmt_msg3.format("Compression Ratio:", ratio))
                print(fmt_msg3.format("Compression Saving:", saving))

            if Tally.total_files != 0:
                print(fmt_msg2.format("Avg file size:",
                                      bytes_fmt(Tally.total_filesize/float(Tally.total_files))))
            print(fmt_msg1.format("Max files within dir:", Tally.max_files))

            elapsed_time = self.time_ended - self.time_started
            scanned = (Tally.total_files + Tally.total_dirs +
                       Tally.total_symlinks + Tally.total_skipped)
            # guard against a zero-length walk instead of dividing by zero
            processing_rate = int(scanned / elapsed_time) if elapsed_time > 0 else 0
            print(fmt_msg2.format("Tree walk time:",
                                  utils.conv_time(elapsed_time)))
            print(fmt_msg2.format("Scanning rate:", str(processing_rate) + "/s"))
            print(fmt_msg2.format("Fprof loads:", str(Tally.taskloads)))
            print("")

            if args.syslog:
                sendto_syslog("fprof.rootpath", "%s" % ",".join(G.src))
                sendto_syslog("fprof.version", "%s" % __version__)
                sendto_syslog("fprof.revid", "%s" % __revid__)
                sendto_syslog("fprof.dir_count", Tally.total_dirs)
                sendto_syslog("fprof.sym_count", Tally.total_symlinks)
                sendto_syslog("fprof.file_count", Tally.total_files)
                sendto_syslog("fprof.total_file_size",
                              bytes_fmt(Tally.total_filesize))
                if Tally.total_files > 0:
                    sendto_syslog("fprof.avg_file_size", bytes_fmt(
                        Tally.total_filesize/float(Tally.total_files)))
                sendto_syslog("fprof.walktime", utils.conv_time(elapsed_time))
                sendto_syslog("fprof.scan_rate", processing_rate)

        return Tally.total_filesize