Example #1
0
    def epilogue(self):
        """Print the FWALK end-of-run summary on rank 0.

        Every rank calls ``self.total_tally()`` and records
        ``self.time_ended``; only rank 0 prints. Counts and sizes are read
        from the module-level ``T`` object after the tally call.
        """
        self.total_tally()
        self.time_ended = MPI.Wtime()

        if self.circle.rank != 0:
            return

        row = "\t{:<20}{:<20}"
        print("\nFWALK Epilogue:\n")
        print(row.format("Directory count:", T.total_dirs))
        print(row.format("Sym Links count:", T.total_symlinks))
        print(row.format("File count:", T.total_files))
        print(row.format("Skipped count:", T.total_skipped))
        print(row.format("Total file size:", bytes_fmt(T.total_filesize)))
        if T.total_files != 0:
            # No average can be computed without files, so the row is omitted.
            print(row.format(
                "Avg file size:",
                bytes_fmt(T.total_filesize / float(T.total_files))))
        print(row.format(
            "Tree walk time:",
            utils.conv_time(self.time_ended - self.time_started)))
        print(row.format("Use store flist:", "%s" % self.use_store))
        print(row.format("Use store workq:", "%s" % self.circle.use_store))
        print("\tFWALK Loads: %s" % taskloads)
        print("")
Example #2
0
def prep_recovery():
    """Prepare for checkpoint recovery and return the recovered work queue.

    Looks for this rank's pickled checkpoint (``.pcp_workq.<rid>.<rank>``)
    and its sqlite companion (same name plus ``.db``), publishes both file
    names on ``G``, sums the recoverable sizes across ranks into
    ``T.total_filesize`` and prints a short resume banner on rank 0.

    Returns:
        The ``workq`` attribute of the unpickled checkpoint object.
    """
    global args, circle

    oldsz, sz = 0, 0
    sz_db = 0
    cobj = None
    local_checkpoint_cnt = 0
    chk_file = ".pcp_workq.%s.%s" % (args.rid, circle.rank)
    chk_file_db = ".pcp_workq.%s.%s.db" % (args.rid, circle.rank)
    G.chk_file = chk_file
    G.chk_file_db = chk_file_db

    if os.path.exists(chk_file):
        local_checkpoint_cnt = 1
        with open(chk_file, "rb") as f:
            try:
                # NOTE(security): pickle.load runs arbitrary code from the
                # file; only resume from checkpoint files you trust.
                cobj = pickle.load(f)
                sz = get_workq_size(cobj.workq)
                # Reading src/dest makes a checkpoint object that lacks
                # them fail here, inside the guarded section.
                src = cobj.src
                dest = cobj.dest
                oldsz = cobj.totalsize
            except Exception:
                log.error("error reading %s" % chk_file, extra=dmsg)
                circle.comm.Abort()

    if os.path.exists(chk_file_db):
        local_checkpoint_cnt = 1
        conn = sqlite3.connect(chk_file_db)
        try:
            cur = conn.cursor()
            cur.execute("SELECT * FROM checkpoint")
            row = cur.fetchone()
            if row is not None:
                # Two columns are expected; only the second (a size) is
                # used. An empty table leaves sz_db at 0.
                _, sz_db = row
        except sqlite3.OperationalError as e:
            # Best effort: an unreadable checkpoint table counts as zero
            # recoverable bytes, but the reason is no longer hidden.
            log.debug("cannot read checkpoint table in %s: %s" %
                      (chk_file_db, e), extra=dmsg)
        finally:
            # Always release the database handle.
            conn.close()

    log.debug("located chkpoint %s, sz=%s, local_cnt=%s" %
              (chk_file, sz, local_checkpoint_cnt), extra=dmsg)

    total_checkpoint_cnt = circle.comm.allreduce(local_checkpoint_cnt)
    log.debug("total_checkpoint_cnt = %s" % total_checkpoint_cnt, extra=dmsg)
    verify_checkpoint(chk_file, total_checkpoint_cnt)

    # acquire total size
    total_sz_mem = circle.comm.allreduce(sz)
    total_sz_db = circle.comm.allreduce(sz_db)
    T.total_filesize = total_sz_mem + total_sz_db
    if T.total_filesize == 0:
        if circle.rank == 0:
            print("\nRecovery size is 0 bytes, can't proceed.")
        circle.exit(0)

    if circle.rank == 0:
        print("\nResume copy\n")
        print("\t{:<20}{:<20}".format("Original size:", bytes_fmt(oldsz)))
        print("\t{:<20}{:<20}".format("Recovery size:", bytes_fmt(T.total_filesize)))
        print("")

    # NOTE(review): cobj is still None when this rank had no pickled
    # checkpoint file, in which case this raises AttributeError --
    # presumably verify_checkpoint() rules that out; confirm.
    return cobj.workq
Example #3
0
    def epilogue(self):
        """Print the Fprof summary on rank 0 and return the total file size.

        The six totals come from ``self.total_tally()``; ``self.time_ended``
        is recorded on every rank, while printing happens on rank 0 only.
        """
        (total_dirs, total_files, total_filesize,
         total_symlinks, total_skipped, maxfiles) = self.total_tally()
        self.time_ended = MPI.Wtime()

        if self.circle.rank == 0:
            numeric_row = "\t{:<25}{:<20,}"    # numeric
            text_row = "\t{:<25}{:<20}"        # string

            print("\nFprof epilogue:\n")
            for label, count in (("Directory count:", total_dirs),
                                 ("Sym Links count:", total_symlinks),
                                 ("File count:", total_files),
                                 ("Skipped count:", total_skipped)):
                print(numeric_row.format(label, count))
            print(text_row.format("Total file size:", bytes_fmt(total_filesize)))
            if total_files != 0:
                average = total_filesize / float(total_files)
                print(text_row.format("Avg file size:", bytes_fmt(average)))
            print(numeric_row.format("Max files within dir:", maxfiles))

            elapsed_time = self.time_ended - self.time_started
            scanned = total_files + total_dirs + total_symlinks + total_skipped
            processing_rate = int(scanned / elapsed_time)
            print(text_row.format("Tree walk time:", utils.conv_time(elapsed_time)))
            print(text_row.format("Scanning rate:", str(processing_rate) + "/s"))
            print(text_row.format("Fprof loads:", taskloads))
            print("")

        return total_filesize
Example #4
0
def main():
    """Run the FWALK tree walk and print its reports.

    Parses and broadcasts the arguments, validates the source path, prints
    the run parameters on rank 0, walks the tree through ``Circle`` and
    ``FWalk``, optionally prints a size histogram and the largest files
    (``args.stats``), then runs the walker's epilogue and cleanup.
    """
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.use_store = args.use_store
    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    circle = Circle()
    treewalk = FWalk(circle, G.src)
    circle.begin(treewalk)

    if G.use_store:
        treewalk.flushdb()

    if args.stats:
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                # An empty histogram has total == 0; report 0% instead of
                # dividing by zero.
                percent = 100 * hist[idx] / float(total) if total != 0 else 0
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format("< ",
                    utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))

        # key= sorting runs on Python 2 and 3 alike; cmp-style comparators
        # exist only on Python 2. Ordering (largest first, ties stable) is
        # unchanged.
        treewalk.flist.sort(key=lambda f: f.st_size, reverse=True)
        # gather() is called on every rank; only rank 0 uses the result.
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(key=lambda f: f.st_size, reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in range(args.top):
                print("\t{:15}{:<30}".format(utils.bytes_fmt(globaltops[i].st_size),
                      globaltops[i].path))

    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()
Example #5
0
def gen_histogram(total_file_size):
    """Print the fileset histogram on rank 0 and build its syslog strings.

    ``gather_histogram()`` is called on every rank; the table is then
    printed on rank 0 from the module-level ``hist`` and ``fsize`` arrays.

    Args:
        total_file_size: denominator for the per-bucket size percentage.

    Returns:
        A ``(file_count_summary, size_percent_summary)`` tuple of strings;
        both are empty on ranks other than 0.
    """
    def _percent(part, whole):
        # A zero denominator (no files, or only zero-byte files) reports
        # 0% instead of raising.
        return 100 * part / float(whole) if whole != 0 else 0

    bins_fmt = utils.bins_strs(G.bins)
    gather_histogram()
    if comm.rank != 0:
        return "", ""

    total_num_of_files = hist.sum()
    if total_num_of_files == 0:
        err_and_exit("No histogram generated.\n")

    print("Fileset Histogram\n")

    msg = "\t{:<3}{:<15}{:<15,}{:>10}{:>15}{:>15}"
    msg2 = "\t{:<3}{:<15}{:<15}{:>10}{:>15}{:>15}"

    print(msg2.format("", "Buckets", "Num of Files", "Size",  "%(Files)", "%(Size)"))
    print("")

    # Collected per bucket and joined once at the end.
    filecount_parts = []
    fsizeperc_parts = []

    for idx, rightbound in enumerate(G.bins):
        percent_files = _percent(hist[idx], total_num_of_files)
        percent_size = _percent(fsize[idx], total_file_size)

        print(msg.format("<= ", utils.bytes_fmt(rightbound),
                         hist[idx],
                         utils.bytes_fmt(fsize[idx]),
                         "%0.2f%%" % percent_files, "%0.2f%%" % percent_size))

        filecount_parts.append("%s = %s" % (bins_fmt[idx], hist[idx]))
        fsizeperc_parts.append("%s = %s" % (bins_fmt[idx], percent_size))

    # Special processing of the last row: everything above the final bin
    # edge, which is still held in `rightbound` from the loop.
    percent_files = _percent(hist[-1], total_num_of_files)
    percent_size = _percent(fsize[-1], total_file_size)
    print(msg.format("> ", utils.bytes_fmt(rightbound),
                     hist[-1],
                     utils.bytes_fmt(fsize[-1]),
                     "%0.2f%%" % percent_files,
                     "%0.2f%%" % percent_size))

    filecount_parts.append("%s = %s" % (bins_fmt[-1], hist[-1]))
    fsizeperc_parts.append("%s = %s" % (bins_fmt[-1], percent_size))

    return ", ".join(filecount_parts), ", ".join(fsizeperc_parts)
Example #6
0
    def reduce_report(self, buf):
        """Print a one-line checksum progress report from ``buf['vsize']``.

        The percentage is included only when ``self.totalsize`` is non-zero
        and the rate only when the reduce interval is non-zero;
        ``self.vsize_prior`` is updated whenever a rate is computed.
        """
        pieces = []
        if self.totalsize != 0:
            percent = 100 * float(buf['vsize']) / self.totalsize
            pieces.append("%.2f %% block checksummed, " % percent)

        pieces.append("%s bytes done" % bytes_fmt(buf['vsize']))

        interval = self.circle.reduce_time_interval
        if interval != 0:
            rate = float(buf['vsize'] - self.vsize_prior) / interval
            self.vsize_prior = buf['vsize']
            pieces.append(", estimated checksum rate: %s/s" % bytes_fmt(rate))

        print("".join(pieces))
Example #7
0
    def reduce_report(self, buf):
        """Print a one-line copy progress report from ``buf['cnt_filesize']``.

        The percentage is included only when ``self.totalsize`` is non-zero
        and the rate only when the reduce interval is non-zero;
        ``self.cnt_filesize_prior`` is updated whenever a rate is computed.
        """
        pieces = []
        if self.totalsize != 0:
            percent = 100 * float(buf['cnt_filesize']) / self.totalsize
            pieces.append("%.2f %% finished, " % percent)

        pieces.append("%s copied" % bytes_fmt(buf['cnt_filesize']))

        interval = self.circle.reduce_time_interval
        if interval != 0:
            rate = float(buf['cnt_filesize'] - self.cnt_filesize_prior) / interval
            self.cnt_filesize_prior = buf['cnt_filesize']
            pieces.append(", estimated transfer rate: %s/s" % bytes_fmt(rate))

        print("".join(pieces))
Example #8
0
    def reduce_report(self, buf):
        """Report checksum progress on a single printed line.

        Reads ``buf['vsize']``. A percentage is shown when
        ``self.totalsize`` is non-zero, and a rate (since the previous
        report) when the reduce interval is non-zero.
        """
        prefix = ""
        if self.totalsize != 0:
            prefix = "%.2f %% block checksummed, " % (
                100 * float(buf['vsize']) / self.totalsize)

        body = "%s bytes done" % bytes_fmt(buf['vsize'])

        suffix = ""
        if self.circle.reduce_time_interval != 0:
            delta = float(buf['vsize'] - self.vsize_prior)
            rate = delta / self.circle.reduce_time_interval
            # Remember this sample so the next rate covers one interval.
            self.vsize_prior = buf['vsize']
            suffix = ", estimated checksum rate: %s/s" % bytes_fmt(rate)

        print(prefix + body + suffix)
Example #9
0
    def reduce_report(self, buf):
        """Print a one-line copy progress report.

        Args:
            buf: reduction buffer; ``buf['cnt_filesize']`` and
                ``buf['mem_snapshot']`` are read.

        The percentage is included only when ``self.totalsize`` is non-zero
        and the rate only when the reduce interval is non-zero;
        ``self.cnt_filesize_prior`` is updated whenever a rate is computed.
        """
        out = ""
        if self.totalsize != 0:
            # True division: floor division would discard the fractional
            # part that the "%.2f" format is there to show.
            out += "%.2f %% finished, " % (100 * float(buf['cnt_filesize']) / self.totalsize)

        out += "%s copied" % bytes_fmt(buf['cnt_filesize'])

        if self.circle.reduce_time_interval != 0:
            # Same here: keep the fractional bytes/second.
            rate = float(buf['cnt_filesize'] - self.cnt_filesize_prior) / self.circle.reduce_time_interval
            self.cnt_filesize_prior = buf['cnt_filesize']
            out += ", estimated transfer rate: %s/s" % bytes_fmt(rate)

        out += ", memory usage: %s" % bytes_fmt(buf['mem_snapshot'])
        print(out)
Example #10
0
    def reduce_report(self, buf):
        """Print verification progress taken from ``buf['vsize']``.

        The percentage prefix is emitted only when ``self.totalsize`` is
        non-zero.
        """
        pieces = []
        if self.totalsize != 0:
            percent = 100 * float(buf['vsize']) / self.totalsize
            pieces.append("%.2f %% verified, " % percent)
        pieces.append("%s bytes done" % bytes_fmt(buf['vsize']))
        print("".join(pieces))
Example #11
0
    def reduce_report(self, buf):
        """Report verification progress on one line.

        Reads ``buf['vsize']``; the percentage is shown only when
        ``self.totalsize`` is non-zero.
        """
        prefix = ""
        if self.totalsize != 0:
            prefix = "%.2f %% verified, " % (
                100 * float(buf['vsize']) / self.totalsize)
        print(prefix + "%s bytes done" % bytes_fmt(buf['vsize']))
Example #12
0
    def epilogue(self):
        """Print the FWALK summary on rank 0 and return the total file size.

        The five totals come from ``self.total_tally()``; ``self.time_ended``
        is recorded on every rank, while printing happens on rank 0 only.

        Returns:
            The total file size reported by ``self.total_tally()``.
        """
        total_dirs, total_files, total_filesize, total_symlinks, total_skipped = self.total_tally()
        self.time_ended = MPI.Wtime()

        if self.circle.rank == 0:
            row = "\t{:<20}{:<20}"
            print("\nFWALK Epilogue:\n")
            print(row.format("Directory count:", total_dirs))
            print(row.format("Sym Links count:", total_symlinks))
            print(row.format("File count:", total_files))
            print(row.format("Skipped count:", total_skipped))
            print(row.format("Total file size:", bytes_fmt(total_filesize)))
            if total_files != 0:
                # No average can be computed without files, so the row is
                # omitted.
                print(row.format("Avg file size:", bytes_fmt(total_filesize/float(total_files))))
            print(row.format("Tree walk time:", utils.conv_time(self.time_ended - self.time_started)))
            print("\tFWALK Loads: %s" % taskloads)
            print("")

        return total_filesize
Example #13
0
def main():
    """Run the fprof profiling walk and print its reports.

    Parses and broadcasts the arguments, validates the source path, prints
    the run parameters on rank 0, walks the tree with ``ProfileWalk``,
    prints the histogram and the epilogue, and, when
    ``args.gpfs_block_alloc`` is set, the GPFS block allocation report.
    """
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        param_row = "\t{:<20}{:<20}"
        print("Running Parameters:\n")
        print(param_row.format("fprof version:", __version__))
        print(param_row.format("Num of hosts:", hosts_cnt))
        print(param_row.format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print(param_row.format("Root path:", G.src))

    circle = Circle()
    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)

    gen_histogram()

    # The total file size is the numerator of the GPFS efficiency figure.
    total_file_size = treewalk.epilogue()

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        if comm.rank == 0:
            report_row = "\tBlocksize: {:<6}   Estimated Space: {:<20s}   Efficiency: {:>6.0%}"
            print("\nGPFS Block Alloc Report:\n")
            print("\tSubblocks: %s\n" % gpfs_blocks)
            for idx, bsz in enumerate(G.gpfs_block_size):
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]
                # With no estimated space the efficiency is reported as 0.
                if gpfs_file_size != 0:
                    efficiency = total_file_size/float(gpfs_file_size)
                else:
                    efficiency = 0
                print(report_row.format(bsz, bytes_fmt(gpfs_file_size), efficiency))

    treewalk.cleanup()
    circle.finalize()
Example #14
0
 def epilogue(self):
     """Record the end time and, on rank 0, print the checksum timing.

     Nothing beyond a blank line is printed when ``self.totalsize`` is 0.
     """
     self.wtime_ended = MPI.Wtime()
     if self.circle.rank != 0:
         return

     print("")
     if self.totalsize == 0:
         return

     elapsed = self.wtime_ended - self.wtime_started
     rate = float(self.totalsize) / elapsed
     print("Checksumming Completed In: %.2f seconds" % elapsed)
     print("Average Rate: %s/s\n" % bytes_fmt(rate))
Example #15
0
 def epilogue(self):
     """Stamp ``self.wtime_ended`` and report checksum timing on rank 0.

     Rank 0 always prints a blank line; the duration and average rate
     follow only when ``self.totalsize`` is non-zero.
     """
     self.wtime_ended = MPI.Wtime()
     on_root = self.circle.rank == 0
     if on_root:
         print("")
     if on_root and self.totalsize != 0:
         duration = self.wtime_ended - self.wtime_started
         rate = float(self.totalsize) / duration
         print("Checksumming Completed In: %.2f seconds" % duration)
         print("Average Rate: %s/s\n" % bytes_fmt(rate))
Example #16
0
def prep_recovery():
    """Prepare for checkpoint recovery and return the recovered workq.

    Loads this rank's pickled checkpoint (``.pcp_workq.<rid>.<rank>``) if
    it exists, sums the recoverable size across ranks into ``G.totalsize``
    and prints a resume banner on rank 0.
    """
    global args, circle

    checkpoint = None
    original_sz = 0
    local_sz = 0
    found = 0
    chk_file = ".pcp_workq.%s.%s" % (args.rid, circle.rank)

    if os.path.exists(chk_file):
        found = 1
        with open(chk_file, "rb") as f:
            try:
                checkpoint = pickle.load(f)
                local_sz = get_workq_size(checkpoint.workq)
                # src/dest are read inside the guarded section, so a
                # checkpoint object lacking them is reported and aborts.
                src = checkpoint.src
                dest = checkpoint.dest
                original_sz = checkpoint.totalsize
            except Exception as e:
                log.error("error reading %s" % chk_file, extra=dmsg)
                circle.comm.Abort()

    log.debug("located chkpoint %s, sz=%s, local_cnt=%s" %
              (chk_file, local_sz, found), extra=dmsg)

    total_checkpoint_cnt = circle.comm.allreduce(found)
    log.debug("total_checkpoint_cnt = %s" % total_checkpoint_cnt, extra=dmsg)
    verify_checkpoint(chk_file, total_checkpoint_cnt)

    # Total recoverable size across all ranks.
    G.totalsize = circle.comm.allreduce(local_sz)
    if G.totalsize == 0:
        if circle.rank == 0:
            print("\nRecovery size is 0 bytes, can't proceed.")
        circle.exit(0)

    if circle.rank == 0:
        banner_row = "\t{:<20}{:<20}"
        print("\nResume copy\n")
        print(banner_row.format("Original size:", bytes_fmt(original_sz)))
        print(banner_row.format("Recovery size:", bytes_fmt(G.totalsize)))
        print("")

    return checkpoint.workq
Example #17
0
    def epilogue(self):
        """Print the Fprof summary on rank 0, mirror it to syslog, and
        return ``Tally.total_filesize``.

        Every rank calls ``self.total_tally()`` and records
        ``self.time_ended``; printing and the ``sendto_syslog`` calls happen
        on rank 0 only.
        """
        self.total_tally()
        self.time_ended = MPI.Wtime()

        if self.circle.rank == 0:
            numeric_row = "\t{:<25}{:<20,}"    # numeric
            text_row = "\t{:<25}{:<20}"        # string

            print("\nFprof epilogue:\n")
            print(numeric_row.format("Directory count:", Tally.total_dirs))
            print(numeric_row.format("Sym links count:", Tally.total_symlinks))
            print(numeric_row.format("Hard linked files:", Tally.total_nlinked_files))
            print(numeric_row.format("File count:", Tally.total_files))
            if args.profdev:
                print(numeric_row.format("Dev file count:", Tally.devfile_cnt))
                print(text_row.format("Dev file size:", bytes_fmt(Tally.devfile_sz)))
            print(numeric_row.format("Skipped count:", Tally.total_skipped))
            print(text_row.format("Total file size:", bytes_fmt(Tally.total_filesize)))
            if Tally.total_files != 0:
                average = Tally.total_filesize/float(Tally.total_files)
                print(text_row.format("Avg file size:", bytes_fmt(average)))
            print(numeric_row.format("Max files within dir:", Tally.max_files))

            elapsed_time = self.time_ended - self.time_started
            scanned = (Tally.total_files + Tally.total_dirs +
                       Tally.total_symlinks + Tally.total_skipped)
            processing_rate = int(scanned / elapsed_time)
            print(text_row.format("Tree walk time:", utils.conv_time(elapsed_time)))
            print(text_row.format("Scanning rate:", str(processing_rate) + "/s"))
            print(text_row.format("Fprof loads:", taskloads))
            print("")

            # Mirror the headline figures to syslog.
            sendto_syslog("fprof.rootpath", "%s" % ",".join(G.src))
            sendto_syslog("fprof.version", "%s" % __version__)

            sendto_syslog("fprof.dir_count", Tally.total_dirs)
            sendto_syslog("fprof.sym_count", Tally.total_symlinks)
            sendto_syslog("fprof.file_count", Tally.total_files)
            sendto_syslog("fprof.total_file_size", bytes_fmt(Tally.total_filesize))
            if Tally.total_files > 0:
                syslog_average = Tally.total_filesize/float(Tally.total_files)
                sendto_syslog("fprof.avg_file_size", bytes_fmt(syslog_average))
            sendto_syslog("fprof.walktime", utils.conv_time(elapsed_time))
            sendto_syslog("fprof.scan_rate", processing_rate)

        return Tally.total_filesize
Example #18
0
def gen_histogram():
    """Print the fileset histogram with bar charts on rank 0.

    ``gather_histogram()`` is called on every rank; rank 0 then prints one
    row per entry of ``G.bins`` plus a final "greater than" row, reading
    counts from the module-level ``hist``.
    """
    gather_histogram()
    if comm.rank != 0:
        return

    total = hist.sum()
    bucket_scale = 0.5
    if total == 0:
        err_and_exit("No histogram generated.\n")

    row = "\t{:<3}{:<15}{:<15,}  {:>8}  {:<50}"

    def emit(prefix, bound, count):
        # One table row: bucket label, file count, share, bar.
        percent = 100 * count / float(total)
        bar = '∎' * int(bucket_scale * percent)
        print(row.format(prefix, utils.bytes_fmt(bound), count,
                         "%0.2f%%" % percent, bar))

    print("\nFileset histograms:\n")
    for idx, rightbound in enumerate(G.bins):
        emit("< ", rightbound, hist[idx])

    # The last row reuses the final bin edge left in `rightbound`.
    emit("> ", rightbound, hist[-1])
Example #19
0
    def reduce_report(self, buf):
        """Print a scan progress line and remember this sample.

        Reads ``buf['reduce_items']``, ``buf['mem_snapshot']`` and
        ``buf['work_qsize']``. The rate is the change in item count since
        the previous report divided by the wall time elapsed since then.
        """
        scanned = buf['reduce_items']
        rate = (scanned - self.last_cnt) / (MPI.Wtime() - self.last_reduce_time)

        template = "Scanned files: {:<12,}   Processing rate: {:<6,}/s   HWM mem: {:<12}   Work Queue: {:<12,}"
        line = template.format(scanned,
                               int(rate),
                               bytes_fmt(buf['mem_snapshot']),
                               buf['work_qsize'])
        print(line)

        # Baseline for the next report.
        self.last_cnt = buf['reduce_items']
        self.last_reduce_time = MPI.Wtime()
Example #20
0
 def epilogue(self):
     """Gather per-rank loads and print the FCP summary on rank 0.

     ``self.wtime_ended`` and the module-level ``taskloads`` are set on
     every rank. Rank 0 prints the summary, or a short notice when
     ``self.totalsize`` is 0.
     """
     global taskloads
     self.wtime_ended = MPI.Wtime()
     taskloads = self.circle.comm.gather(self.reduce_items)
     if self.circle.rank != 0:
         return

     if self.totalsize == 0:
         print("\nZero filesize detected, done.\n")
         return

     elapsed = self.wtime_ended - self.wtime_started
     rate = float(self.totalsize) / elapsed
     row = "\t{:<20}{:<20}"
     print("\nFCP Epilogue:\n")
     print(row.format("Ending at:", utils.current_time()))
     print(row.format("Completed in:", utils.conv_time(elapsed)))
     print(row.format("Transfer Rate:", "%s/s" % bytes_fmt(rate)))
     print(row.format("FCP Loads:", "%s" % taskloads))
Example #21
0
    def reduce_report(self, buf):
        """Print a scan progress line and remember this sample.

        Reads ``buf['reduce_items']``, ``buf['mem_snapshot']`` and
        ``buf['work_qsize']``. When ``py_version()`` returns "py26" a
        format without the "," option and with explicit field indices is
        used.
        """
        elapsed_items = buf['reduce_items'] - self.last_cnt
        rate = elapsed_items / (MPI.Wtime() - self.last_reduce_time)

        legacy = py_version() == "py26"
        if not legacy:
            template = "Scanned files: {:<12,}   Processing rate: {:<6,}/s   HWM mem: {:<12}   Work Queue: {:<12,}"
        else:
            template = "Scanned files: {0:<12}   Processing rate: {1:<6}/s   HWM mem: {2:<12}   Work Queue: {3:<12}"

        line = template.format(buf['reduce_items'], int(rate),
                               bytes_fmt(buf['mem_snapshot']),
                               buf['work_qsize'])
        print(line)

        # Baseline for the next report.
        self.last_cnt = buf['reduce_items']
        self.last_reduce_time = MPI.Wtime()
Example #22
0
 def epilogue(self):
     """Gather per-rank loads and print the FCP summary on rank 0.

     ``self.wtime_ended`` and the module-level ``taskloads`` are set on
     every rank. Rank 0 prints the summary, including both "use store"
     flags, or a short notice when ``self.totalsize`` is 0.
     """
     global taskloads
     self.wtime_ended = MPI.Wtime()
     taskloads = self.circle.comm.gather(self.reduce_items)
     if self.circle.rank != 0:
         return

     if self.totalsize == 0:
         print("\nZero filesize detected, done.\n")
         return

     elapsed = self.wtime_ended - self.wtime_started
     rate = float(self.totalsize) / elapsed
     row = "\t{:<20}{:<20}"
     print("\nFCP Epilogue:\n")
     print(row.format("Ending at:", utils.current_time()))
     print(row.format("Completed in:", utils.conv_time(elapsed)))
     print(row.format("Transfer Rate:", "%s/s" % bytes_fmt(rate)))
     print(row.format("Use store chunksums:", "%s" % self.use_store))
     print(row.format("Use store workq:", "%s" % self.circle.use_store))
     print(row.format("FCP Loads:", "%s" % taskloads))
Example #23
0
def gen_signature(bfsign, totalsize):
    """Aggregate the dataset signature and, on rank 0, print and save it.

    ``aggregate_checksums(bfsign)`` runs on every rank and is timed with
    ``MPI.Wtime()``. Rank 0 prints the running time and the signature and
    writes a small report to ``args.output``.
    """
    is_root = comm.rank == 0
    if is_root:
        print("\nAggregating dataset signature ...\n")

    started = MPI.Wtime()
    sig = aggregate_checksums(bfsign)
    finished = MPI.Wtime()

    if not is_root:
        return

    row = "\t{:<20}{:<20}"
    print(row.format("Running time:", utils.conv_time(finished - started)))
    print(row.format("SHA1 Signature:", sig))
    with open(args.output, "w") as report:
        report.write("sha1: %s\n" % sig)
        report.write("chunksize: %s\n" % fcp.chunksize)
        report.write("fcp version: %s\n" % __version__)
        report.write("src: %s\n" % fcp.src)
        report.write("destination: %s\n" % fcp.dest)
        report.write("date: %s\n" % utils.current_time())
        # NOTE(review): the "totoalsize" key looks like a typo, but it is
        # part of the file format and is kept as is; confirm before changing.
        report.write("totoalsize: %s\n" % utils.bytes_fmt(totalsize))
Example #24
0
def main():
    """Run the FWALK tree walk and print its reports.

    Parses and broadcasts the arguments, validates the source path, prints
    the run parameters on rank 0, walks the tree through ``Circle`` and
    ``FWalk``, optionally prints a size histogram and the largest files
    (``args.stats``), then runs the walker's epilogue and cleanup.
    """
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.use_store = args.use_store
    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:",
                                      MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    circle = Circle()
    treewalk = FWalk(circle, G.src)
    circle.begin(treewalk)

    if G.use_store:
        treewalk.flushdb()

    if args.stats:
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                # An empty histogram has total == 0; report 0% instead of
                # dividing by zero.
                if total != 0:
                    percent = 100 * hist[idx] / float(total)
                else:
                    percent = 0
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format(
                    "< ", utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))

        # key= sorting runs on Python 2 and 3 alike; cmp-style comparators
        # exist only on Python 2. Ordering (largest first, ties stable) is
        # unchanged.
        treewalk.flist.sort(key=lambda f: f.st_size, reverse=True)
        # gather() is called on every rank; only rank 0 uses the result.
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(key=lambda f: f.st_size, reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in range(args.top):
                print("\t{:15}{:<30}".format(
                    utils.bytes_fmt(globaltops[i].st_size),
                    globaltops[i].path))

    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()
Example #25
0
    def epilogue(self):
        """Print the Fprof summary on rank 0, optionally mirror it to
        syslog, and return ``Tally.total_filesize``.

        Every rank calls ``self.total_tally()`` and records
        ``self.time_ended``; printing and the ``sendto_syslog`` calls
        (when ``args.syslog`` is set) happen on rank 0 only.

        Returns:
            ``Tally.total_filesize``.
        """
        self.total_tally()
        self.time_ended = MPI.Wtime()

        if self.circle.rank == 0:
            print("\nFprof epilogue:\n")
            if py_version() != "py26":
                fmt_msg1 = "\t{0:<25}{1:<20,}"  # numeric
            else:  # 2.6 compat
                fmt_msg1 = "\t{0:<25}{1:<20}"  # numeric

            fmt_msg2 = "\t{0:<25}{1:<20}"  # string
            fmt_msg3 = "\t{0:<25}{1:<20.2f}"  # float
            print(fmt_msg1.format("Directory count:", Tally.total_dirs))
            print(fmt_msg1.format("Sym links count:", Tally.total_symlinks))
            print(
                fmt_msg1.format("Hard linked files:",
                                Tally.total_nlinked_files))
            print(fmt_msg1.format("File count:", Tally.total_files))
            print(fmt_msg1.format("Zero byte files:", Tally.total_0byte_files))
            print(fmt_msg1.format("Sparse files:", Tally.total_sparse))

            if args.profdev:
                print(fmt_msg1.format("Dev file count:", Tally.devfile_cnt))
                print(
                    fmt_msg2.format("Dev file size:",
                                    bytes_fmt(Tally.devfile_sz)))
            print(fmt_msg1.format("Skipped count:", Tally.total_skipped))
            print(
                fmt_msg2.format("Total file size:",
                                bytes_fmt(Tally.total_filesize)))

            if args.cpr:
                compressed = float(Tally.total_blocks * 512)
                uncompressed = float(Tally.total_stat_filesize)
                # Either total can be zero; report 0.00 rather than
                # raising ZeroDivisionError in the middle of the summary.
                ratio = uncompressed / compressed if compressed else 0.0
                saving = 1 - compressed / uncompressed if uncompressed else 0.0
                print(fmt_msg3.format("Compression Ratio:", ratio))
                print(fmt_msg3.format("Compression Saving:", saving))

            if Tally.total_files != 0:
                print(
                    fmt_msg2.format(
                        "Avg file size:",
                        bytes_fmt(Tally.total_filesize /
                                  float(Tally.total_files))))
            print(fmt_msg1.format("Max files within dir:", Tally.max_files))
            elapsed_time = self.time_ended - self.time_started
            processing_rate = int(
                (Tally.total_files + Tally.total_dirs + Tally.total_symlinks +
                 Tally.total_skipped) / elapsed_time)
            print(
                fmt_msg2.format("Tree walk time:",
                                utils.conv_time(elapsed_time)))
            print(
                fmt_msg2.format("Scanning rate:",
                                str(processing_rate) + "/s"))
            print(fmt_msg2.format("Fprof loads:", Tally.taskloads))
            print("")

            if args.syslog:
                sendto_syslog("fprof.rootpath", "%s" % ",".join(G.src))
                sendto_syslog("fprof.version", "%s" % __version__)
                sendto_syslog("fprof.revid", "%s" % __revid__)
                sendto_syslog("fprof.dir_count", Tally.total_dirs)
                sendto_syslog("fprof.sym_count", Tally.total_symlinks)
                sendto_syslog("fprof.file_count", Tally.total_files)
                sendto_syslog("fprof.total_file_size",
                              bytes_fmt(Tally.total_filesize))
                if Tally.total_files > 0:
                    sendto_syslog(
                        "fprof.avg_file_size",
                        bytes_fmt(Tally.total_filesize /
                                  float(Tally.total_files)))
                sendto_syslog("fprof.walktime", utils.conv_time(elapsed_time))
                sendto_syslog("fprof.scan_rate", processing_rate)

        return Tally.total_filesize
Example #26
0
def main():
    """Run the fprof profiling walk and print its reports.

    Starts ``fpipe`` listening, parses and broadcasts the arguments,
    validates the source path, prints the run parameters (and exclusions)
    on rank 0, walks the tree with ``ProfileWalk``, then prints the
    epilogue, the histogram, and, depending on the arguments, the top-file
    and GPFS block allocation reports.
    """
    global comm, args

    fpipe.listen()

    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if args.exclude:
        process_exclude_file()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("fprof version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", G.src))

        if args.exclude:
            print("\nExclusions:\n")
            for ele in EXCLUDE:
                print("\t %s" % ele)

    circle = Circle()
    if args.perprocess:
        circle.report_enabled = True
    else:
        circle.reduce_enabled = True

    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    msg1, msg2 = gen_histogram(total_file_size)

    if comm.rank == 0:
        sendto_syslog("fprof.filecount.hist", msg1)
        sendto_syslog("fprof.fsize_perc.hist", msg2)

    if args.top:
        topfiles = gather_topfiles()
        if comm.rank == 0:
            print("\nTop File Report:\n")
            # edge case: not enough files (< args.top)
            totaln = min(args.top, len(topfiles))
            # range() exists on both Python 2 and 3 (xrange does not).
            for index in range(totaln):
                size, path = topfiles[index]
                print("\t%s: %s (%s)" % (index + 1,
                                       path,
                                       utils.bytes_fmt(size)))
            print("")

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\tinode size: %s" % args.inodesz)
            print("\tDII (data-in-inode) count: %s" % DII_COUNT)
            print("\tSubblocks: %s\n" % gpfs_blocks)
            for idx, bsz in enumerate(G.gpfs_block_size):
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]
                fmt_msg = "\tBlocksize: {:<6}   Estimated Space: {:<20s}   Efficiency: {:>6.2%}"
                # With no estimated space the efficiency is reported as 0.
                if gpfs_file_size != 0:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), total_file_size/float(gpfs_file_size)))
                else:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()
Example #27
0
 def set_adaptive_chunksize(self, totalsz):
     """Set ``self.chunksize`` from ``utils.calc_chunksize(totalsz)``.

     Rank 0 also prints the chosen value.
     """
     self.chunksize = utils.calc_chunksize(totalsz)
     if self.circle.rank != 0:
         return
     print("Adaptive chunksize: %s" % bytes_fmt(self.chunksize))
Example #28
0
def gen_histogram(total_file_size):
    """Print the fileset histogram and build its two syslog summaries.

    ``gather_histogram()`` is called before the rank check; the table is
    printed, and the summaries filled in, only where ``comm.rank == 0``.

    Args:
        total_file_size: denominator used for the per-bucket "%(Size)" column.

    Returns:
        A ``(file_count_summary, size_percent_summary)`` tuple of strings,
        each a ", "-separated list of ``"<bucket> = <value>"`` entries.
        Both strings are empty when ``comm.rank`` is not 0.
    """

    def share(part, whole):
        # percentage of `whole` represented by `part`; 0 if `whole` is 0
        return 100 * part / float(whole) if whole != 0 else 0

    count_entries = []
    size_entries = []
    bins_fmt = utils.bins_strs(G.bins)
    gather_histogram()

    if comm.rank == 0:
        total_num_of_files = hist.sum()
        if total_num_of_files == 0:
            err_and_exit("No histogram generated.\n")

        print("Fileset Histogram\n")

        # the "py26" variant spells out field indexes and has no "," grouping
        if py_version() == "py26":
            row_fmt = "\t{0:<3}{1:<15}{2:<15}{3:>10}{4:>15}{5:>15}"
            header_fmt = "\t{0:<3}{1:<15}{2:<15}{3:>10}{4:>15}{5:>15}"
        else:
            row_fmt = "\t{:<3}{:<15}{:<15,}{:>10}{:>15}{:>15}"
            header_fmt = "\t{:<3}{:<15}{:<15}{:>10}{:>15}{:>15}"

        def emit_row(prefix, bound, slot):
            # print one table line for bucket `slot` and record its summaries
            files_pct = share(hist[slot], total_num_of_files)
            size_pct = share(fsize[slot], total_file_size)
            print(
                row_fmt.format(prefix, utils.bytes_fmt(bound), hist[slot],
                               utils.bytes_fmt(fsize[slot]),
                               "%0.2f%%" % files_pct,
                               "%0.2f%%" % size_pct))
            count_entries.append("%s = %s" % (bins_fmt[slot], hist[slot]))
            size_entries.append("%s = %s" % (bins_fmt[slot], size_pct))

        print(
            header_fmt.format("", "Buckets", "Num of Files", "Size",
                              "%(Files)", "%(Size)"))
        print("")

        for idx, rightbound in enumerate(G.bins):
            emit_row("<= ", rightbound, idx)

        # the final bucket holds everything above the last right bound
        emit_row("> ", rightbound, -1)

    return ", ".join(count_entries), ", ".join(size_entries)
Example #29
0
def main():
    """Run the profiler end to end and print the requested reports.

    In order, this function:

    1. obtains ``args`` from ``parse_and_bcast`` and validates ``args.path``;
    2. prepares the optional directory-bin state (``DIR_BINS``/``DIR_HIST``)
       and the optional stripe output descriptor (``stripe_out``);
    3. prints the run parameters where ``comm.rank == 0``;
    4. runs ``ProfileWalk`` through a ``Circle`` instance;
    5. prints the file histogram and, depending on ``args``, the directory
       histogram, top-N files, top-N directories, GPFS block allocation and
       stripe workload reports.

    The ``gather_*`` helpers and ``comm.gather`` are called outside the
    ``comm.rank == 0`` checks; their relative order is kept exactly as is.
    NOTE(review): presumably these are collective operations that every rank
    must enter in the same order -- confirm before reordering anything here.
    """
    global comm, args, stripe_out, DIR_BINS, DIR_HIST

    fpipe.listen()

    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.memitem_threshold = args.item
    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    # doing directory profiling?
    if args.dirprof:
        if args.dirbins is None:
            # no bins supplied: fall back to powers of ten
            args.dirbins = [
                0, 10, 100, 1000, 10**4, 10**5, 10**6, 10**7, 10**8
            ]
        else:
            # user-supplied bins must already be strictly increasing
            myList = sorted(set(args.dirbins))
            if myList != args.dirbins:
                err_and_exit("Error: duplicated, or unsorted bins: %s\n" %
                             args.dirbins)

        DIR_BINS = args.dirbins
        # one extra slot for entries above the last bin
        DIR_HIST = [0] * (len(DIR_BINS) + 1)

    # Doing stripe analysis? lfs is not really bullet-proof way
    # we might need a better way of doing fstype check.

    if args.lustre_stripe:
        G.lfs_bin = lfs.check_lfs()
        G.stripe_threshold = utils.conv_unit(args.stripe_threshold)
        try:
            stripe_out = os.open(args.stripe_output,
                                 os.O_CREAT | os.O_WRONLY | os.O_APPEND)
        except (OSError, TypeError, ValueError):
            # OSError: the file cannot be created or opened.
            # TypeError/ValueError: the path argument itself is unusable.
            # A bare "except:" here would also trap KeyboardInterrupt and
            # SystemExit, which must be allowed to propagate.
            err_and_exit("Error: can't create stripe output: %s" %
                         args.stripe_output)

    if args.exclude:
        process_exclude_file()

    if comm.rank == 0:
        row = "\t{0:<20}{1:<20}"

        print("Running Parameters:\n")
        print(row.format("fprof version:", __version__))
        print(row.format("Full rev id:", __revid__))
        print(row.format("Num of hosts:", hosts_cnt))
        print(row.format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print(row.format("Syslog report: ", "yes" if args.syslog else "no"))

        if args.dirprof:
            # str(): a list rejects a non-empty format spec on Python 3.4+;
            # on Python 2 the rendered text is the same either way.
            print(row.format("Dir bins: ", str(args.dirbins)))

        if args.lustre_stripe:
            print(row.format("Stripe analysis: ", "yes"))
            print(row.format("Stripe threshold: ", args.stripe_threshold))
        else:
            print(row.format("Stripe analysis: ", "no"))
        print(row.format("Root path:", G.src))

        if args.exclude:
            print("\nExclusions:\n")
            for ele in EXCLUDE:
                print("\t %s" % ele)

    circle = Circle()
    if args.perprocess:
        circle.report_enabled = True
    else:
        circle.report_enabled = False

    # --progress overrides the per-process report setting
    if args.progress:
        circle.report_enabled = False
        circle.reduce_enabled = True

    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    msg1, msg2 = gen_histogram(total_file_size)

    if args.dirprof:
        gen_directory_histogram()

    if comm.rank == 0 and args.syslog:
        sendto_syslog("fprof.filecount.hist", msg1)
        sendto_syslog("fprof.fsize_perc.hist", msg2)

    if args.topn_files:
        topfiles = gather_topfiles()
        if comm.rank == 0:
            print("\nTop N File Report:\n")
            # edge case: fewer files gathered than requested
            totaln = min(args.topn_files, len(topfiles))
            for index in range(totaln):
                size, path = topfiles[index]
                print("\t%s: %s (%s)" %
                      (index + 1, path, utils.bytes_fmt(size)))
            print("")

    if args.topn_dirs:
        topdirs = gather_topdirs()
        if comm.rank == 0:
            print("\nTop N Directory Report:\n")
            # edge case: fewer directories gathered than requested
            totaln = min(args.topn_dirs, len(topdirs))
            for index in range(totaln):
                size, path = topdirs[index]
                print("\t{0:}: {1:}  ({2:,} items)".format(
                    index + 1, path, size))

            print("")

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        gather_gpfs_dii()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\t{0:<15}{1:<4}".format("inode size:", args.inodesz))
            print("\t{0:<25}{1:>15,}".format("DII (data-in-inode) count:",
                                             DII_COUNT))
            print("\tSubblocks: %s\n" % gpfs_blocks)
            fmt_msg = "\tBlocksize: {0:<6}   Estimated Space: {1:<20s}   Efficiency: {2:>6.2%}"
            for idx, bsz in enumerate(G.gpfs_block_size):
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]

                if gpfs_file_size != 0:
                    print(
                        fmt_msg.format(bsz, bytes_fmt(gpfs_file_size),
                                       total_file_size /
                                       float(gpfs_file_size)))
                else:
                    # nothing allocated at this block size: report 0%
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()

    if args.lustre_stripe and stripe_out:
        os.close(stripe_out)

        sp_workload = comm.gather(Tally.spcnt)
        if comm.rank == 0:
            print("Stripe workload total: %s, distribution: %s" %
                  (sum(sp_workload), sp_workload))
Example #30
0
 def set_adaptive_chunksize(self, totalsz):
     """Derive ``self.chunksize`` from ``totalsz`` and report it on rank 0.

     ``utils.calc_chunksize(totalsz)`` supplies the value; the message is
     printed only where ``self.circle.rank`` is 0.
     """
     self.chunksize = utils.calc_chunksize(totalsz)
     is_root = self.circle.rank == 0
     if is_root:
         summary = "Adaptive chunksize: %s" % bytes_fmt(self.chunksize)
         print(summary)