def epilogue(self):
    """Print the FWALK end-of-run summary on rank 0.

    Folds per-rank counters into the global tally (collective), records the
    end timestamp, then rank 0 prints counts, sizes and timing.
    """
    self.total_tally()            # collective: populates module-level T.* counters
    self.time_ended = MPI.Wtime()

    if self.circle.rank == 0:
        print("\nFWALK Epilogue:\n")
        print("\t{:<20}{:<20}".format("Directory count:", T.total_dirs))
        print("\t{:<20}{:<20}".format("Sym Links count:", T.total_symlinks))
        print("\t{:<20}{:<20}".format("File count:", T.total_files))
        print("\t{:<20}{:<20}".format("Skipped count:", T.total_skipped))
        print("\t{:<20}{:<20}".format("Total file size:", bytes_fmt(T.total_filesize)))
        if T.total_files != 0:  # avoid ZeroDivisionError on an empty fileset
            print("\t{:<20}{:<20}".format(
                "Avg file size:", bytes_fmt(T.total_filesize / float(T.total_files))))
        # fix: label read "Tree talk time" (typo); sibling epilogue prints
        # "Tree walk time", so make this variant consistent
        print("\t{:<20}{:<20}".format(
            "Tree walk time:", utils.conv_time(self.time_ended - self.time_started)))
        print("\t{:<20}{:<20}".format("Use store flist:", "%s" % self.use_store))
        print("\t{:<20}{:<20}".format("Use store workq:", "%s" % self.circle.use_store))
        print("\tFWALK Loads: %s" % taskloads)
        print("")
def prep_recovery():
    """ Prepare for checkpoint recovery, return recovered workq

    Reads the per-rank pickle checkpoint and/or its sqlite companion,
    verifies the checkpoint count across ranks (collective), and computes
    the total recovery size. Aborts the MPI job if the pickle is unreadable.
    """
    global args, circle

    oldsz, tsz, sz = 0, 0, 0
    sz_db = 0
    cobj = None
    local_checkpoint_cnt = 0
    chk_file = ".pcp_workq.%s.%s" % (args.rid, circle.rank)
    chk_file_db = ".pcp_workq.%s.%s.db" % (args.rid, circle.rank)
    G.chk_file = chk_file
    G.chk_file_db = chk_file_db

    if os.path.exists(chk_file):
        local_checkpoint_cnt = 1
        with open(chk_file, "rb") as f:
            try:
                cobj = pickle.load(f)
                sz = get_workq_size(cobj.workq)
                src = cobj.src
                dest = cobj.dest
                oldsz = cobj.totalsize
            except Exception as e:
                log.error("error reading %s" % chk_file, extra=dmsg)
                circle.comm.Abort()

    if os.path.exists(chk_file_db):
        qsize_db = 0
        local_checkpoint_cnt = 1
        conn = sqlite3.connect(chk_file_db)
        try:
            cur = conn.cursor()
            try:
                cur.execute("SELECT * FROM checkpoint")
                qsize_db, sz_db = cur.fetchone()
            except sqlite3.OperationalError as e:
                # table missing/empty: treat as zero-size checkpoint
                pass
        finally:
            conn.close()  # fix: connection was previously leaked

    log.debug("located chkpoint %s, sz=%s, local_cnt=%s" %
              (chk_file, sz, local_checkpoint_cnt), extra=dmsg)

    total_checkpoint_cnt = circle.comm.allreduce(local_checkpoint_cnt)
    log.debug("total_checkpoint_cnt = %s" % total_checkpoint_cnt, extra=dmsg)
    verify_checkpoint(chk_file, total_checkpoint_cnt)

    # acquire total size
    total_sz_mem = circle.comm.allreduce(sz)
    total_sz_db = circle.comm.allreduce(sz_db)
    T.total_filesize = total_sz_mem + total_sz_db
    if T.total_filesize == 0:
        if circle.rank == 0:
            print("\nRecovery size is 0 bytes, can't proceed.")
        circle.exit(0)

    if circle.rank == 0:
        print("\nResume copy\n")
        print("\t{:<20}{:<20}".format("Original size:", bytes_fmt(oldsz)))
        print("\t{:<20}{:<20}".format("Recovery size:", bytes_fmt(T.total_filesize)))
        print("")

    return cobj.workq
def epilogue(self):
    """Print the Fprof end-of-run summary on rank 0.

    Returns the global total file size (presumably valid on every rank,
    since total_tally() looks collective -- TODO confirm).
    """
    total_dirs, total_files, total_filesize, total_symlinks, total_skipped, maxfiles = self.total_tally()
    self.time_ended = MPI.Wtime()

    if self.circle.rank == 0:
        print("\nFprof epilogue:\n")
        fmt_msg1 = "\t{:<25}{:<20,}"  # numeric
        fmt_msg2 = "\t{:<25}{:<20}"  # string
        print(fmt_msg1.format("Directory count:", total_dirs))
        print(fmt_msg1.format("Sym Links count:", total_symlinks))
        print(fmt_msg1.format("File count:", total_files))
        print(fmt_msg1.format("Skipped count:", total_skipped))
        print(fmt_msg2.format("Total file size:", bytes_fmt(total_filesize)))
        if total_files != 0:  # guard against ZeroDivisionError on empty fileset
            print(fmt_msg2.format("Avg file size:", bytes_fmt(total_filesize/float(total_files))))
        print(fmt_msg1.format("Max files within dir:", maxfiles))
        elapsed_time = self.time_ended - self.time_started
        # objects (files + dirs + symlinks + skipped) processed per second
        processing_rate = int((total_files + total_dirs + total_symlinks + total_skipped) / elapsed_time)
        print(fmt_msg2.format("Tree walk time:", utils.conv_time(elapsed_time)))
        print(fmt_msg2.format("Scanning rate:", str(processing_rate) + "/s"))
        print(fmt_msg2.format("Fprof loads:", taskloads))
        print("")

    return total_filesize
def main():
    """FWALK entry point: parse args, walk the tree, print stats.

    NOTE: Python 2 only -- uses xrange, cmp and list.sort(cmp_func).
    """
    global comm, args
    args = parse_and_bcast(comm, gen_parser)
    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.use_store = args.use_store
    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    circle = Circle()
    treewalk = FWalk(circle, G.src)
    circle.begin(treewalk)  # collective: runs the distributed walk
    if G.use_store:
        treewalk.flushdb()

    if args.stats:
        # global_histogram() appears collective; the division below assumes
        # total > 0 -- TODO confirm an empty walk cannot reach here
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5  # one star per 2% of files
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                percent = 100 * hist[idx] / float(total)
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format("< ",
                    utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))

    if args.stats:
        # top-N largest files: sort locally, gather, then sort globally on rank 0
        treewalk.flist.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in xrange(args.top):
                print("\t{:15}{:<30}".format(utils.bytes_fmt(globaltops[i].st_size),
                                             globaltops[i].path))

    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()
def gen_histogram(total_file_size):
    """Print the fileset histogram on rank 0.

    Returns the (filecount, size-percent) histogram strings destined for
    syslog; on non-zero ranks both strings come back empty.
    """
    syslog_filecount_hist = ""
    syslog_fsizeperc_hist = ""
    bins_fmt = utils.bins_strs(G.bins)
    gather_histogram()  # collective: fills module-level hist/fsize
    if comm.rank == 0:
        total_num_of_files = hist.sum()
        if total_num_of_files == 0:
            err_and_exit("No histogram generated.\n")

        print("Fileset Histogram\n")
        msg = "\t{:<3}{:<15}{:<15,}{:>10}{:>15}{:>15}"
        msg2 = "\t{:<3}{:<15}{:<15}{:>10}{:>15}{:>15}"
        print(msg2.format("", "Buckets", "Num of Files", "Size", "%(Files)", "%(Size)"))
        print("")
        for idx, rightbound in enumerate(G.bins):
            percent_files = 100 * hist[idx] / float(total_num_of_files)
            # fix: guard against ZeroDivisionError when every file is
            # zero bytes (total_file_size == 0); matches the newer variant
            percent_size = 100 * fsize[idx] / float(total_file_size) if total_file_size != 0 else 0
            print(msg.format("<= ", utils.bytes_fmt(rightbound), hist[idx],
                             utils.bytes_fmt(fsize[idx]),
                             "%0.2f%%" % percent_files,
                             "%0.2f%%" % percent_size))
            syslog_filecount_hist += "%s = %s, " % (bins_fmt[idx], hist[idx])
            syslog_fsizeperc_hist += "%s = %s, " % (bins_fmt[idx], percent_size)

        # special processing of last row: the overflow bucket "> rightbound"
        percent_files = 100 * hist[-1] / float(total_num_of_files)
        percent_size = 100 * fsize[-1] / float(total_file_size) if total_file_size != 0 else 0
        print(msg.format("> ", utils.bytes_fmt(rightbound), hist[-1],
                         utils.bytes_fmt(fsize[-1]),
                         "%0.2f%%" % percent_files,
                         "%0.2f%%" % percent_size))
        syslog_filecount_hist += "%s = %s" % (bins_fmt[-1], hist[-1])
        syslog_fsizeperc_hist += "%s = %s" % (bins_fmt[-1], percent_size)

    return syslog_filecount_hist, syslog_fsizeperc_hist
def reduce_report(self, buf):
    """Print a periodic checksum-progress line (circle reduce callback).

    buf['vsize'] is the cumulative byte count checksummed so far -- TODO
    confirm against the reduce producer.
    """
    out = ""
    if self.totalsize != 0:  # skip the percentage when total is unknown/zero
        out += "%.2f %% block checksummed, " % (100 * float(buf['vsize']) / self.totalsize)
    out += "%s bytes done" % bytes_fmt(buf['vsize'])

    if self.circle.reduce_time_interval != 0:
        # rate = bytes progressed since the previous reduce window
        rate = float(buf['vsize'] - self.vsize_prior) / self.circle.reduce_time_interval
        self.vsize_prior = buf['vsize']
        out += ", estimated checksum rate: %s/s" % bytes_fmt(rate)
    print(out)
def reduce_report(self, buf):
    """Print a periodic copy-progress line (circle reduce callback).

    buf['cnt_filesize'] is the cumulative byte count copied so far -- TODO
    confirm against the reduce producer.
    """
    out = ""
    if self.totalsize != 0:  # skip the percentage when total is unknown/zero
        out += "%.2f %% finished, " % (100 * float(buf['cnt_filesize']) / self.totalsize)
    out += "%s copied" % bytes_fmt(buf['cnt_filesize'])

    if self.circle.reduce_time_interval != 0:
        # bytes progressed since the previous reduce window
        rate = float(buf['cnt_filesize'] - self.cnt_filesize_prior) / self.circle.reduce_time_interval
        self.cnt_filesize_prior = buf['cnt_filesize']
        out += ", estimated transfer rate: %s/s" % bytes_fmt(rate)
    print(out)
def reduce_report(self, buf):
    """Print a periodic copy-progress line with memory usage (reduce callback).

    buf['cnt_filesize'] is cumulative bytes copied; buf['mem_snapshot'] is
    the current memory snapshot -- TODO confirm against the reduce producer.
    """
    out = ""
    if self.totalsize != 0:
        # fix: true division instead of '//' -- floor division truncated the
        # percentage to a whole number, so "%.2f" always printed ".00"
        out += "%.2f %% finished, " % (100 * float(buf['cnt_filesize']) / self.totalsize)
    out += "%s copied" % bytes_fmt(buf['cnt_filesize'])

    if self.circle.reduce_time_interval != 0:
        # fix: true division here too, so the rate isn't floored;
        # matches the sibling reduce_report implementation
        rate = float(buf['cnt_filesize'] - self.cnt_filesize_prior) / self.circle.reduce_time_interval
        self.cnt_filesize_prior = buf['cnt_filesize']
        out += ", estimated transfer rate: %s/s" % bytes_fmt(rate)

    out += ", memory usage: %s" % bytes_fmt(buf['mem_snapshot'])
    print(out)
def reduce_report(self, buf):
    """Print a periodic verification-progress line (circle reduce callback).

    buf['vsize'] is the cumulative byte count verified so far -- TODO
    confirm against the reduce producer.
    """
    out = ""
    if self.totalsize != 0:  # skip the percentage when total is unknown/zero
        out += "%.2f %% verified, " % (100 * float(buf['vsize']) / self.totalsize)
    out += "%s bytes done" % bytes_fmt(buf['vsize'])
    print(out)
def epilogue(self):
    """Print the FWALK end-of-run summary on rank 0.

    Returns the global total file size (total_tally() looks collective, so
    presumably valid on every rank -- TODO confirm).
    """
    total_dirs, total_files, total_filesize, total_symlinks, total_skipped = self.total_tally()
    self.time_ended = MPI.Wtime()

    if self.circle.rank == 0:
        print("\nFWALK Epilogue:\n")
        print("\t{:<20}{:<20}".format("Directory count:", total_dirs))
        print("\t{:<20}{:<20}".format("Sym Links count:", total_symlinks))
        print("\t{:<20}{:<20}".format("File count:", total_files))
        print("\t{:<20}{:<20}".format("Skipped count:", total_skipped))
        print("\t{:<20}{:<20}".format("Total file size:", bytes_fmt(total_filesize)))
        if total_files != 0:  # avoid ZeroDivisionError on an empty fileset
            print("\t{:<20}{:<20}".format("Avg file size:", bytes_fmt(total_filesize/float(total_files))))
        # fix: label read "Tree talk time" (typo); other epilogues print
        # "Tree walk time", so make this variant consistent
        print("\t{:<20}{:<20}".format("Tree walk time:", utils.conv_time(self.time_ended - self.time_started)))
        print("\tFWALK Loads: %s" % taskloads)
        print("")

    return total_filesize
def main():
    """fprof entry point: parse args, profile the tree, report GPFS stats."""
    global comm, args
    args = parse_and_bcast(comm, gen_parser)
    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("fprof version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", G.src))

    circle = Circle()
    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)  # collective: runs the distributed walk

    gen_histogram()

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    if args.gpfs_block_alloc:
        # gather_gpfs_blocks() appears collective -- TODO confirm
        gpfs_blocks = gather_gpfs_blocks()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\tSubblocks: %s\n" % gpfs_blocks)
            for idx, bsz in enumerate(G.gpfs_block_size):
                # estimated on-disk usage for this block size
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]
                fmt_msg = "\tBlocksize: {:<6} Estimated Space: {:<20s} Efficiency: {:>6.0%}"
                if gpfs_file_size != 0:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), total_file_size/float(gpfs_file_size)))
                else:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()
def epilogue(self):
    """Report checksum completion time and average rate on rank 0."""
    self.wtime_ended = MPI.Wtime()
    if self.circle.rank == 0:
        print("")
        if self.totalsize == 0:
            # nothing checksummed; avoid a division by zero below
            return
        time = self.wtime_ended - self.wtime_started
        rate = float(self.totalsize) / time  # bytes per second
        print("Checksumming Completed In: %.2f seconds" % time)
        print("Average Rate: %s/s\n" % bytes_fmt(rate))
def prep_recovery():
    """ Prepare for checkpoint recovery, return recovered workq

    Reads this rank's pickle checkpoint, verifies the checkpoint count
    across ranks (collective), and computes the total recovery size.
    Aborts the MPI job if the pickle is unreadable.
    """
    global args, circle

    oldsz, tsz, sz = 0, 0, 0
    cobj = None
    local_checkpoint_cnt = 0
    chk_file = ".pcp_workq.%s.%s" % (args.rid, circle.rank)

    if os.path.exists(chk_file):
        local_checkpoint_cnt = 1
        with open(chk_file, "rb") as f:
            try:
                cobj = pickle.load(f)
                sz = get_workq_size(cobj.workq)
                src = cobj.src
                dest = cobj.dest
                oldsz = cobj.totalsize
            except Exception as e:
                # a corrupt checkpoint is unrecoverable: abort every rank
                log.error("error reading %s" % chk_file, extra=dmsg)
                circle.comm.Abort()

    log.debug("located chkpoint %s, sz=%s, local_cnt=%s" %
              (chk_file, sz, local_checkpoint_cnt), extra=dmsg)

    total_checkpoint_cnt = circle.comm.allreduce(local_checkpoint_cnt)
    log.debug("total_checkpoint_cnt = %s" % total_checkpoint_cnt, extra=dmsg)
    verify_checkpoint(chk_file, total_checkpoint_cnt)

    # acquire total size
    G.totalsize = circle.comm.allreduce(sz)
    if G.totalsize == 0:
        if circle.rank == 0:
            print("\nRecovery size is 0 bytes, can't proceed.")
        circle.exit(0)

    if circle.rank == 0:
        print("\nResume copy\n")
        print("\t{:<20}{:<20}".format("Original size:", bytes_fmt(oldsz)))
        print("\t{:<20}{:<20}".format("Recovery size:", bytes_fmt(G.totalsize)))
        print("")

    return cobj.workq
def epilogue(self):
    """Print the Fprof end-of-run summary on rank 0 and forward it to syslog.

    Returns the global total file size from the Tally counters.
    """
    self.total_tally()  # collective: populates module-level Tally counters
    self.time_ended = MPI.Wtime()

    if self.circle.rank == 0:
        print("\nFprof epilogue:\n")
        fmt_msg1 = "\t{:<25}{:<20,}"  # numeric
        fmt_msg2 = "\t{:<25}{:<20}"  # string
        print(fmt_msg1.format("Directory count:", Tally.total_dirs))
        print(fmt_msg1.format("Sym links count:", Tally.total_symlinks))
        print(fmt_msg1.format("Hard linked files:", Tally.total_nlinked_files))
        print(fmt_msg1.format("File count:", Tally.total_files))
        if args.profdev:  # device-file stats only with --profdev
            print(fmt_msg1.format("Dev file count:", Tally.devfile_cnt))
            print(fmt_msg2.format("Dev file size:", bytes_fmt(Tally.devfile_sz)))
        print(fmt_msg1.format("Skipped count:", Tally.total_skipped))
        print(fmt_msg2.format("Total file size:", bytes_fmt(Tally.total_filesize)))
        if Tally.total_files != 0:  # avoid ZeroDivisionError on empty fileset
            print(fmt_msg2.format("Avg file size:", bytes_fmt(Tally.total_filesize/float(Tally.total_files))))
        print(fmt_msg1.format("Max files within dir:", Tally.max_files))
        elapsed_time = self.time_ended - self.time_started
        # objects processed per second across all categories
        processing_rate = int((Tally.total_files + Tally.total_dirs + Tally.total_symlinks + Tally.total_skipped) / elapsed_time)
        print(fmt_msg2.format("Tree walk time:", utils.conv_time(elapsed_time)))
        print(fmt_msg2.format("Scanning rate:", str(processing_rate) + "/s"))
        print(fmt_msg2.format("Fprof loads:", taskloads))
        print("")

        # mirror the key metrics to syslog for external collection
        sendto_syslog("fprof.rootpath", "%s" % ",".join(G.src))
        sendto_syslog("fprof.version", "%s" % __version__)
        sendto_syslog("fprof.dir_count", Tally.total_dirs)
        sendto_syslog("fprof.sym_count", Tally.total_symlinks)
        sendto_syslog("fprof.file_count", Tally.total_files)
        sendto_syslog("fprof.total_file_size", bytes_fmt(Tally.total_filesize))
        if Tally.total_files > 0:
            sendto_syslog("fprof.avg_file_size", bytes_fmt(Tally.total_filesize/float(Tally.total_files)))
        sendto_syslog("fprof.walktime", utils.conv_time(elapsed_time))
        sendto_syslog("fprof.scan_rate", processing_rate)

    return Tally.total_filesize
def gen_histogram():
    """Print a star-bar fileset histogram on rank 0."""
    gather_histogram()  # collective: fills the module-level hist array
    if comm.rank == 0:
        total = hist.sum()
        bucket_scale = 0.5  # one star per 2% of files
        if total == 0:
            err_and_exit("No histogram generated.\n")
        print("\nFileset histograms:\n")
        msg = "\t{:<3}{:<15}{:<15,} {:>8} {:<50}"
        for idx, rightbound in enumerate(G.bins):
            percent = 100 * hist[idx] / float(total)
            star_count = int(bucket_scale * percent)
            print(msg.format("< ", utils.bytes_fmt(rightbound), hist[idx],
                             "%0.2f%%" % percent, '∎' * star_count))
        # special processing of last row: overflow bucket; note this reuses
        # `rightbound` leaked from the loop (the largest bin boundary)
        percent = 100 * hist[-1] / float(total)
        star_count = int(bucket_scale * percent)
        print(msg.format("> ", utils.bytes_fmt(rightbound), hist[-1],
                         "%0.2f%%" % percent, '∎' * star_count))
def reduce_report(self, buf):
    """Print scan progress: item count, rate, memory HWM, queue depth."""
    # progress report
    # rate = (buf['cnt_files'] - self.last_cnt)/(MPI.Wtime() - self.last_reduce_time)
    # print("Processed objects: %s, estimated processing rate: %d/s" % (buf['cnt_files'], rate))
    # self.last_cnt = buf['cnt_files']

    # items processed since the previous reduce callback
    rate = (buf['reduce_items'] - self.last_cnt) / (MPI.Wtime() - self.last_reduce_time)
    fmt_msg = "Scanned files: {:<12,} Processing rate: {:<6,}/s HWM mem: {:<12} Work Queue: {:<12,}"
    print(fmt_msg.format(
        buf['reduce_items'], int(rate), bytes_fmt(buf['mem_snapshot']), buf['work_qsize']))
    self.last_cnt = buf['reduce_items']
    self.last_reduce_time = MPI.Wtime()
def epilogue(self):
    """Print the FCP end-of-run summary on rank 0."""
    global taskloads
    self.wtime_ended = MPI.Wtime()
    # collective: collect per-rank work counts before rank 0 reports
    taskloads = self.circle.comm.gather(self.reduce_items)
    if self.circle.rank == 0:
        if self.totalsize == 0:
            # nothing copied; avoid a division by zero below
            print("\nZero filesize detected, done.\n")
            return
        tlapse = self.wtime_ended - self.wtime_started
        rate = float(self.totalsize) / tlapse  # bytes per second
        print("\nFCP Epilogue:\n")
        print("\t{:<20}{:<20}".format("Ending at:", utils.current_time()))
        print("\t{:<20}{:<20}".format("Completed in:", utils.conv_time(tlapse)))
        print("\t{:<20}{:<20}".format("Transfer Rate:", "%s/s" % bytes_fmt(rate)))
        print("\t{:<20}{:<20}".format("FCP Loads:", "%s" % taskloads))
def reduce_report(self, buf):
    """Print scan progress; picks a Python-2.6-safe format when needed."""
    # progress report
    # rate = (buf['cnt_files'] - self.last_cnt)/(MPI.Wtime() - self.last_reduce_time)
    # print("Processed objects: %s, estimated processing rate: %d/s" % (buf['cnt_files'], rate))
    # self.last_cnt = buf['cnt_files']

    # items processed since the previous reduce callback
    rate = (buf['reduce_items'] - self.last_cnt) / \
        (MPI.Wtime() - self.last_reduce_time)

    if py_version() == "py26":
        # 2.6 lacks auto-numbered fields and the ',' thousands separator
        fmt_msg = "Scanned files: {0:<12} Processing rate: {1:<6}/s HWM mem: {2:<12} Work Queue: {3:<12}"
    else:
        fmt_msg = "Scanned files: {:<12,} Processing rate: {:<6,}/s HWM mem: {:<12} Work Queue: {:<12,}"

    print(
        fmt_msg.format(buf['reduce_items'], int(rate),
                       bytes_fmt(buf['mem_snapshot']), buf['work_qsize']))

    self.last_cnt = buf['reduce_items']
    self.last_reduce_time = MPI.Wtime()
def epilogue(self):
    """Print the FCP end-of-run summary (incl. store flags) on rank 0."""
    global taskloads
    self.wtime_ended = MPI.Wtime()
    # collective: collect per-rank work counts before rank 0 reports
    taskloads = self.circle.comm.gather(self.reduce_items)
    if self.circle.rank == 0:
        if self.totalsize == 0:
            # nothing copied; avoid a division by zero below
            print("\nZero filesize detected, done.\n")
            return
        tlapse = self.wtime_ended - self.wtime_started
        rate = float(self.totalsize) / tlapse  # bytes per second
        print("\nFCP Epilogue:\n")
        print("\t{:<20}{:<20}".format("Ending at:", utils.current_time()))
        print("\t{:<20}{:<20}".format("Completed in:", utils.conv_time(tlapse)))
        print("\t{:<20}{:<20}".format("Transfer Rate:", "%s/s" % bytes_fmt(rate)))
        print("\t{:<20}{:<20}".format("Use store chunksums:", "%s" % self.use_store))
        print("\t{:<20}{:<20}".format("Use store workq:", "%s" % self.circle.use_store))
        print("\t{:<20}{:<20}".format("FCP Loads:", "%s" % taskloads))
def gen_signature(bfsign, totalsize):
    """ Generate a signature for dataset, it assumes the checksum
    option is set and done

    Aggregates per-rank checksums (collective) into one SHA1 signature and,
    on rank 0, prints it and writes a signature manifest to args.output.
    """
    if comm.rank == 0:
        print("\nAggregating dataset signature ...\n")

    # aggregate_checksums() appears collective, so time it on every rank
    tbegin = MPI.Wtime()
    sig = aggregate_checksums(bfsign)
    tend = MPI.Wtime()

    if comm.rank == 0:
        print("\t{:<20}{:<20}".format("Running time:", utils.conv_time(tend - tbegin)))
        print("\t{:<20}{:<20}".format("SHA1 Signature:", sig))
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sig)
            f.write("chunksize: %s\n" % fcp.chunksize)
            f.write("fcp version: %s\n" % __version__)
            f.write("src: %s\n" % fcp.src)
            f.write("destination: %s\n" % fcp.dest)
            f.write("date: %s\n" % utils.current_time())
            # fix: manifest key was misspelled "totoalsize"
            f.write("totalsize: %s\n" % utils.bytes_fmt(totalsize))
def main():
    """FWALK entry point: parse args, walk the tree, print stats.

    NOTE: Python 2 only -- uses xrange, cmp and list.sort(cmp_func).
    """
    global comm, args
    args = parse_and_bcast(comm, gen_parser)
    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.use_store = args.use_store
    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    circle = Circle()
    treewalk = FWalk(circle, G.src)
    circle.begin(treewalk)  # collective: runs the distributed walk
    if G.use_store:
        treewalk.flushdb()

    if args.stats:
        # global_histogram() appears collective; the division below assumes
        # total > 0 -- TODO confirm an empty walk cannot reach here
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5  # one star per 2% of files
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                percent = 100 * hist[idx] / float(total)
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format(
                    "< ", utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))

    if args.stats:
        # top-N largest files: sort locally, gather, then sort globally on rank 0
        treewalk.flist.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in xrange(args.top):
                print("\t{:15}{:<30}".format(
                    utils.bytes_fmt(globaltops[i].st_size), globaltops[i].path))

    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()
def epilogue(self):
    """Print the Fprof end-of-run summary on rank 0 (2.6-compatible
    formats), optionally mirror to syslog, and return the total file size.
    """
    self.total_tally()  # collective: populates module-level Tally counters
    self.time_ended = MPI.Wtime()

    if self.circle.rank == 0:
        print("\nFprof epilogue:\n")
        if py_version() != "py26":
            fmt_msg1 = "\t{0:<25}{1:<20,}"  # numeric
        else:  # 2.6 compat: no ',' thousands separator
            fmt_msg1 = "\t{0:<25}{1:<20}"  # numeric
        fmt_msg2 = "\t{0:<25}{1:<20}"  # string
        fmt_msg3 = "\t{0:<25}{1:<20.2f}"  # float
        print(fmt_msg1.format("Directory count:", Tally.total_dirs))
        print(fmt_msg1.format("Sym links count:", Tally.total_symlinks))
        print(
            fmt_msg1.format("Hard linked files:", Tally.total_nlinked_files))
        print(fmt_msg1.format("File count:", Tally.total_files))
        print(fmt_msg1.format("Zero byte files:", Tally.total_0byte_files))
        print(fmt_msg1.format("Sparse files:", Tally.total_sparse))
        if args.profdev:  # device-file stats only with --profdev
            print(fmt_msg1.format("Dev file count:", Tally.devfile_cnt))
            print(
                fmt_msg2.format("Dev file size:", bytes_fmt(Tally.devfile_sz)))
        print(fmt_msg1.format("Skipped count:", Tally.total_skipped))
        print(
            fmt_msg2.format("Total file size:", bytes_fmt(Tally.total_filesize)))

        if args.cpr:
            # 512-byte blocks actually allocated vs. logical (stat) size;
            # presumably assumes compressing filesystem -- TODO confirm
            compressed = float(Tally.total_blocks * 512)
            uncompressed = float(Tally.total_stat_filesize)
            ratio = uncompressed / compressed
            saving = 1 - compressed / uncompressed
            print(fmt_msg3.format("Compression Ratio:", ratio))
            print(fmt_msg3.format("Compression Saving:", saving))

        if Tally.total_files != 0:  # avoid ZeroDivisionError on empty fileset
            print(
                fmt_msg2.format(
                    "Avg file size:",
                    bytes_fmt(Tally.total_filesize / float(Tally.total_files))))
        print(fmt_msg1.format("Max files within dir:", Tally.max_files))
        elapsed_time = self.time_ended - self.time_started
        # objects processed per second across all categories
        processing_rate = int(
            (Tally.total_files + Tally.total_dirs + Tally.total_symlinks +
             Tally.total_skipped) / elapsed_time)
        print(
            fmt_msg2.format("Tree walk time:", utils.conv_time(elapsed_time)))
        print(
            fmt_msg2.format("Scanning rate:", str(processing_rate) + "/s"))
        print(fmt_msg2.format("Fprof loads:", Tally.taskloads))
        print("")

        if args.syslog:
            # mirror the key metrics to syslog for external collection
            sendto_syslog("fprof.rootpath", "%s" % ",".join(G.src))
            sendto_syslog("fprof.version", "%s" % __version__)
            sendto_syslog("fprof.revid", "%s" % __revid__)
            sendto_syslog("fprof.dir_count", Tally.total_dirs)
            sendto_syslog("fprof.sym_count", Tally.total_symlinks)
            sendto_syslog("fprof.file_count", Tally.total_files)
            sendto_syslog("fprof.total_file_size", bytes_fmt(Tally.total_filesize))
            if Tally.total_files > 0:
                sendto_syslog(
                    "fprof.avg_file_size",
                    bytes_fmt(Tally.total_filesize / float(Tally.total_files)))
            sendto_syslog("fprof.walktime", utils.conv_time(elapsed_time))
            sendto_syslog("fprof.scan_rate", processing_rate)

    return Tally.total_filesize
def main():
    """fprof entry point: walk the tree, then histogram, top-N and GPFS
    block-allocation reports.
    """
    global comm, args
    fpipe.listen()
    args = parse_and_bcast(comm, gen_parser)
    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    if args.exclude:
        process_exclude_file()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("fprof version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", G.src))
        if args.exclude:
            print("\nExclusions:\n")
            for ele in EXCLUDE:
                print("\t %s" % ele)

    circle = Circle()
    # per-process reporting and reduce-based progress are mutually exclusive
    if args.perprocess:
        circle.report_enabled = True
    else:
        circle.reduce_enabled = True

    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)  # collective: runs the distributed walk

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    msg1, msg2 = gen_histogram(total_file_size)
    if comm.rank == 0:
        sendto_syslog("fprof.filecount.hist", msg1)
        sendto_syslog("fprof.fsize_perc.hist", msg2)

    if args.top:
        topfiles = gather_topfiles()
        if comm.rank == 0:
            print("\nTop File Report:\n")
            # edge case: not enough files (< args.top)
            totaln = args.top if len(topfiles) > args.top else len(topfiles)
            for index, _ in enumerate(xrange(totaln)):
                size, path = topfiles[index]
                print("\t%s: %s (%s)" % (index + 1, path, utils.bytes_fmt(size)))
            print("")

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\tinode size: %s" % args.inodesz)
            print("\tDII (data-in-inode) count: %s" % DII_COUNT)
            print("\tSubblocks: %s\n" % gpfs_blocks)
            for idx, bsz in enumerate(G.gpfs_block_size):
                # estimated on-disk usage for this block size
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]
                fmt_msg = "\tBlocksize: {:<6} Estimated Space: {:<20s} Efficiency: {:>6.2%}"
                if gpfs_file_size != 0:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size),
                                         total_file_size/float(gpfs_file_size)))
                else:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()
def set_adaptive_chunksize(self, totalsz):
    """Pick and store a chunk size appropriate for a dataset of
    ``totalsz`` bytes; rank 0 announces the chosen value."""
    chosen = utils.calc_chunksize(totalsz)
    self.chunksize = chosen
    if self.circle.rank != 0:
        return
    print("Adaptive chunksize: %s" % bytes_fmt(chosen))
def gen_histogram(total_file_size):
    """Generate file set histogram

    Prints the bucketed file-count/size table on rank 0 and returns the
    (filecount, size-percent) strings destined for syslog; on non-zero
    ranks both strings come back empty.
    """
    syslog_filecount_hist = ""
    syslog_fsizeperc_hist = ""
    bins_fmt = utils.bins_strs(G.bins)
    gather_histogram()  # collective: fills module-level hist/fsize
    if comm.rank == 0:
        total_num_of_files = hist.sum()
        if total_num_of_files == 0:
            err_and_exit("No histogram generated.\n")

        print("Fileset Histogram\n")

        if py_version() == "py26":
            # 2.6 compat: numbered fields, no ',' thousands separator
            msg = "\t{0:<3}{1:<15}{2:<15}{3:>10}{4:>15}{5:>15}"
            msg2 = "\t{0:<3}{1:<15}{2:<15}{3:>10}{4:>15}{5:>15}"
        else:
            msg = "\t{:<3}{:<15}{:<15,}{:>10}{:>15}{:>15}"
            msg2 = "\t{:<3}{:<15}{:<15}{:>10}{:>15}{:>15}"

        print(
            msg2.format("", "Buckets", "Num of Files", "Size", "%(Files)",
                        "%(Size)"))
        print("")
        for idx, rightbound in enumerate(G.bins):
            # guard both divisions against zero totals (e.g. all-empty files)
            percent_files = 100 * \
                hist[idx] / \
                float(total_num_of_files) if total_num_of_files != 0 else 0
            percent_size = 100 * \
                fsize[idx] / \
                float(total_file_size) if total_file_size != 0 else 0

            print(
                msg.format("<= ", utils.bytes_fmt(rightbound), hist[idx],
                           utils.bytes_fmt(fsize[idx]),
                           "%0.2f%%" % percent_files,
                           "%0.2f%%" % percent_size))

            # NO BLOCK HISTOGRAM
            #
            # bucket_scale = 0.30
            # star_count = int(bucket_scale * percent)
            # print(msg.format("<= ", utils.bytes_fmt(rightbound),
            #                 hist[idx],
            #                 utils.bytes_fmt(fsize[idx]),
            #                 "%0.2f%%" % percent, '∎' * star_count))

            syslog_filecount_hist += "%s = %s, " % (bins_fmt[idx], hist[idx])
            syslog_fsizeperc_hist += "%s = %s, " % (bins_fmt[idx], percent_size)

        # special processing of last row: overflow bucket "> rightbound"
        percent_files = 100 * \
            hist[-1] / \
            float(total_num_of_files) if total_num_of_files != 0 else 0
        percent_size = 100 * \
            fsize[-1] / float(total_file_size) if total_file_size != 0 else 0

        print(
            msg.format("> ", utils.bytes_fmt(rightbound), hist[-1],
                       utils.bytes_fmt(fsize[-1]),
                       "%0.2f%%" % percent_files,
                       "%0.2f%%" % percent_size))

        # star_count = int(bucket_scale * percent)
        # print(msg.format("> ", utils.bytes_fmt(rightbound), hist[-1],
        #                 utils.bytes_fmt(fsize[-1]),
        #                 "%0.2f%%" % percent, '∎' * star_count))

        syslog_filecount_hist += "%s = %s" % (bins_fmt[-1], hist[-1])
        syslog_fsizeperc_hist += "%s = %s" % (bins_fmt[-1], percent_size)

    # end of if comm.rank == 0
    return syslog_filecount_hist, syslog_fsizeperc_hist
def main():
    """fprof entry point: walk the tree, then histogram, directory-profile,
    top-N, GPFS block-allocation and Lustre-stripe reports as requested.
    """
    global comm, args, stripe_out, DIR_BINS, DIR_HIST

    fpipe.listen()

    args = parse_and_bcast(comm, gen_parser)
    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.memitem_threshold = args.item
    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    # doing directory profiling?
    if args.dirprof:
        # check the input
        if args.dirbins is None:
            # err_and_exit("Error: missing directory bin parameters: a sorted integer list\n")
            args.dirbins = [
                0, 10, 100, 1000, 10**4, 10**5, 10**6, 10**7, 10**8
            ]
        else:
            # bins must be unique and ascending
            myList = sorted(set(args.dirbins))
            if myList != args.dirbins:
                err_and_exit("Error: duplicated, or unsorted bins: %s\n" %
                             args.dirbins)
        DIR_BINS = args.dirbins
        DIR_HIST = [0] * (len(DIR_BINS) + 1)  # +1 for the overflow bucket

    # Doing stripe analysis? lfs is not really bullet-proof way
    # we might need a better way of doing fstype check.
    if args.lustre_stripe:
        G.lfs_bin = lfs.check_lfs()
        G.stripe_threshold = utils.conv_unit(args.stripe_threshold)
        try:
            stripe_out = os.open(args.stripe_output,
                                 os.O_CREAT | os.O_WRONLY | os.O_APPEND)
        except:
            err_and_exit("Error: can't create stripe output: %s" %
                         args.stripe_output)

    if args.exclude:
        process_exclude_file()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{0:<20}{1:<20}".format("fprof version:", __version__))
        print("\t{0:<20}{1:<20}".format("Full rev id:", __revid__))
        print("\t{0:<20}{1:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{0:<20}{1:<20}".format("Num of processes:",
                                        MPI.COMM_WORLD.Get_size()))
        if args.syslog:
            print("\t{0:<20}{1:<20}".format("Syslog report: ", "yes"))
        else:
            print("\t{0:<20}{1:<20}".format("Syslog report: ", "no"))
        if args.dirprof:
            print("\t{0:<20}{1:<20}".format("Dir bins: ", args.dirbins))
        if args.lustre_stripe:
            print("\t{0:<20}{1:<20}".format("Stripe analysis: ", "yes"))
            print("\t{0:<20}{1:<20}".format("Stripe threshold: ",
                                            args.stripe_threshold))
        else:
            print("\t{0:<20}{1:<20}".format("Stripe analysis: ", "no"))
        print("\t{0:<20}{1:<20}".format("Root path:", G.src))
        if args.exclude:
            print("\nExclusions:\n")
            for ele in EXCLUDE:
                print("\t %s" % ele)

    circle = Circle()
    # per-process reporting, reduce-based progress, or neither
    if args.perprocess:
        circle.report_enabled = True
    else:
        circle.report_enabled = False

    if args.progress:
        circle.report_enabled = False
        circle.reduce_enabled = True

    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)  # collective: runs the distributed walk

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    msg1, msg2 = gen_histogram(total_file_size)

    if args.dirprof:
        gen_directory_histogram()

    if comm.rank == 0 and args.syslog:
        sendto_syslog("fprof.filecount.hist", msg1)
        sendto_syslog("fprof.fsize_perc.hist", msg2)

    if args.topn_files:
        topfiles = gather_topfiles()
        if comm.rank == 0:
            print("\nTop N File Report:\n")
            # edge case: not enough files (< args.top)
            totaln = args.topn_files if len(
                topfiles) > args.topn_files else len(topfiles)
            for index, _ in enumerate(xrange(totaln)):
                size, path = topfiles[index]
                print("\t%s: %s (%s)" % (index + 1, path,
                                         utils.bytes_fmt(size)))
            print("")

    if args.topn_dirs:
        topdirs = gather_topdirs()
        if comm.rank == 0:
            print("\nTop N Directory Report:\n")
            totaln = args.topn_dirs if len(topdirs) > args.topn_dirs else len(
                topdirs)
            for index, _ in enumerate(xrange(totaln)):
                size, path = topdirs[index]
                print("\t{0:}: {1:} ({2:,} items)".format(
                    index + 1, path, size))
            print("")

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        gather_gpfs_dii()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\t{0:<15}{1:<4}".format("inode size:", args.inodesz))
            print("\t{0:<25}{1:>15,}".format("DII (data-in-inode) count:",
                                             DII_COUNT))
            print("\tSubblocks: %s\n" % gpfs_blocks)
            fmt_msg = "\tBlocksize: {0:<6} Estimated Space: {1:<20s} Efficiency: {2:>6.2%}"
            for idx, bsz in enumerate(G.gpfs_block_size):
                # estimated on-disk usage for this block size
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]
                if gpfs_file_size != 0:
                    print(
                        fmt_msg.format(bsz, bytes_fmt(gpfs_file_size),
                                       total_file_size / float(gpfs_file_size)))
                else:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()

    if args.lustre_stripe and stripe_out:
        os.close(stripe_out)
        # note: this gather is only reached when stripe analysis ran --
        # presumably every rank takes the same branch; TODO confirm
        sp_workload = comm.gather(Tally.spcnt)
        if comm.rank == 0:
            print("Stripe workload total: %s, distribution: %s" %
                  (sum(sp_workload), sp_workload))