def main():
    """Walk the tree under ``args.path`` and print the run report.

    Parses and broadcasts the command-line arguments, validates the source
    path, runs an ``FWalk`` task on a ``Circle`` and, when ``args.stats`` is
    set, prints a file-size histogram followed by the largest files found.
    The report itself is printed by rank 0 only.

    Side effects: rebinds the module-level ``args``, sets ``G.src``,
    ``G.use_store`` and ``G.loglevel``, and may lower ``args.top`` to the
    number of files actually gathered.
    """
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.use_store = args.use_store
    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    circle = Circle()
    treewalk = FWalk(circle, G.src)
    circle.begin(treewalk)

    if G.use_store:
        treewalk.flushdb()

    if args.stats:
        # --- file size histogram ---
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                # An empty walk gives total == 0; report 0% instead of
                # dividing by zero.
                percent = 100 * hist[idx] / float(total) if total else 0.0
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format(
                    "< ", utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))

        # --- largest files ---
        # key= is accepted by both Python 2 and 3; the positional cmp
        # function used previously only works on Python 2.
        treewalk.flist.sort(key=lambda f: f.st_size, reverse=True)
        # Each rank contributes its local top candidates; rank 0 merges them.
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(key=lambda f: f.st_size, reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in range(args.top):
                print("\t{:15}{:<30}".format(
                    utils.bytes_fmt(globaltops[i].st_size), globaltops[i].path))

    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()
def main():
    """Profile the tree under ``args.path`` and print the fprof report.

    Parses and broadcasts the arguments, validates the source path, runs a
    ``ProfileWalk`` on a ``Circle``, calls ``gen_histogram()`` and, when
    ``args.gpfs_block_alloc`` is set, has rank 0 print an estimated space
    and efficiency line for every entry of ``G.gpfs_block_size``.
    """
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        row = "\t{:<20}{:<20}"
        print("Running Parameters:\n")
        print(row.format("fprof version:", __version__))
        print(row.format("Num of hosts:", hosts_cnt))
        print(row.format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print(row.format("Root path:", G.src))

    circle = Circle()
    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)

    gen_histogram()

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        if comm.rank == 0:
            fmt_msg = "\tBlocksize: {:<6} Estimated Space: {:<20s} Efficiency: {:>6.0%}"
            print("\nGPFS Block Alloc Report:\n")
            print("\tSubblocks: %s\n" % gpfs_blocks)
            for idx, bsz in enumerate(G.gpfs_block_size):
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]
                space = bytes_fmt(gpfs_file_size)
                if gpfs_file_size == 0:
                    # nothing allocated at this block size
                    efficiency = 0
                else:
                    efficiency = total_file_size / float(gpfs_file_size)
                print(fmt_msg.format(bsz, space, efficiency))

    treewalk.cleanup()
    circle.finalize()
def main():
    """Entry point of the parallel copy tool.

    Visible steps, in order: install the SIGINT handler, parse and broadcast
    the arguments, copy run options onto ``G``, resolve source/destination,
    create the work ``Circle``, print the run parameters on rank 0, call
    ``fcp_start()``, optionally run a ``PVerify`` pass and signature
    generation, fix ownership/permissions, and clean up.

    Rebinds the module-level ``args`` and ``circle``; ``fcp`` and
    ``treewalk`` are read here and presumably bound by ``fcp_start()``
    (defined elsewhere - verify).
    """
    global args, log, circle, fcp, treewalk

    # This might be an overkill function
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    # Return value is discarded; ``num_of_hosts`` printed below is presumably
    # set as a side effect of this call - TODO confirm.
    tally_hosts()
    G.loglevel = args.loglevel
    G.fix_opt = False if args.no_fixopt else True
    G.preserve = args.preserve
    G.resume = True if args.cpid else False
    G.reduce_interval = args.reduce_interval
    G.verbosity = args.verbosity
    G.am_root = True if os.geteuid() == 0 else False

    if args.signature:  # with signature implies doing verify as well
        args.verify = True

    G.src, G.dest = check_source_and_target(args.src, args.dest)
    dbname = get_workq_name()

    circle = Circle()
    circle.dbname = dbname

    if args.rid:
        circle.resume = True
        args.signature = False  # when recovery, no signature

    if not args.cpid:
        # No checkpoint id supplied: broadcast a timestamp so every rank
        # ends up with the same id.
        ts = utils.timestamp()
        args.cpid = circle.comm.bcast(ts)

    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<25}{:<20}".format("Starting at:", utils.current_time()))
        print("\t{:<25}{:<20}".format("FCP version:", __version__))
        print("\t{:<25}{:<20}".format("Source:", utils.choplist(G.src)))
        print("\t{:<25}{:<20}".format("Destination:", os.path.abspath(args.dest)))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Num of Hosts:", num_of_hosts, "|", "Num of Processes:", comm.size))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Overwrite:", "%r" % args.force, "|", "Copy Verification:", "%r" % args.verify))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Dataset signature:", "%r" % args.signature, "|", "Stripe Preserve:", "%r" % G.preserve))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Checkpoint interval:", "%s" % utils.conv_time(args.cptime), "|", "Checkpoint ID:", "%s" % args.cpid))
        # if args.verbosity > 0:
        print("\t{:<25}{:<20}".format("Copy Mode:", G.copytype))

    fcp_start()

    if args.pause and args.verify:
        if circle.rank == 0:
            # raw_input("\n--> Press any key to continue ...\n")
            print("Pause, resume after %s seconds ..." % args.pause)
            sys.stdout.flush()
        # Every rank sleeps, then all ranks synchronize before verification.
        time.sleep(args.pause)
        circle.comm.Barrier()

    # do checksum verification
    if args.verify:
        # A fresh Circle is created for the verification pass; note that
        # this rebinds the module-level ``circle``.
        circle = Circle()
        pcheck = PVerify(circle, fcp, G.totalsize)
        circle.begin(pcheck)
        tally = pcheck.fail_tally()
        # Share the tally so every rank takes the same branch below.
        tally = comm.bcast(tally)
        if circle.rank == 0:
            print("")
            if tally == 0:
                print("\t{:<20}{:<20}".format("Result:", "PASS"))
            else:
                print("\t{:<20}{:<20}".format("Result:", "FAILED"))

        comm.Barrier()

        # Only reached with a signature request when nothing failed;
        # args.signature implies args.verify (set above).
        if args.signature and tally == 0:
            gen_signature(fcp, G.totalsize)

    # fix permission
    comm.Barrier()
    if G.fix_opt and treewalk:
        if comm.rank == 0:
            print("\nFixing ownership and permissions ...")
        fix_opt(treewalk)

    if treewalk:
        treewalk.cleanup()

    if fcp:
        fcp.epilogue()
        fcp.cleanup()

    # if circle:
    #     circle.finalize(cleanup=True)
    # TODO: a close file error can happen when circle.finalize()
    #
    # NOTE(review): circle.finalize() is skipped (see TODO above); only a
    # DbStore-backed work queue is cleaned up explicitly.
    if isinstance(circle.workq, DbStore):
        circle.workq.cleanup()
def main():
    """Entry point of the parallel copy tool (checkpoint/tempdir variant).

    Visible steps, in order: install the SIGINT handler, parse and broadcast
    the arguments, copy run options onto ``G``, create the per-run temp
    directory ``.pcircle<cpid>`` under the current directory, resolve
    source/destination, create the work ``Circle``, derive the per-process
    open-file limit, print the run parameters on rank 0, call
    ``fcp_start()``, optionally run a ``PVerify`` pass and signature
    generation, fix ownership/permissions, clean up, and finally let rank 0
    remove the temp directory.

    Rebinds the module-level ``args``, ``circle`` and ``oflimit``; ``fcp``
    and ``treewalk`` are read here and presumably bound by ``fcp_start()``
    (defined elsewhere - verify).
    """
    global args, log, circle, fcp, treewalk, oflimit

    # This might be an overkill function
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    # Return value is discarded; ``num_of_hosts`` used below is presumably
    # set as a side effect of this call - TODO confirm.
    tally_hosts()
    G.loglevel = args.loglevel
    G.fix_opt = not args.no_fixopt
    G.preserve = args.preserve
    G.resume = bool(args.cpid)
    G.reduce_interval = args.reduce_interval
    G.verbosity = args.verbosity
    G.am_root = os.geteuid() == 0
    G.memitem_threshold = args.item

    if args.signature:  # with signature implies doing verify as well
        args.verify = True

    if args.rid:
        G.resume = True
        args.force = True
        G.rid = args.rid
        args.signature = False  # when recovery, no signature

    if not args.cpid:
        # No checkpoint id supplied: broadcast a timestamp so every rank
        # ends up with the same id (and therefore the same temp dir name).
        ts = utils.timestamp()
        args.cpid = MPI.COMM_WORLD.bcast(ts)

    G.tempdir = os.path.join(os.getcwd(), (".pcircle" + args.cpid))
    if not os.path.exists(G.tempdir):
        try:
            os.mkdir(G.tempdir)
        except OSError:
            # Several ranks may race to create the same directory; losing
            # that race is fine.
            pass

    G.src, G.dest = check_source_and_target(args.src, args.dest)
    # Result is currently unused (see the commented assignment below); the
    # call is kept in case get_workq_name() has side effects.
    dbname = get_workq_name()

    circle = Circle(dbname="fwalk")
    #circle.dbname = dbname

    if num_of_hosts != 0:
        # Split the process's file-descriptor budget: keep 64 in reserve,
        # share the rest among the ranks on this host, then halve it.
        max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
        procs_per_host = circle.size // num_of_hosts
        oflimit = ((max_ofile - 64) // procs_per_host) // 2
    if oflimit < 8:
        oflimit = 8

    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<25}{:<20}".format("Starting at:", utils.current_time()))
        print("\t{:<25}{:<20}".format("FCP version:", __version__))
        print("\t{:<25}{:<20}".format("Source:", utils.choplist(G.src)))
        print("\t{:<25}{:<20}".format("Destination:", os.path.abspath(args.dest)))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format(
            "Num of Hosts:", num_of_hosts, "|", "Num of Processes:", comm.size))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format(
            "Overwrite:", "%r" % args.force, "|", "Copy Verification:", "%r" % args.verify))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format(
            "Dataset signature:", "%r" % args.signature, "|", "Stripe Preserve:", "%r" % G.preserve))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format(
            "Checkpoint interval:", "%s" % utils.conv_time(args.cptime), "|",
            "Checkpoint ID:", "%s" % args.cpid))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format(
            "Items in memory: ", " % r" % G.memitem_threshold, "|",
            "O file limit", "%s" % oflimit))
        # if args.verbosity > 0:
        print("\t{:<25}{:<20}".format("Copy Mode:", G.copytype))

    fcp_start()

    if args.pause and args.verify:
        if circle.rank == 0:
            # raw_input("\n--> Press any key to continue ...\n")
            print("Pause, resume after %s seconds ..." % args.pause)
            sys.stdout.flush()
        # Every rank sleeps, then all ranks synchronize before verification.
        time.sleep(args.pause)
        circle.comm.Barrier()

    # do checksum verification
    if args.verify:
        # A fresh Circle is used for the verification pass; this rebinds the
        # module-level ``circle``.
        circle = Circle(dbname="verify")
        pcheck = PVerify(circle, fcp, G.total_chunks, T.total_filesize, args.signature)
        circle.begin(pcheck)
        circle.finalize()
        tally = pcheck.fail_tally()
        # Share the tally so every rank takes the same branch below.
        tally = comm.bcast(tally)
        if circle.rank == 0:
            print("")
            if tally == 0:
                print("\t{:<20}{:<20}".format("Verify result:", "PASS"))
            else:
                print("\t{:<20}{:<20}".format("Verify result:", "FAILED"))

        comm.Barrier()

        # args.signature implies args.verify (set above), so ``tally`` and
        # ``pcheck`` are always bound when a signature is requested.
        if args.signature and tally == 0:
            gen_signature(pcheck.bfsign, T.total_filesize)

    # fix permission
    comm.Barrier()
    if G.fix_opt and treewalk:
        if comm.rank == 0:
            print("\nFixing ownership and permissions ...")
        fix_opt(treewalk)

    if treewalk:
        treewalk.cleanup()

    if fcp:
        fcp.cleanup()

    #if circle:
    #    circle.finalize(cleanup=True)

    comm.Barrier()
    if comm.rank == 0:
        try:
            os.rmdir(G.tempdir)
        except OSError:
            # Best effort: the directory may be non-empty or already gone.
            # Only OSError is swallowed so Ctrl-C / SystemExit still
            # propagate.
            pass
def main():
    """Walk the tree under ``args.path`` and print the run report.

    Parses and broadcasts the command-line arguments, validates the source
    path, runs an ``FWalk`` task on a ``Circle`` and, when ``args.stats`` is
    set, prints a file-size histogram followed by the largest files found.
    The report itself is printed by rank 0 only.

    Side effects: rebinds the module-level ``args``, sets ``G.src``,
    ``G.use_store`` and ``G.loglevel``, and may lower ``args.top`` to the
    number of files actually gathered.
    """
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.use_store = args.use_store
    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    circle = Circle()
    treewalk = FWalk(circle, G.src)
    circle.begin(treewalk)

    if G.use_store:
        treewalk.flushdb()

    if args.stats:
        _size_of = lambda f: f.st_size  # sort key shared by both sorts below

        # --- file size histogram ---
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                # Guard against an empty walk (total == 0): show 0% rather
                # than dividing by zero.
                if total:
                    percent = 100 * hist[idx] / float(total)
                else:
                    percent = 0.0
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format(
                    "< ", utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))

        # --- largest files ---
        # Sorting with key= runs on Python 2 and 3 alike; a positional cmp
        # function is rejected by Python 3.
        treewalk.flist.sort(key=_size_of, reverse=True)
        # Each rank contributes its local top candidates; rank 0 merges them.
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(key=_size_of, reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in range(args.top):
                print("\t{:15}{:<30}".format(
                    utils.bytes_fmt(globaltops[i].st_size), globaltops[i].path))

    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()
def main():
    """Profile the tree under ``args.path`` and print the fprof reports.

    Visible steps, in order: start ``fpipe`` listening, parse and broadcast
    the arguments, validate the source path, set up optional directory
    profiling bins and Lustre stripe output, print the run parameters on
    rank 0, run a ``ProfileWalk`` on a ``Circle``, then emit the histogram,
    optional syslog records, top-N file/directory reports and the GPFS block
    allocation report, and finally clean up.

    Rebinds the module-level ``args`` and, depending on options,
    ``stripe_out``, ``DIR_BINS`` and ``DIR_HIST``.
    """
    global comm, args, stripe_out, DIR_BINS, DIR_HIST

    fpipe.listen()

    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.memitem_threshold = args.item
    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    # doing directory profiling?
    if args.dirprof:
        if args.dirbins is None:
            # Missing bins are not an error: fall back to decade bins.
            args.dirbins = [0, 10, 100, 1000, 10**4, 10**5, 10**6, 10**7, 10**8]
        else:
            # User-supplied bins must already be strictly increasing.
            myList = sorted(set(args.dirbins))
            if myList != args.dirbins:
                err_and_exit("Error: duplicated, or unsorted bins: %s\n" % args.dirbins)

        DIR_BINS = args.dirbins
        DIR_HIST = [0] * (len(DIR_BINS) + 1)

    # Doing stripe analysis? lfs is not really bullet-proof way
    # we might need a better way of doing fstype check.
    if args.lustre_stripe:
        G.lfs_bin = lfs.check_lfs()
        G.stripe_threshold = utils.conv_unit(args.stripe_threshold)
        try:
            # Explicit 0o666 (still filtered by the umask): os.open()
            # defaults to 0o777, which would mark the report executable.
            stripe_out = os.open(args.stripe_output,
                                 os.O_CREAT | os.O_WRONLY | os.O_APPEND,
                                 0o666)
        except Exception:
            # Any failure to open is reported the same way, but Ctrl-C and
            # SystemExit are no longer swallowed.
            err_and_exit("Error: can't create stripe output: %s" % args.stripe_output)

    if args.exclude:
        process_exclude_file()

    if comm.rank == 0:
        row = "\t{0:<20}{1:<20}"
        print("Running Parameters:\n")
        print(row.format("fprof version:", __version__))
        print(row.format("Full rev id:", __revid__))
        print(row.format("Num of hosts:", hosts_cnt))
        print(row.format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print(row.format("Syslog report: ", "yes" if args.syslog else "no"))

        if args.dirprof:
            # str(): a list does not accept a "<20" format spec on Python 3.
            print(row.format("Dir bins: ", str(args.dirbins)))

        if args.lustre_stripe:
            print(row.format("Stripe analysis: ", "yes"))
            print(row.format("Stripe threshold: ", args.stripe_threshold))
        else:
            print(row.format("Stripe analysis: ", "no"))
        print(row.format("Root path:", G.src))

        if args.exclude:
            print("\nExclusions:\n")
            for ele in EXCLUDE:
                print("\t %s" % ele)

    circle = Circle()
    circle.report_enabled = True if args.perprocess else False
    if args.progress:
        # progress reporting replaces the per-process report
        circle.report_enabled = False
        circle.reduce_enabled = True

    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    msg1, msg2 = gen_histogram(total_file_size)

    if args.dirprof:
        gen_directory_histogram()

    if comm.rank == 0 and args.syslog:
        sendto_syslog("fprof.filecount.hist", msg1)
        sendto_syslog("fprof.fsize_perc.hist", msg2)

    if args.topn_files:
        topfiles = gather_topfiles()
        if comm.rank == 0:
            print("\nTop N File Report:\n")
            # edge case: not enough files (< args.topn_files)
            totaln = min(args.topn_files, len(topfiles))
            for index in range(totaln):
                size, path = topfiles[index]
                print("\t%s: %s (%s)" % (index + 1, path, utils.bytes_fmt(size)))
            print("")

    if args.topn_dirs:
        topdirs = gather_topdirs()
        if comm.rank == 0:
            print("\nTop N Directory Report:\n")
            # edge case: not enough directories (< args.topn_dirs)
            totaln = min(args.topn_dirs, len(topdirs))
            for index in range(totaln):
                size, path = topdirs[index]
                print("\t{0:}: {1:} ({2:,} items)".format(index + 1, path, size))
            print("")

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        gather_gpfs_dii()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\t{0:<15}{1:<4}".format("inode size:", args.inodesz))
            print("\t{0:<25}{1:>15,}".format("DII (data-in-inode) count:", DII_COUNT))
            print("\tSubblocks: %s\n" % gpfs_blocks)
            fmt_msg = "\tBlocksize: {0:<6} Estimated Space: {1:<20s} Efficiency: {2:>6.2%}"
            for idx, bsz in enumerate(G.gpfs_block_size):
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]
                if gpfs_file_size != 0:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size),
                                         total_file_size / float(gpfs_file_size)))
                else:
                    # nothing allocated at this block size
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()

    if args.lustre_stripe and stripe_out:
        os.close(stripe_out)

        # Collect every rank's stripe work counter for the summary line.
        sp_workload = comm.gather(Tally.spcnt)
        if comm.rank == 0:
            print("Stripe workload total: %s, distribution: %s" % (sum(sp_workload), sp_workload))
def main():
    """Profile the tree under ``args.path`` and print the fprof reports.

    Visible steps, in order: start ``fpipe`` listening, parse and broadcast
    the arguments, validate the source path, process the exclude file if
    requested, print the run parameters on rank 0, run a ``ProfileWalk`` on
    a ``Circle``, then emit the histogram, syslog records, the optional top
    file report and the optional GPFS block allocation report, and clean up.
    """
    global comm, args

    fpipe.listen()

    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    if args.exclude:
        process_exclude_file()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("fprof version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", G.src))
        if args.exclude:
            print("\nExclusions:\n")
            for ele in EXCLUDE:
                print("\t %s" % ele)

    circle = Circle()
    if args.perprocess:
        circle.report_enabled = True
    else:
        circle.reduce_enabled = True

    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    msg1, msg2 = gen_histogram(total_file_size)

    if comm.rank == 0:
        sendto_syslog("fprof.filecount.hist", msg1)
        sendto_syslog("fprof.fsize_perc.hist", msg2)

    if args.top:
        topfiles = gather_topfiles()
        if comm.rank == 0:
            print("\nTop File Report:\n")
            # edge case: not enough files (< args.top)
            totaln = min(args.top, len(topfiles))
            # range() exists on Python 2 and 3; xrange is Python-2-only.
            for index in range(totaln):
                size, path = topfiles[index]
                print("\t%s: %s (%s)" % (index + 1, path, utils.bytes_fmt(size)))
            print("")

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\tinode size: %s" % args.inodesz)
            print("\tDII (data-in-inode) count: %s" % DII_COUNT)
            print("\tSubblocks: %s\n" % gpfs_blocks)
            # Loop-invariant, so built once outside the loop.
            fmt_msg = "\tBlocksize: {:<6} Estimated Space: {:<20s} Efficiency: {:>6.2%}"
            for idx, bsz in enumerate(G.gpfs_block_size):
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]
                if gpfs_file_size != 0:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size),
                                         total_file_size / float(gpfs_file_size)))
                else:
                    # nothing allocated at this block size
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()