def nextobject(self):
    """Yields a list of objects.

    Returns keywords, literals, strings, numbers, arrays and dictionaries.
    Arrays and dictionaries are represented as Python lists and dictionaries.

    Loops until at least one complete object lands in self.results, then
    pops and returns the oldest one as a (pos, obj) pair.
    """
    while not self.results:
        (pos, token) = self.nexttoken()
        #print (pos,token), (self.curtype, self.curstack)
        if isinstance(token, (int, long, float, bool, str, PSLiteral)):
            # normal token: push straight onto the current stack
            self.push((pos, token))
        elif token == KEYWORD_ARRAY_BEGIN:
            # begin array
            self.start_type(pos, 'a')
        elif token == KEYWORD_ARRAY_END:
            # end array
            try:
                self.push(self.end_type('a'))
            except PSTypeError:
                # mismatched close bracket; only fatal in STRICT mode
                if STRICT:
                    raise
        elif token == KEYWORD_DICT_BEGIN:
            # begin dictionary
            self.start_type(pos, 'd')
        elif token == KEYWORD_DICT_END:
            # end dictionary
            try:
                (pos, objs) = self.end_type('d')
                # objs is a flat [k1, v1, k2, v2, ...] list, so it must
                # have an even length to pair up.
                if len(objs) % 2 != 0:
                    raise PSSyntaxError('Invalid dictionary construct: %r' % objs)
                # construct a Python dictionary.
                d = dict((literal_name(k), v) for (k, v) in choplist(2, objs) if v is not None)
                self.push((pos, d))
            except PSTypeError:
                if STRICT:
                    raise
        elif token == KEYWORD_PROC_BEGIN:
            # begin proc
            self.start_type(pos, 'p')
        elif token == KEYWORD_PROC_END:
            # end proc
            try:
                self.push(self.end_type('p'))
            except PSTypeError:
                if STRICT:
                    raise
        else:
            # Anything else is an executable keyword; delegate to subclass.
            if 2 <= self.debug:
                print >>sys.stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \
                    (pos, token, self.curstack)
            self.do_keyword(pos, token)
        # While inside a composite (array/dict/proc), keep consuming tokens;
        # otherwise move finished objects from the stack into self.results.
        if self.context:
            continue
        else:
            self.flush()
    obj = self.results.pop(0)
    if 2 <= self.debug:
        print >>sys.stderr, 'nextobject: %r' % (obj,)
    return obj
def main():
    """Entry point for the parallel tree walk (fwalk).

    Parses/broadcasts CLI args across MPI ranks, walks the source tree with
    a Circle work-stealing ring, and (optionally) prints a size histogram
    and the top-N largest files. Rank 0 does all console output.
    """
    global comm, args
    # parse on rank 0 and broadcast so every rank sees identical args
    args = parse_and_bcast(comm, gen_parser)
    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)
    G.use_store = args.use_store
    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()
    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))
    circle = Circle()
    treewalk = FWalk(circle, G.src)
    # collective: every rank participates in the walk
    circle.begin(treewalk)
    if G.use_store:
        treewalk.flushdb()
    if args.stats:
        # histogram is reduced globally; only rank 0 renders it
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5  # one '∎' per 2% of files in a bucket
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                percent = 100 * hist[idx] / float(total)
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format("< ", utils.bytes_fmt(rightbound), hist[idx], "%0.2f%%" % percent, '∎' * star_count))
    if args.stats:
        # Python 2 cmp-style sort, largest files first
        treewalk.flist.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
        # collective gather of each rank's local top-N candidates
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in xrange(args.top):
                print("\t{:15}{:<30}".format(utils.bytes_fmt(globaltops[i].st_size), globaltops[i].path))
    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()
def lookup(d):
    """Recursively resolve a key in a PDF name-tree node dict.

    NOTE(review): `key` and `cat` are free variables from the enclosing
    scope (not visible here) — presumably the lookup key and the tree
    category; confirm against the enclosing function.
    """
    if 'Limits' in d:
        # prune subtrees whose [min, max] key range excludes the key
        (k1,k2) = list_value(d['Limits'])
        if key < k1 or k2 < key:
            return None
    if 'Names' in d:
        # leaf node: Names is a flat [k1, v1, k2, v2, ...] array
        objs = list_value(d['Names'])
        names = dict(choplist(2, objs))
        return names[key]  # raises KeyError if absent from this leaf
    if 'Kids' in d:
        # interior node: descend until a kid yields a truthy value
        for c in list_value(d['Kids']):
            v = lookup(dict_value(c))
            if v:
                return v
    raise KeyError((cat,key))
def lookup(node):
    """Resolve the (free-variable) key within a PDF name-tree node.

    Walks the tree recursively: Limits prunes, Names answers at a leaf,
    Kids descends; raises KeyError if nothing matches anywhere.
    """
    # Skip any subtree whose [lo, hi] limits cannot contain the key.
    if 'Limits' in node:
        (lo, hi) = list_value(node['Limits'])
        if key < lo or hi < key:
            return None
    # Leaf: Names holds alternating key/value entries.
    if 'Names' in node:
        pairs = list_value(node['Names'])
        return dict(choplist(2, pairs))[key]
    # Interior: first kid that produces a truthy result wins.
    if 'Kids' in node:
        for kid in list_value(node['Kids']):
            found = lookup(dict_value(kid))
            if found:
                return found
    raise KeyError((cat, key))
def nextobject(self):
    '''Yields a list of objects: keywords, literals, strings,
    numbers, arrays and dictionaries. Arrays and dictionaries
    are represented as Python sequence and dictionaries.

    Consumes tokens until one complete object is available in
    self.results, then returns the oldest as a (pos, obj) pair.
    '''
    while not self.results:
        (pos, token) = self.nexttoken()
        #print (pos,token), (self.curtype, self.curstack)
        # Fix: collapse the chain of five `isinstance(token, T) or ...`
        # calls into one tuple-form isinstance (same types, same result).
        if isinstance(token, (int, float, bool, str, PSLiteral)):
            # normal token
            self.push((pos, token))
        elif token == KEYWORD_ARRAY_BEGIN:
            # begin array
            self.start_type(pos, 'a')
        elif token == KEYWORD_ARRAY_END:
            # end array
            try:
                self.push(self.end_type('a'))
            except PSTypeError:
                if STRICT:
                    raise
        elif token == KEYWORD_DICT_BEGIN:
            # begin dictionary
            self.start_type(pos, 'd')
        elif token == KEYWORD_DICT_END:
            # end dictionary
            try:
                (pos, objs) = self.end_type('d')
                # flat [k1, v1, ...] list must pair up evenly
                if len(objs) % 2 != 0:
                    raise PSSyntaxError('invalid dictionary construct: %r' % objs)
                d = dict(
                    (literal_name(k), v)
                    for (k,v) in choplist(2, objs))
                self.push((pos, d))
            except PSTypeError:
                if STRICT:
                    raise
        else:
            # executable keyword: delegate to subclass hook
            if 2 <= self.debug:
                print >>stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \
                    (pos, token, self.curstack)
            self.do_keyword(pos, token)
        # inside a composite object keep consuming; otherwise flush
        # finished objects into self.results
        if self.context:
            continue
        else:
            self.flush()
    obj = self.results.pop(0)
    if 2 <= self.debug:
        print >>stderr, 'nextobject: %r' % (obj,)
    return obj
def do_keyword(self, pos, token):
    """Handle BI/ID keywords that bracket an inline image in a content stream.

    BI starts collecting the inline-image attribute dictionary; ID closes
    it, reads the raw image bytes, and pushes a PDFStream plus a synthetic
    EI keyword. Errors are routed to handle_error rather than raised.
    """
    if token is self.KEYWORD_BI:
        # inline image within a content stream
        self.start_type(pos, 'inline')
    elif token is self.KEYWORD_ID:
        try:
            (_, objs) = self.end_type('inline')
            # attribute list must be alternating key/value pairs
            if len(objs) % 2 != 0:
                raise PSTypeError('Invalid dictionary construct: %r' % objs)
            d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
            # image data starts immediately after the 'ID ' marker
            (pos, data) = self.get_inline_data(pos + len('ID '))
            obj = PDFStream(d, data)
            self.push((pos, obj))
            # push EI so downstream consumers see the closing keyword
            self.push((pos, self.KEYWORD_EI))
        except PSTypeError, e:
            # non-strict path: report and continue parsing
            handle_error(type(e), str(e))
def get_widths2(seq):
    """Build a CID -> (width, (vx, vy)) map from a PDF /W2-style array.

    The input mixes two forms:
      * ``c [w1 vx1 vy1 w2 vx2 vy2 ...]`` — consecutive CIDs starting at c
      * ``c_first c_last w vx vy``        — a whole CID range sharing one triple

    Fixes vs. original: uses ``range`` instead of Python-2-only ``xrange``
    (NameError on Python 3; identical iteration on Python 2) and replaces
    the external ``choplist(3, v)`` helper with the equivalent builtin
    ``zip(*[iter(v)] * 3)`` grouping idiom (same drop-remainder semantics).
    """
    widths = {}
    pending = []  # run of bare integers seen so far
    for item in seq:
        if isinstance(item, list):
            # Form 1: the last pending integer is the starting CID.
            if pending:
                start = pending[-1]
                triples = zip(*[iter(item)] * 3)  # (w, vx, vy) groups
                for offset, (w, vx, vy) in enumerate(triples):
                    widths[start + offset] = (w, (vx, vy))
            pending = []
        elif isinstance(item, int):
            pending.append(item)
            if len(pending) == 5:
                # Form 2: inclusive CID range with one shared triple.
                (first, last, w, vx, vy) = pending
                for cid in range(first, last + 1):
                    widths[cid] = (w, (vx, vy))
                pending = []
    return widths
def load(self, parser): (_, objid) = parser.nexttoken() # ignored (_, genno) = parser.nexttoken() # ignored (_, kwd) = parser.nexttoken() (_, stream) = parser.nextobject() if not isinstance(stream, PDFStream) or stream['Type'] is not LITERAL_XREF: raise PDFNoValidXRef('Invalid PDF stream spec.') size = stream['Size'] index_array = stream.get('Index', (0, size)) if len(index_array) % 2 != 0: raise PDFSyntaxError('Invalid index number') self.objid_ranges.extend(ObjIdRange(start, nobjs) for (start, nobjs) in choplist(2, index_array)) (self.fl1, self.fl2, self.fl3) = stream['W'] self.data = stream.get_data() self.entlen = self.fl1 + self.fl2 + self.fl3 self.trailer = stream.attrs log.info('xref stream: objid=%s, fields=%d,%d,%d', ', '.join(map(repr, self.objid_ranges)), self.fl1, self.fl2, self.fl3)
def do_keyword(self, pos, token):
    """Handle BI/ID keywords for inline images; push any other keyword."""
    if token is self.KEYWORD_BI:
        # 'BI' opens the inline-image attribute dictionary.
        self.start_type(pos, 'inline')
        return
    if token is not self.KEYWORD_ID:
        # Unrelated keyword: pass it through untouched.
        self.push((pos, token))
        return
    # 'ID' closes the dictionary and introduces the raw image bytes.
    try:
        (_, entries) = self.end_type('inline')
        if len(entries) % 2 != 0:
            raise PSTypeError('Invalid dictionary construct: %r' % entries)
        attrs = {}
        for (key, value) in choplist(2, entries):
            attrs[literal_name(key)] = value
        (pos, data) = self.get_inline_data(pos + len('ID '))
        self.push((pos, PDFStream(attrs, data)))
        # Emit a synthetic EI so consumers see the closing keyword.
        self.push((pos, self.KEYWORD_EI))
    except PSTypeError:
        if STRICT:
            raise
    return
def do_keyword(self, pos, token):
    """Handle BI/ID keywords that bracket an inline image; push others.

    BI starts collecting the attribute dictionary, ID terminates it and
    reads the raw image data, producing a PDFStream followed by a
    synthetic EI keyword. Non-image keywords are pushed unchanged.
    """
    if token is self.KEYWORD_BI:
        # inline image within a content stream
        self.start_type(pos, "inline")
    elif token is self.KEYWORD_ID:
        try:
            (_, objs) = self.end_type("inline")
            # attributes must come in key/value pairs
            if len(objs) % 2 != 0:
                raise PSTypeError("Invalid dictionary construct: %r" % objs)
            d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
            # raw data begins right after the 'ID ' marker
            (pos, data) = self.get_inline_data(pos + len("ID "))
            obj = PDFStream(d, data)
            self.push((pos, obj))
            self.push((pos, self.KEYWORD_EI))
        except PSTypeError:
            if STRICT:
                raise
    else:
        self.push((pos, token))
    return
def load(self, parser, debug=0): (_, objid) = parser.nexttoken() # ignored (_, genno) = parser.nexttoken() # ignored (_, kwd) = parser.nexttoken() (_, stream) = parser.nextobject() if not isinstance(stream, PDFStream) or stream['Type'] is not LITERAL_XREF: raise PDFNoValidXRef('Invalid PDF stream spec.') size = stream['Size'] index_array = stream.get('Index', (0, size)) if len(index_array) % 2 != 0: raise PDFSyntaxError('Invalid index number') self.ranges.extend(choplist(2, index_array)) (self.fl1, self.fl2, self.fl3) = stream['W'] self.data = stream.get_data() self.entlen = self.fl1+self.fl2+self.fl3 self.trailer = stream.attrs if 1 <= debug: print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' % (', '.join(map(repr, self.ranges)), self.fl1, self.fl2, self.fl3)) return
def main():
    """Entry point for the parallel copy tool (fcp).

    Sets up globals from broadcast CLI args, runs the copy phase, then
    optionally pauses, verifies checksums, writes a dataset signature,
    fixes ownership/permissions, and cleans up. Most calls below are
    MPI-collective: every rank must reach them in the same order.
    """
    global args, log, circle, fcp, treewalk
    # This might be an overkill function
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    tally_hosts()
    G.loglevel = args.loglevel
    G.fix_opt = False if args.no_fixopt else True
    G.preserve = args.preserve
    G.resume = True if args.cpid else False
    G.reduce_interval = args.reduce_interval
    G.verbosity = args.verbosity
    G.am_root = True if os.geteuid() == 0 else False
    G.memitem_threshold = args.item
    if args.signature:
        # with signature implies doing verify as well
        args.verify = True
    if args.rid:
        # recovery run: force overwrite, skip signature generation
        G.resume = True
        args.force = True
        G.rid = args.rid
        args.signature = False  # when recovery, no signature
    if not args.cpid:
        # all ranks agree on a single checkpoint id via broadcast
        ts = utils.timestamp()
        args.cpid = MPI.COMM_WORLD.bcast(ts)
    G.tempdir = os.path.join(os.getcwd(), (".pcircle" + args.cpid))
    if not os.path.exists(G.tempdir):
        try:
            os.mkdir(G.tempdir)
        except OSError:
            # another rank may have created it first; ignore
            pass
    G.src, G.dest = check_source_and_target(args.src, args.dest)
    dbname = get_workq_name()
    circle = Circle(dbname="fwalk")
    #circle.dbname = dbname
    global oflimit
    if num_of_hosts != 0:
        # budget open-file descriptors per process, reserving 64 and
        # halving for src/dest pairs; floor at 8
        max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
        procs_per_host = circle.size // num_of_hosts
        oflimit = ((max_ofile - 64) // procs_per_host) // 2
        if oflimit < 8:
            oflimit = 8
    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<25}{:<20}".format("Starting at:", utils.current_time()))
        print("\t{:<25}{:<20}".format("FCP version:", __version__))
        print("\t{:<25}{:<20}".format("Source:", utils.choplist(G.src)))
        print("\t{:<25}{:<20}".format("Destination:", os.path.abspath(args.dest)))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Num of Hosts:", num_of_hosts, "|",
                                                      "Num of Processes:", comm.size))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Overwrite:", "%r" % args.force, "|",
                                                      "Copy Verification:", "%r" % args.verify))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Dataset signature:", "%r" % args.signature, "|",
                                                      "Stripe Preserve:", "%r" % G.preserve))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Checkpoint interval:", "%s" % utils.conv_time(args.cptime), "|",
                                                      "Checkpoint ID:", "%s" % args.cpid))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Items in memory: ", " % r" % G.memitem_threshold, "|",
                                                      "O file limit", "%s" % oflimit))
        # if args.verbosity > 0:
        print("\t{:<25}{:<20}".format("Copy Mode:", G.copytype))
    fcp_start()
    if args.pause and args.verify:
        if circle.rank == 0:
            # raw_input("\n--> Press any key to continue ...\n")
            print("Pause, resume after %s seconds ..." % args.pause)
            sys.stdout.flush()
        # every rank sleeps, then synchronizes
        time.sleep(args.pause)
        circle.comm.Barrier()
    # do checksum verification
    if args.verify:
        circle = Circle(dbname="verify")
        pcheck = PVerify(circle, fcp, G.total_chunks, T.total_filesize, args.signature)
        circle.begin(pcheck)
        circle.finalize()
        tally = pcheck.fail_tally()
        # rank 0's tally is authoritative; broadcast to all
        tally = comm.bcast(tally)
        if circle.rank == 0:
            print("")
            if tally == 0:
                print("\t{:<20}{:<20}".format("Verify result:", "PASS"))
            else:
                print("\t{:<20}{:<20}".format("Verify result:", "FAILED"))
        comm.Barrier()
        if args.signature and tally == 0:
            gen_signature(pcheck.bfsign, T.total_filesize)
    # fix permission
    comm.Barrier()
    if G.fix_opt and treewalk:
        if comm.rank == 0:
            print("\nFixing ownership and permissions ...")
        fix_opt(treewalk)
    if treewalk:
        treewalk.cleanup()
    if fcp:
        fcp.cleanup()
    #if circle:
    #    circle.finalize(cleanup=True)
    comm.Barrier()
    if comm.rank == 0:
        # best-effort removal of the checkpoint scratch dir
        try:
            os.rmdir(G.tempdir)
        except:
            pass
def main():
    """Entry point for the parallel copy tool (fcp), older variant.

    Broadcasts CLI args, runs the copy, then optionally pauses, verifies,
    signs, fixes permissions and cleans up. Calls below are largely
    MPI-collective; all ranks must execute them in the same order.
    """
    global args, log, circle, fcp, treewalk
    # This might be an overkill function
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    tally_hosts()
    G.loglevel = args.loglevel
    G.fix_opt = False if args.no_fixopt else True
    G.preserve = args.preserve
    G.resume = True if args.cpid else False
    G.reduce_interval = args.reduce_interval
    G.verbosity = args.verbosity
    G.am_root = True if os.geteuid() == 0 else False
    if args.signature:
        # with signature implies doing verify as well
        args.verify = True
    G.src, G.dest = check_source_and_target(args.src, args.dest)
    dbname = get_workq_name()
    circle = Circle()
    circle.dbname = dbname
    if args.rid:
        # recovery run: resume from checkpoint, skip signature
        circle.resume = True
        args.signature = False  # when recovery, no signature
    if not args.cpid:
        # all ranks agree on one checkpoint id via broadcast
        ts = utils.timestamp()
        args.cpid = circle.comm.bcast(ts)
    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<25}{:<20}".format("Starting at:", utils.current_time()))
        print("\t{:<25}{:<20}".format("FCP version:", __version__))
        print("\t{:<25}{:<20}".format("Source:", utils.choplist(G.src)))
        print("\t{:<25}{:<20}".format("Destination:", os.path.abspath(args.dest)))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Num of Hosts:", num_of_hosts, "|",
                                                      "Num of Processes:", comm.size))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Overwrite:", "%r" % args.force, "|",
                                                      "Copy Verification:", "%r" % args.verify))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Dataset signature:", "%r" % args.signature, "|",
                                                      "Stripe Preserve:", "%r" % G.preserve))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Checkpoint interval:", "%s" % utils.conv_time(args.cptime), "|",
                                                      "Checkpoint ID:", "%s" % args.cpid))
        # if args.verbosity > 0:
        print("\t{:<25}{:<20}".format("Copy Mode:", G.copytype))
    fcp_start()
    if args.pause and args.verify:
        if circle.rank == 0:
            # raw_input("\n--> Press any key to continue ...\n")
            print("Pause, resume after %s seconds ..."
                  % args.pause)
            sys.stdout.flush()
        # every rank sleeps, then synchronizes
        time.sleep(args.pause)
        circle.comm.Barrier()
    # do checksum verification
    if args.verify:
        circle = Circle()
        pcheck = PVerify(circle, fcp, G.totalsize)
        circle.begin(pcheck)
        tally = pcheck.fail_tally()
        tally = comm.bcast(tally)
        if circle.rank == 0:
            print("")
            if tally == 0:
                print("\t{:<20}{:<20}".format("Result:", "PASS"))
            else:
                print("\t{:<20}{:<20}".format("Result:", "FAILED"))
        comm.Barrier()
        if args.signature and tally == 0:
            gen_signature(fcp, G.totalsize)
    # fix permission
    comm.Barrier()
    if G.fix_opt and treewalk:
        if comm.rank == 0:
            print("\nFixing ownership and permissions ...")
        fix_opt(treewalk)
    if treewalk:
        treewalk.cleanup()
    if fcp:
        fcp.epilogue()
        fcp.cleanup()
    # if circle:
    #     circle.finalize(cleanup=True)
    # TODO: a close file error can happen when circle.finalize()
    # if isinstance(circle.workq, DbStore):
    circle.workq.cleanup()
def do_keyword(self, pos, token):
    """Dispatch one CMap keyword during Adobe CMap file parsing.

    Recognizes the begincmap/endcmap bracket, attribute defs, usecmap,
    and the begin/end pairs for codespace ranges, CID ranges/chars and
    BF (base-font) ranges/chars, feeding results into self.cmap.
    Unrecognized keywords are pushed back for the caller.
    """
    name = token.name
    if name == 'begincmap':
        self._in_cmap = True
        self.popall()
        return
    elif name == 'endcmap':
        self._in_cmap = False
        return
    # everything below only applies inside a begincmap...endcmap section
    if not self._in_cmap:
        return
    #
    if name == 'def':
        # "key value def" -> CMap attribute
        try:
            ((_, k), (_, v)) = self.pop(2)
            self.cmap.set_attr(literal_name(k), v)
        except PSSyntaxError:
            pass
        return
    if name == 'usecmap':
        # inherit mappings from another named CMap
        try:
            ((_, cmapname), ) = self.pop(1)
            self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
        except PSSyntaxError:
            pass
        except CMapDB.CMapNotFound:
            pass
        return
    if name == 'begincodespacerange':
        self.popall()
        return
    if name == 'endcodespacerange':
        # codespace ranges are collected but not used here
        self.popall()
        return
    if name == 'begincidrange':
        self.popall()
        return
    if name == 'endcidrange':
        # operands come in (start-code, end-code, first-cid) triples
        objs = [obj for (__, obj) in self.popall()]
        for (s, e, cid) in choplist(3, objs):
            if (not isinstance(s, str) or not isinstance(e, str) or
                    not isinstance(cid, int) or len(s) != len(e)):
                continue
            # only the last 4 bytes may vary across the range
            sprefix = s[:-4]
            eprefix = e[:-4]
            if sprefix != eprefix:
                continue
            svar = s[-4:]
            evar = e[-4:]
            s1 = nunpack(svar)
            e1 = nunpack(evar)
            vlen = len(svar)
            #assert s1 <= e1
            for i in xrange(e1 - s1 + 1):
                # rebuild each code byte string and map it to its CID
                x = sprefix + struct.pack('>L', s1 + i)[-vlen:]
                self.cmap.add_code2cid(x, cid + i)
        return
    if name == 'begincidchar':
        self.popall()
        return
    if name == 'endcidchar':
        # operands come in (cid, code) pairs
        objs = [obj for (__, obj) in self.popall()]
        for (cid, code) in choplist(2, objs):
            if isinstance(code, str) and isinstance(cid, str):
                self.cmap.add_code2cid(code, nunpack(cid))
        return
    if name == 'beginbfrange':
        self.popall()
        return
    if name == 'endbfrange':
        # operands: (start, end, target) where target is a code string
        # or an explicit list of destination strings
        objs = [obj for (__, obj) in self.popall()]
        for (s, e, code) in choplist(3, objs):
            if (not isinstance(s, str) or not isinstance(e, str) or
                    len(s) != len(e)):
                continue
            s1 = nunpack(s)
            e1 = nunpack(e)
            #assert s1 <= e1
            if isinstance(code, list):
                for i in xrange(e1 - s1 + 1):
                    self.cmap.add_cid2unichr(s1 + i, code[i])
            else:
                # increment the last 4 bytes of the target for each CID
                var = code[-4:]
                base = nunpack(var)
                prefix = code[:-4]
                vlen = len(var)
                for i in xrange(e1 - s1 + 1):
                    x = prefix + struct.pack('>L', base + i)[-vlen:]
                    self.cmap.add_cid2unichr(s1 + i, x)
        return
    if name == 'beginbfchar':
        self.popall()
        return
    if name == 'endbfchar':
        # operands come in (cid, code) pairs
        objs = [obj for (__, obj) in self.popall()]
        for (cid, code) in choplist(2, objs):
            if isinstance(cid, str) and isinstance(code, str):
                self.cmap.add_cid2unichr(nunpack(cid), code)
        return
    if name == 'beginnotdefrange':
        self.popall()
        return
    if name == 'endnotdefrange':
        # notdef ranges are ignored
        self.popall()
        return
    # unrecognized keyword: hand back to the generic parser
    self.push((pos, token))
    return
def do_keyword(self, pos, token):
    """Dispatch one CMap keyword during Adobe CMap file parsing (older style).

    Handles the begincmap/endcmap bracket, `def` attributes, `usecmap`,
    and the begin/end pairs for codespace, CID and BF ranges/chars,
    populating self.cmap. Unrecognized keywords are pushed back.
    """
    name = token.name
    if name == 'begincmap':
        self._in_cmap = True
        self.popall()
        return
    elif name == 'endcmap':
        self._in_cmap = False
        return
    # ignore everything outside a begincmap...endcmap section
    if not self._in_cmap:
        return
    #
    if name == 'def':
        # "key value def" -> CMap attribute
        try:
            ((_,k),(_,v)) = self.pop(2)
            self.cmap.set_attr(literal_name(k), v)
        except PSSyntaxError:
            pass
        return
    if name == 'usecmap':
        # inherit mappings from another named CMap
        try:
            ((_,cmapname),) = self.pop(1)
            self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
        except PSSyntaxError:
            pass
        except CMapDB.CMapNotFound:
            pass
        return
    if name == 'begincodespacerange':
        self.popall()
        return
    if name == 'endcodespacerange':
        self.popall()
        return
    if name == 'begincidrange':
        self.popall()
        return
    if name == 'endcidrange':
        # operands come in (start-code, end-code, first-cid) triples
        objs = [ obj for (_,obj) in self.popall() ]
        for (s,e,cid) in choplist(3, objs):
            if (not isinstance(s, str) or not isinstance(e, str) or
                not isinstance(cid, int) or len(s) != len(e)):
                continue
            # only the last 4 bytes may vary across the range
            sprefix = s[:-4]
            eprefix = e[:-4]
            if sprefix != eprefix:
                continue
            svar = s[-4:]
            evar = e[-4:]
            s1 = nunpack(svar)
            e1 = nunpack(evar)
            vlen = len(svar)
            #assert s1 <= e1
            for i in xrange(e1-s1+1):
                x = sprefix+struct.pack('>L',s1+i)[-vlen:]
                self.cmap.add_code2cid(x, cid+i)
        return
    if name == 'begincidchar':
        self.popall()
        return
    if name == 'endcidchar':
        # operands come in (cid, code) pairs
        objs = [ obj for (_,obj) in self.popall() ]
        for (cid,code) in choplist(2, objs):
            if isinstance(code, str) and isinstance(cid, str):
                self.cmap.add_code2cid(code, nunpack(cid))
        return
    if name == 'beginbfrange':
        self.popall()
        return
    if name == 'endbfrange':
        # operands: (start, end, target); target is a code string or a
        # list of explicit destination strings
        objs = [ obj for (_,obj) in self.popall() ]
        for (s,e,code) in choplist(3, objs):
            if (not isinstance(s, str) or not isinstance(e, str) or
                len(s) != len(e)):
                continue
            s1 = nunpack(s)
            e1 = nunpack(e)
            #assert s1 <= e1
            if isinstance(code, list):
                for i in xrange(e1-s1+1):
                    self.cmap.add_cid2unichr(s1+i, code[i])
            else:
                # increment the last 4 bytes of the target per CID
                var = code[-4:]
                base = nunpack(var)
                prefix = code[:-4]
                vlen = len(var)
                for i in xrange(e1-s1+1):
                    x = prefix+struct.pack('>L',base+i)[-vlen:]
                    self.cmap.add_cid2unichr(s1+i, x)
        return
    if name == 'beginbfchar':
        self.popall()
        return
    if name == 'endbfchar':
        # operands come in (cid, code) pairs
        objs = [ obj for (_,obj) in self.popall() ]
        for (cid,code) in choplist(2, objs):
            if isinstance(cid, str) and isinstance(code, str):
                self.cmap.add_cid2unichr(nunpack(cid), code)
        return
    if name == 'beginnotdefrange':
        self.popall()
        return
    if name == 'endnotdefrange':
        # notdef ranges are ignored
        self.popall()
        return
    # unrecognized keyword: hand back to the generic parser
    self.push((pos, token))
    return
def main():
    """Entry point for parallel dataset checksumming (fsum, bloom-filter variant).

    Walks the tree, checksums chunks in parallel, OR-combines each rank's
    bloom-filter signature on rank 0, and writes the aggregate SHA1
    signature file. MPI-collective calls must be reached by all ranks.
    """
    global args, comm
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)
    G.loglevel = args.loglevel
    #G.use_store = args.use_store
    G.reduce_interval = args.interval
    G.memitem_threshold = args.item
    hosts_cnt = tally_hosts()
    circle = Circle()
    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FSUM version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))
        print("\t{:<20}{:<20}".format("Items in memory:", G.memitem_threshold))
    fwalk = FWalk(circle, G.src)
    circle.begin(fwalk)
    if G.use_store:
        fwalk.flushdb()
    fwalk.epilogue()
    circle.finalize()
    # by default, we use adaptive chunksize
    chunksize = utils.calc_chunksize(T.total_filesize)
    if args.chunksize:
        chunksize = conv_unit(args.chunksize)
    if circle.rank == 0:
        print("Chunksize = ", chunksize)
    # second ring: checksum the chunks discovered by the walk
    circle = Circle()
    fcheck = Checksum(circle, fwalk, chunksize, T.total_filesize, T.total_files)
    circle.begin(fcheck)
    circle.finalize()
    if circle.rank == 0:
        sys.stdout.write("\nAggregating ... ")
    # NOTE(review): the block below is dead code kept as a string literal
    # (the previous gather-and-sort aggregation path).
    """
    chunkl = circle.comm.gather(fcheck.chunkq)
    if circle.rank == 0:
        chunks = [item for sublist in chunkl for item in sublist]
        chunks.sort()
        sys.stdout.write("%s chunks\n" % len(chunks))
        sha1val = do_checksum(chunks)
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % T.total_filesize)
        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)
        if args.export_block_signatures:
            export_checksum2(chunks, args.output)
            print("Exporting block signatures ... \n")
    """
    # reduce all ranks' bloom-filter bitarrays onto rank 0
    if circle.rank > 0:
        circle.comm.send(fcheck.bfsign.bitarray, dest=0)
    else:
        for p in xrange(1, circle.comm.size):
            other_bitarray = circle.comm.recv(source=p)
            fcheck.bfsign.or_bf(other_bitarray)
    circle.comm.Barrier()
    if circle.comm.rank == 0:
        # derive the dataset signature from the combined bloom filter
        sha1val = fcheck.bfsign.gen_signature()
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % T.total_filesize)
        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)
    fcheck.epilogue()
    if circle.comm.rank == 0:
        # best-effort scratch-dir cleanup
        if os.path.exists(G.tempdir):
            shutil.rmtree(G.tempdir, ignore_errors=True)
def main():
    """Entry point for the parallel tree walk (fwalk).

    Broadcasts CLI args to all MPI ranks, walks the source tree with a
    Circle ring, and when --stats is set prints a file-size histogram
    and the global top-N largest files (rank 0 only).

    Fix: the original had two consecutive `if args.stats:` blocks over
    the same unchanged condition; they are merged into one.
    """
    global comm, args
    # parse on rank 0 and broadcast so every rank sees identical args
    args = parse_and_bcast(comm, gen_parser)
    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)
    G.use_store = args.use_store
    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()
    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))
    circle = Circle()
    treewalk = FWalk(circle, G.src)
    # collective: every rank participates in the walk
    circle.begin(treewalk)
    if G.use_store:
        treewalk.flushdb()
    if args.stats:
        # --- histogram (globally reduced, rendered on rank 0) ---
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5  # one '∎' per 2% of files in a bucket
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                percent = 100 * hist[idx] / float(total)
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format(
                    "< ", utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))
        # --- top-N largest files (local sort, collective gather) ---
        treewalk.flist.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in xrange(args.top):
                print("\t{:15}{:<30}".format(
                    utils.bytes_fmt(globaltops[i].st_size),
                    globaltops[i].path))
    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()
def main():
    """Entry point for parallel dataset checksumming (fsum, gather variant).

    Walks the tree, checksums chunks in parallel, gathers every rank's
    chunk digests onto rank 0, and writes a SHA1 signature file
    (optionally exporting per-block signatures).
    """
    global args, comm
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)
    G.loglevel = args.loglevel
    G.use_store = args.use_store
    G.reduce_interval = args.interval
    hosts_cnt = tally_hosts()
    circle = Circle()
    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FSUM version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))
    fwalk = FWalk(circle, G.src)
    circle.begin(fwalk)
    if G.use_store:
        fwalk.flushdb()
    totalsize = fwalk.epilogue()
    circle.finalize()
    # by default, we use adaptive chunksize
    chunksize = utils.calc_chunksize(totalsize)
    if args.chunksize:
        chunksize = conv_unit(args.chunksize)
    if circle.rank == 0:
        print("Chunksize = ", chunksize)
    # second ring: checksum the chunks discovered by the walk
    circle = Circle()
    fcheck = Checksum(circle, fwalk, chunksize, totalsize)
    circle.begin(fcheck)
    circle.finalize()
    if circle.rank == 0:
        sys.stdout.write("\nAggregating ... ")
    # collective gather of per-rank chunk queues onto rank 0
    chunkl = circle.comm.gather(fcheck.chunkq)
    if circle.rank == 0:
        chunks = [item for sublist in chunkl for item in sublist]
        chunks.sort()
        sys.stdout.write("%s chunks\n" % len(chunks))
        sha1val = do_checksum(chunks)
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % totalsize)
        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)
        if args.export_block_signatures:
            export_checksum2(chunks, args.output)
            print("Exporting block signatures ... \n")
    fcheck.epilogue()