# Map the --verbose count to a logging level.
# NOTE(review): no branch ever yields INFO (verbose==3 maps to WARNING) and the
# original's 'verbose>=1' and 'else' branches both selected ERROR, so they are
# merged here -- behavior is unchanged; confirm the missing INFO tier is intended.
if args['verbose'] >= 4:
    _log_level = logging.DEBUG
elif args['verbose'] >= 2:
    _log_level = logging.WARNING
else:
    _log_level = logging.ERROR
logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s',
                    level=_log_level,
                    datefmt='%Y%m%d %I:%M:%S')

# Fixed-width path formatter used for aligned log output.
pfmt = PathFormatter(120)

# Open the (old) cache database named on the command line.
cache = sqlite.sqlite()
cache.setCacheLocation(args['cacheOld'])
cache.initialize()

analyzer = BackupAnalyzer()
analyzer.setCache(cache)
analyzer.initialize()

# Lazy %-args: the message is only formatted if INFO is actually enabled.
logging.info("files with full hashes: %s files", analyzer.getFilesWithFullHashesCount())

hh = Hasher.FullContentHashV1.Base()
hh.initialize()

# Dispatch table replaces the if/elif chain over the --Hasher choices;
# fh stays None for any unrecognized value, exactly as before.
_hasher_factories = {
    'FastContentHashV1.Cached': Hasher.FastContentHashV1.Cached,
    'FastContentHashV2.Cached': Hasher.FastContentHashV2.Cached,
    'FastContentHashV2.Cached_noInode': Hasher.FastContentHashV2.Cached_noInode,
}
fh = None
_factory = _hasher_factories.get(args['Hasher'])
if _factory is not None:
    fh = _factory()
import Cache.sqlite as sqlite
import os
import datetime

# NOTE(review): argparse, humanize and BackupAnalyzer are used below but not
# imported in this fragment -- presumably imported earlier in the file; verify.

parser = argparse.ArgumentParser(description='Create the sqlite DB')
parser.add_argument('--cache', dest='cache', action='store', type=str, default='', help='TODO')
parser.add_argument('--data', dest='data', action='store', type=str, default='', help='TODO')
parser.add_argument('--mode', dest='mode', action='store', type=str, default='auto', help='TODO')
args = vars(parser.parse_args())

cache = sqlite.sqlite()
cache.setCacheLocation(args['cache'])
cache.initialize()

analyzer = BackupAnalyzer()
analyzer.setCache(cache)
analyzer.initialize()

if args['mode'] == 'auto':
    # Parenthesized single-argument print produces identical output on
    # Python 2 (statement with a parenthesized expression) and Python 3.
    print("items total: %d files" % (analyzer.getFilesCount()))
    print("items total: %d dirs" % (analyzer.getDirsCount()))
    print("size total: %s" % (humanize.naturalsize(analyzer.getTotalSize())))
    print("   avg size: %s" % (humanize.naturalsize(analyzer.getAvgSize())))
    print("   median size: %s" % (humanize.naturalsize(analyzer.getMedianSize())))
    print("duplicated files total: %s files" % (analyzer.getDuplicatedFilesCount()))
    print("duplicated files size: %s" % (humanize.naturalsize(analyzer.getDuplicatedFilesSize())))
    print("duplicated empty files : %s files" % (analyzer.getEmptyFilesCount()))
    # The original generator carried a dead 'if True' filter; dropped.
    print("largest 10 files: \n %s" % "\n ".join(
        "%s (%s)" % (os.path.basename(path), humanize.naturalsize(size))
        for (size, path) in analyzer.getTop10LargestFiles()))
    print("empty dirs: %s" % (analyzer.getEmptyDirsCount()))
# NOTE(review): truncated fragment -- it begins with 'elif' (the opening 'if'
# lies outside this chunk) and ends mid-'elif' with no assignment body.  It
# appears to duplicate the verbose/cache/hasher setup seen earlier in this
# file; the missing edges make a safe rewrite impossible here -- confirm
# against the full file before touching it.
elif args['verbose'] >= 1: logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s', level=logging.ERROR, datefmt='%Y%m%d %I:%M:%S') else: logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s', level=logging.ERROR, datefmt='%Y%m%d %I:%M:%S') pfmt = PathFormatter(120) cache = sqlite.sqlite() cache.setCacheLocation(args['cacheOld']) cache.initialize() analyzer = BackupAnalyzer() analyzer.setCache(cache) analyzer.initialize() logging.info("files with full hashes: %s files" % (analyzer.getFilesWithFullHashesCount())) hh = Hasher.FullContentHashV1.Base() hh.initialize() fh = None if args['Hasher'] == 'FastContentHashV1.Cached': fh = Hasher.FastContentHashV1.Cached() elif args['Hasher'] == 'FastContentHashV2.Cached': fh = Hasher.FastContentHashV2.Cached() elif args['Hasher'] == 'FastContentHashV2.Cached_noInode':
# NOTE(review): truncated fragment -- it ends mid-generator-expression inside
# the '"\n ".join(...)' call (the 'for' clause and closing parens are outside
# this chunk).  It appears to be a re-formatted duplicate of the stats-report
# script seen earlier in this file; the missing tail makes a safe rewrite
# impossible here -- confirm against the full file before touching it.
import Cache.sqlite as sqlite import os, datetime parser = argparse.ArgumentParser(description="Create the sqlite DB") parser.add_argument("--cache", dest="cache", action="store", type=str, default="", help="TODO") parser.add_argument("--data", dest="data", action="store", type=str, default="", help="TODO") parser.add_argument("--mode", dest="mode", action="store", type=str, default="auto", help="TODO") args = vars(parser.parse_args()) cache = sqlite.sqlite() cache.setCacheLocation(args["cache"]) cache.initialize() analyzer = BackupAnalyzer() analyzer.setCache(cache) analyzer.initialize() if args["mode"] == "auto": print "items total: %d files" % (analyzer.getFilesCount()) print "items total: %d dirs" % (analyzer.getDirsCount()) print "size total: %s" % (humanize.naturalsize(analyzer.getTotalSize())) print "   avg size: %s" % (humanize.naturalsize(analyzer.getAvgSize())) print "   median size: %s" % (humanize.naturalsize(analyzer.getMedianSize())) print "duplicated files total: %s files" % (analyzer.getDuplicatedFilesCount()) print "duplicated files size: %s" % (humanize.naturalsize(analyzer.getDuplicatedFilesSize())) print "duplicated empty files : %s files" % (analyzer.getEmptyFilesCount()) print "largest 10 files: \n %s" % "\n ".join( ( ("%s (%s)" % (os.path.basename(path), humanize.naturalsize(size)))
# NOTE(review): 'parser' must already exist at this point, and args['percent']
# is read below but '--percent' is never declared in this fragment -- confirm
# it is added elsewhere in the file.
#
# BUG FIX: the originals used type=int with default='' -- argparse does NOT
# pass defaults through 'type', so omitting the flag left a string '' that
# breaks max('', ...) and the >= comparisons.  Use integer defaults.
parser.add_argument('--min', dest='min', action='store', type=int, default=0, help='TODO')
parser.add_argument('--verbose', dest='verbose', action='store', type=int, default=0, help='TODO')
args = vars(parser.parse_args())

# Same level selection as the original if/else: >=4 -> DEBUG, otherwise WARNING.
if args['verbose'] >= 4:
    _log_level = logging.DEBUG
else:
    _log_level = logging.WARNING
logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s',
                    level=_log_level,
                    datefmt='%Y%m%d %I:%M:%S')

pfmt = PathFormatter(120)

cache = sqlite.sqlite()
cache.setCacheLocation(args['cache'])
cache.initialize()

analyzer = BackupAnalyzer()
analyzer.setCache(cache)
analyzer.initialize()

# Lazy %-args: only formatted when INFO logging is enabled.
logging.info("files with full hashes: %s files", analyzer.getFilesWithFullHashesCount())
logging.info("files without full hashes: %s files", analyzer.getFilesWithoutFullHashesCount())

hh = Hasher.FullContentHashV1.Base()
hh.initialize()

# Sample at least --min files, or --percent of all files, whichever is larger.
# BUG FIX: args['percent']/100 was integer division under Python 2 (always 0
# for percent < 100, so --percent was silently ignored); divide by 100.0 so
# the fraction survives.
files = analyzer.getFilesWithoutFullHashes(
    'random',
    max(args['min'],
        math.ceil(analyzer.getFilesCount() * (args['percent'] / 100.0))))

for (p, fhash, sz) in files:
    logging.info(" hash: %s", pfmt.format(p).ljust(120))