def rescan(args, dirname, files):
    """Check every regular file of one directory against the hash database.

    The ``(args, dirname, files)`` signature is the visitor signature that
    ``scan_store`` hands to ``walk``.

    For each name in *files* that refers to a regular file:

    * no database entry yet: the file is hashed and inserted
      (``fnew`` and ``size_new`` are updated);
    * entry last checked less than ``env['INTERVAL']`` seconds ago: the
      file is skipped (``fskipped`` and ``size_skipped`` are updated);
    * otherwise the file is hashed again and compared with the stored hash.

    ``ftotal`` is incremented for every regular file that is processed.

    Args:
        args: 4-tuple ``(env, log, sql_hasher, stats)``: settings mapping,
            logger, hash database wrapper and the mutable counters dict.
        dirname: directory currently being visited.
        files: names of the entries inside *dirname*.

    Raises:
        AssertionError: if a re-hashed file no longer matches its stored
            hash. The message names the file and both hashes.
    """
    env, log, sql_hasher, stats = args
    for f in files:
        path = abspath(join(dirname, f))
        upath = path.encode('utf-8')
        if not isfile(path):
            # Directories and other non-regular entries are not hashed.
            continue

        dbentry = sql_hasher.read(path)
        if dbentry:
            # dbentry[5] is the timestamp of the last check (st_last below).
            last_check = time() - dbentry[5]
            if last_check < env['INTERVAL']:
                stats['fskipped'] += 1
                stats['size_skipped'] += dbentry[2]
                # Remaining time until the next check is due, in days.
                days = (env['INTERVAL'] - last_check) / (60 * 60 * 24)
                log.info('- [in {} days] \t{}'.format(round(days, 2), upath))
            else:
                # Only the stored hash and size are needed; the unpacking is
                # kept so the entry's six-field layout is still enforced.
                _path, st_hash, st_size, _took, _first, _last = dbentry
                chash, took = hash_pipe(path, sql_hasher.ctype)
                log.info('= [{} {}] \t{}'.format(
                    grab_unit(st_size), ftime(took), upath))
                if st_hash != chash:
                    # Format once so the exception carries the same details
                    # as the log line instead of the bare template.
                    err = '! {}\n\tstored: {}\n\tcurrent: {}\n'.format(
                        path, st_hash, chash)
                    log.error(err)
                    raise AssertionError(err)
        else:
            # First time this file is seen: hash it and store the result.
            size = os.stat(path).st_size
            checksum, took = hash_pipe(path, sql_hasher.ctype)
            stats['size_new'] += size
            log.info('* [{0:>5} {1:>7}] \t{2}'.format(
                grab_unit(size), ftime(took), upath))
            sql_hasher.insert(path, checksum, size, took)
            stats['fnew'] += 1

        stats['ftotal'] += 1
def scan_store(name, dry=False):
    """Scan the watched store *name* and return a summary of the run.

    Unless *dry* is true, the store's directory tree is walked with
    ``rescan`` as the visitor. The summary is built afterwards from the
    counters filled in by the walk and from the hash database.

    Args:
        name: key of the store in ``ICENV['WATCHED']``.
        dry: when true, skip the walk and only report the current database
            state.

    Returns:
        dict with the counters ``fnew``, ``fdiffers``, ``ftotal`` and
        ``fskipped``; ``size_new``, ``size_skipped`` and ``size_sum``
        formatted by ``grab_unit``; the raw ``size``; and ``path``,
        ``tdiff``, ``speed``, ``line``, ``algorithm``, ``label``,
        ``filecount``, ``runtime`` and ``took``.

    Raises:
        IOError: if *name* is not a watched store.
        Exception: any error raised during the walk is logged and re-raised
            unchanged. A ``KeyboardInterrupt`` is logged and swallowed, and
            the summary is still returned.
    """
    stats = dict(fnew=0, fdiffers=0, ftotal=0, fskipped=0,
                 size_new=0, size_skipped=0)
    if name not in ICENV['WATCHED']:
        raise IOError('store does not exist!')

    path = ICENV['WATCHED'][name]
    stats['path'] = path
    algorithm = ICENV['ALGORITHM']

    logfile = '{}-{}-{}.log'.format(name, algorithm, now())
    logger = init_logger(join(LOG_DIR, logfile))
    sql_hasher = SQLhash(dbfile(name, ICENV), algorithm)

    tstart = time()
    try:
        if not dry:
            walk(path, rescan, (ICENV, logger, sql_hasher, stats))
    except KeyboardInterrupt:
        logger.debug('caught KeyboardInterrupt; stop!')
    except Exception as err:
        logger.debug('undefined error: {}'.format(err))
        # Bare raise keeps the traceback of the original failure.
        raise
    tstop = time()

    stats['size'] = sql_hasher.size()
    stats['tdiff'] = tstop - tstart
    # Float divisor: with integer division (Python 2 without
    # ``from __future__ import division``) the MiB count would be floored
    # before the rate is computed. ``or 1`` avoids dividing by zero.
    stats['speed'] = (stats['size_new'] / 1024.0 ** 2) / (stats['tdiff'] or 1)
    stats['line'] = 79 * '-'
    stats['algorithm'] = algorithm
    stats['label'] = name
    stats['filecount'] = sql_hasher.length()
    stats['runtime'] = round(stats['tdiff'], 5)
    stats['size_sum'] = grab_unit(stats['size'])
    stats['took'] = ftime(stats['tdiff'])
    # Replace the raw byte counters with their formatted form last, after
    # ``speed`` has been derived from the numeric value.
    for key in ('size_new', 'size_skipped'):
        stats[key] = grab_unit(stats[key])
    return stats