def classify_files(archive_dir, by_path):
    """Compare the files on disk under *archive_dir* against an index snapshot.

    Args:
        archive_dir: root directory to walk for indexable files.
        by_path: mapping of index-relative path -> record dict with at least
            'mtime' and 'filesize' keys (as produced by the index).

    Returns:
        A 4-tuple of sets of relative paths:
        (unchanged, changed, new, missing) where *missing* are paths present
        in the index but no longer found on disk.
    """
    unchanged, changed, new, seen = set(), set(), set(), set()
    print('walking ..')
    # Materialize into a list: we need len() for progress reporting, and in
    # Python 3 filter() returns a one-shot iterator.
    all_files = [f for f in flat_walk(archive_dir) if should_index(f)]
    for i, filename in enumerate(all_files):
        # Progress heartbeat once per 1000 files.
        if i % 1000 == 999:
            print('processing', i, '/', len(all_files), '...')
        relpath = path.relpath(filename, archive_dir)
        seen.add(relpath)
        if relpath not in by_path:
            new.add(relpath)
            continue
        # Known path: decide changed vs. unchanged by mtime + size only
        # (cheap metadata check, no hashing).
        mtime, size = stat(filename)
        entry = by_path[relpath]
        if entry['mtime'] == mtime and entry['filesize'] == size:
            unchanged.add(relpath)
        else:
            changed.add(relpath)
    # Indexed paths we never encountered on disk are missing.
    missing = set(by_path) - seen
    return unchanged, changed, new, missing
def main():
    """Import new pictures from args.from_dir into the archive at args.to_dir.

    Workflow: collect indexable files, detect their dates, drop files already
    present in the index (by content), then import the remainder. With
    --dry-run the index is not committed; with --move files are moved
    instead of copied (both handled by import_file / Index).
    """
    args = get_parser().parse_args()
    logging.info('searching for files to import ..')
    # List, not filter(): we need len() below and will iterate twice.
    to_import = [f for f in flat_walk(args.from_dir) if should_index(f)]
    logging.info('found %d files.', len(to_import))
    logging.info('detecting dates ..')
    dates = detect_dates(args, to_import)
    home = args.to_dir
    # autocommit=False during a dry run so nothing is persisted.
    with Index(path.join(home, 'pictures.db'), autocommit=not args.dry_run) as index:
        to_import, duplicates = filter_duplicates(to_import, index)
        logging.info('%d duplicates, %d to be imported',
                     len(duplicates), len(to_import))
        # Log the full plan first, then execute it.
        for imp in to_import:
            logging.info('Importing %s', imp)
        for imp in to_import:
            import_file(imp, dates[imp], home, index, args.dry_run, args.move)
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Build the picture index for an archive directory.

Usage: <script> ARCHIVE_DIR

Walks ARCHIVE_DIR, and for every file accepted by should_index records its
origin, archive-relative path, mtime, size and MD5 hash in pictures.db.
"""
from os import path
from sys import argv

from lib.fs_utils import flat_walk, md5, stat
from lib.index import Index
from lib.config import should_index

home = argv[1]

with Index(path.join(home, 'pictures.db')) as index:
    # Renamed locals: the originals ('all', 'file', 'hash') shadowed builtins.
    for filename in filter(should_index, flat_walk(home)):
        digest = md5(filename)
        mtime, size = stat(filename)
        relpath = path.relpath(filename, home)
        print('indexing', relpath, mtime, size)
        index.add(origin=filename, path=relpath, mtime=mtime,
                  filesize=size, md5=digest)