def classify_files(archive_dir, by_path): unchanged, changed, new, seen = set(), set(), set(), set() print 'walking ..' all_files = filter(should_index, flat_walk(archive_dir)) for i, file in enumerate(all_files): if i % 1000 == 999: print 'processing', i, '/', len(all_files), '...' relpath = path.relpath(file, archive_dir) seen.add(relpath) if relpath not in by_path: new.add(relpath) else: mtime, size = stat(file) if (by_path[relpath]['mtime'] == mtime and by_path[relpath]['filesize'] == size): unchanged.add(relpath) else: changed.add(relpath) missing = set() for pth in by_path: if pth not in seen: missing.add(pth) return unchanged, changed, new, missing
def import_file(from_path, date, home, index, dry_run, mv):
    """Bring one file into the archive rooted at home and register it in index.

    The file lands in home/<date[:7]>/<date>/<basename of from_path>
    (date is presumably an ISO 'YYYY-MM-DD' string, so date[:7] is the
    year-month -- confirm against callers).

    Arguments:
      from_path -- file to import
      date      -- date string used to build the destination directory
      home      -- archive root; the indexed path is relative to it
      index     -- index object; its add() is called with the new entry
      dry_run   -- when true, only log the planned import and return
      mv        -- when true the file is moved, otherwise it is copied

    After the transfer autorotate() is run on the destination. If it returns
    a truthy value, that value is treated as the path of the pre-rotation
    original and its md5 is stored as md5_original; otherwise md5_original
    is None.
    """
    target_dir = path.join(home, date[:7], date)
    to_path = path.join(target_dir, path.basename(from_path))
    rel_path = path.relpath(to_path, home)
    logging.info('importing %s to %s (date %s)' % (from_path, rel_path, date))
    if dry_run:
        return

    if not path.exists(target_dir):
        makedirs(target_dir)
    transfer = move if mv else copy
    transfer(from_path, target_dir)

    md5_original = None
    original_path = autorotate(to_path)
    if original_path:
        logging.info('auto rotated %s. original at %s' % (to_path, original_path))
        md5_original = md5(original_path)

    # Stat and hash are taken after autorotate so they describe the final file.
    mtime, size = stat(to_path)
    index.add(from_path, rel_path, md5(to_path), mtime, size, date,
              md5_original=md5_original)
def update_moved_files(args, moved, by_path, index): for from_path, to_path in moved.items(): print 'moved', from_path, '=>', to_path full_path = path.join(args.archive_dir, to_path) mtime, size = stat(full_path) rowid = by_path[from_path]['rowid'] index.set(rowid, path=to_path, mtime=mtime)
def update_changed_files(args, changed, by_path, index): for pth in changed: print 'changed', pth full_path = path.join(args.archive_dir, pth) hash = md5(full_path) mtime, size = stat(full_path) rowid = by_path[pth]['rowid'] index.set(rowid, md5=hash, mtime=mtime, filesize=size)
def add_new_files(args, new, index): for new_path in new: full_path = path.join(args.archive_dir, new_path) hash = md5(full_path) mtime, size = stat(full_path) already = index.get(md5=hash) if already: if args.allow_duplicate: print 'duplicate-new', new_path, 'with', already[0]['path'] else: print 'duplicate-ignore', new_path, 'with', already[0]['path'] continue else: print 'new', new_path index.add(origin=full_path, path=new_path, md5=hash, mtime=mtime, filesize=size)
#!/usr/bin/python # -*- coding: utf-8 -*- from os import path from sys import argv from lib.fs_utils import flat_walk, md5, stat from lib.index import Index from lib.config import should_index home = argv[1] with Index(path.join(home, 'pictures.db')) as index: all = flat_walk(home) for file in filter(should_index, all): hash = md5(file) mtime, size = stat(file) pth = path.relpath(file, home) print 'indexing', pth, mtime, size index.add(origin=file, path=pth, mtime=mtime, filesize=size, md5=hash)
from lib.index import Index from lib.image_utils import autorotate from lib.fs_utils import stat, md5 with open('errors.txt', 'w') as errors: with Index('/Volumes/Passport/pictures-backup/pictures.db', autocommit=True) as index: all = index.get() for i, a in enumerate(sorted(all, key=lambda p: p['path'])): if i % 100 == 99: print i, '/', len(all), '..', a['path'] pth = '/Volumes/Passport/pictures-backup/' + a['path'] try: if autorotate(pth): mtime, size = stat(pth) hash = md5(pth) index.set(a['rowid'], mtime=mtime, filesize=size, md5=hash) print 'updated', pth except: errors.write('%s\n' % pth.encode('utf-8'))