Example #1
0
def classify_files(archive_dir, by_path):
    unchanged, changed, new, seen = set(), set(), set(), set()

    print 'walking ..'
    all_files = filter(should_index, flat_walk(archive_dir)) 
    for i, file in enumerate(all_files):
        if i % 1000 == 999: 
            print 'processing', i, '/', len(all_files), '...'
        relpath = path.relpath(file, archive_dir)
        seen.add(relpath)
        if relpath not in by_path:
            new.add(relpath)
        else:
            mtime, size = stat(file)
            if (by_path[relpath]['mtime'] == mtime and
                by_path[relpath]['filesize'] == size):
                unchanged.add(relpath)
            else:
                changed.add(relpath)

    missing = set()
    for pth in by_path:
        if pth not in seen:
            missing.add(pth)

    return unchanged, changed, new, missing
Example #2
0
def import_file(from_path, date, home, index, dry_run, mv):
    """Place one file into the archive under home and add it to the index.

    The destination directory is home/<first 7 chars of date>/<date>
    (presumably a 'YYYY-MM' / 'YYYY-MM-DD' layout -- confirm with callers);
    the file keeps its base name.

    from_path -- file to import
    date      -- date string used to build the destination directory
    home      -- archive root; indexed paths are relative to it
    index     -- object whose add() records the imported file
    dry_run   -- when true, only log what would happen and return
    mv        -- when true the file is moved, otherwise it is copied

    After the transfer autorotate() is run on the new file; when it returns
    a truthy path, that path is logged as the original and its md5 is passed
    to the index as md5_original (None otherwise).
    """
    target_dir = path.join(home, date[:7], date)
    to_path = path.join(target_dir, path.basename(from_path))
    rel_path = path.relpath(to_path, home)

    logging.info('importing %s to %s (date %s)' % (from_path, rel_path, date))
    if dry_run:
        return

    if not path.exists(target_dir):
        makedirs(target_dir)

    # Same call shape either way; only the operation differs.
    transfer = move if mv else copy
    transfer(from_path, target_dir)

    md5_original = None
    original_path = autorotate(to_path)
    if original_path:
        logging.info('auto rotated %s. original at %s' % (to_path, original_path))
        md5_original = md5(original_path)

    # Read size/mtime and checksum only after the optional rotation.
    mtime, size = stat(to_path)
    digest = md5(to_path)
    index.add(from_path, rel_path, digest, mtime, size, date,
              md5_original=md5_original)
Example #3
0
def update_moved_files(args, moved, by_path, index):
    for from_path, to_path in moved.items():
        print 'moved', from_path, '=>', to_path

        full_path = path.join(args.archive_dir, to_path)
        mtime, size = stat(full_path)

        rowid = by_path[from_path]['rowid']
        index.set(rowid, path=to_path, mtime=mtime)
Example #4
0
def update_changed_files(args, changed, by_path, index):
    for pth in changed:
        print 'changed', pth

        full_path = path.join(args.archive_dir, pth)
        hash = md5(full_path)
        mtime, size = stat(full_path)

        rowid = by_path[pth]['rowid']

        index.set(rowid, md5=hash, mtime=mtime, filesize=size)
Example #5
0
def add_new_files(args, new, index):
    for new_path in new:

        full_path = path.join(args.archive_dir, new_path)
        hash = md5(full_path)
        mtime, size = stat(full_path)

        already = index.get(md5=hash)
        if already:
            if args.allow_duplicate:
                print 'duplicate-new', new_path, 'with', already[0]['path']
            else:
                print 'duplicate-ignore', new_path, 'with', already[0]['path']
                continue
        else:
            print 'new', new_path

        index.add(origin=full_path, path=new_path, md5=hash, mtime=mtime,
                  filesize=size)
Example #6
0
#!/usr/bin/python
# -*- coding: utf-8 -*-
from os import path
from sys import argv
from lib.fs_utils import flat_walk, md5, stat
from lib.index import Index
from lib.config import should_index

home = argv[1]

with Index(path.join(home, 'pictures.db')) as index:
    all = flat_walk(home)

    for file in filter(should_index, all):
        hash = md5(file)
        mtime, size = stat(file)
        pth = path.relpath(file, home)

        print 'indexing', pth, mtime, size

        index.add(origin=file, path=pth, mtime=mtime, filesize=size, md5=hash)

    


Example #7
0
from lib.index import Index
from lib.image_utils import autorotate
from lib.fs_utils import stat, md5

with open('errors.txt', 'w') as errors:
    with Index('/Volumes/Passport/pictures-backup/pictures.db', autocommit=True) as index:
        all = index.get()

        for i, a in enumerate(sorted(all, key=lambda p: p['path'])):
            if i % 100 == 99: print i, '/', len(all), '..', a['path']
            pth = '/Volumes/Passport/pictures-backup/' + a['path']
            try:
                if autorotate(pth):
                    mtime, size = stat(pth)
                    hash = md5(pth)
                    index.set(a['rowid'], mtime=mtime, filesize=size, md5=hash)
                    print 'updated', pth
            except:
                errors.write('%s\n' % pth.encode('utf-8'))