Example 1
import logging
from os import path, makedirs
from shutil import copy, move

from lib.fs_utils import md5, stat
from lib.image_utils import autorotate


def import_file(from_path, date, home, index, dry_run, mv):
    # Imported files are grouped under <home>/<YYYY-MM>/<YYYY-MM-DD>/.
    ym = date[:7]
    dest_dir = path.join(home, ym, date)
    to_path = path.join(dest_dir, path.basename(from_path))
    rel_path = path.relpath(to_path, home)

    logging.info('importing %s to %s (date %s)', from_path, rel_path, date)
    if dry_run:
        return

    if not path.exists(dest_dir):
        makedirs(dest_dir)
    if mv:
        move(from_path, dest_dir)
    else:
        copy(from_path, dest_dir)

    # autorotate() rotates the image in place and returns the path of the
    # preserved original, or a falsy value if no rotation was needed.
    original_path = autorotate(to_path)
    if original_path:
        logging.info('auto rotated %s. original at %s', to_path, original_path)
        md5_original = md5(original_path)
    else:
        md5_original = None

    mtime, size = stat(to_path)
    index.add(from_path, rel_path, md5(to_path), mtime, size, date,
              md5_original=md5_original)
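A minimal usage sketch for import_file, assuming the lib.index.Index shown in the later examples; the inbox path, archive root and date below are hypothetical:

from lib.index import Index

# hypothetical layout: /archive is `home`, with the DB stored next to it
with Index('/archive/pictures.db') as index:
    import_file('/inbox/IMG_0001.jpg', '2015-03-14',
                home='/archive', index=index, dry_run=False, mv=False)
    # the file lands in /archive/2015-03/2015-03-14/IMG_0001.jpg and its
    # hash, mtime, size and import date are recorded in the index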
Example 2
from os import path

from lib.fs_utils import md5, stat


def update_changed_files(args, changed, by_path, index):
    # Re-hash files whose content changed and refresh their index rows.
    for pth in changed:
        print 'changed', pth

        full_path = path.join(args.archive_dir, pth)
        file_md5 = md5(full_path)
        mtime, size = stat(full_path)

        rowid = by_path[pth]['rowid']

        index.set(rowid, md5=file_md5, mtime=mtime, filesize=size)
Example 3
import logging

from lib.fs_utils import md5


def filter_duplicates(to_import, index):
    logging.info('Filtering duplicates ..')
    duplicates, by_md5, new = {}, {}, []
    for i, f in enumerate(to_import):
        if i % 100 == 99:
            logging.info('Checking %d of %d ..', i + 1, len(to_import))

        # A file is a duplicate if its hash is already indexed (either as-is
        # or as a pre-rotation original), or if an earlier file in this batch
        # had the same hash.
        file_md5 = md5(f)
        existing = index.get(md5=file_md5) or index.get(md5_original=file_md5)
        if existing:
            duplicates[f] = existing[0]['path']
        elif file_md5 in by_md5:
            duplicates[f] = by_md5[file_md5]
        else:
            by_md5[file_md5] = f
            new.append(f)
    return new, duplicates
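A short sketch of how filter_duplicates might sit in an import pipeline; the candidate paths and DB location are assumptions:

from lib.index import Index

with Index('/archive/pictures.db') as index:  # hypothetical path
    to_import = ['/inbox/a.jpg', '/inbox/b.jpg', '/inbox/copy-of-a.jpg']
    new, duplicates = filter_duplicates(to_import, index)
    for f, existing in sorted(duplicates.items()):
        print 'skipping', f, '- already archived as', existing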
Example 4
from os import path

from lib.fs_utils import md5, stat


def add_new_files(args, new, index):
    for new_path in new:
        full_path = path.join(args.archive_dir, new_path)
        file_md5 = md5(full_path)
        mtime, size = stat(full_path)

        # Skip files whose content is already indexed, unless duplicates are
        # explicitly allowed.
        already = index.get(md5=file_md5)
        if already:
            if args.allow_duplicate:
                print 'duplicate-new', new_path, 'with', already[0]['path']
            else:
                print 'duplicate-ignore', new_path, 'with', already[0]['path']
                continue
        else:
            print 'new', new_path

        index.add(origin=full_path, path=new_path, md5=file_md5, mtime=mtime,
                  filesize=size)
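The args object above only needs an archive_dir and an allow_duplicate flag; a plausible argparse setup for it (an assumption, not the project's actual CLI) looks like:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('archive_dir')                             # archive root
parser.add_argument('--allow-duplicate', action='store_true')  # args.allow_duplicate
args = parser.parse_args()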
Example 5
from os import path

from lib.fs_utils import md5


def detect_moved_files(args, new, missing, by_path):
    # Map the hash of every missing file back to its old path; a new file
    # with a matching hash is the same file after a move or rename.
    missing_md5 = {by_path[missing_path]['md5']: missing_path
                   for missing_path in missing}
    matched_new = set()
    moved = {}
    print 'matching moved files ..'
    for i, new_path in enumerate(sorted(new)):
        if i % 1000 == 999:
            print 'processing', i, '/', len(new), '..', new_path.encode('utf-8')
        file_md5 = md5(path.join(args.archive_dir, new_path))
        if file_md5 in missing_md5:
            from_path = missing_md5[file_md5]
            moved[from_path] = new_path
            matched_new.add(new_path)
            # each missing file can be matched at most once
            del missing_md5[file_md5]

    # Whatever was not matched is genuinely new or genuinely missing.
    new = {pth for pth in new if pth not in matched_new}
    missing = {pth for pth in missing if pth not in moved}

    return new, missing, moved
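A sketch of how the three return values might be consumed; that index.set accepts a path column to rewrite a moved row is an assumption about the Index API:

new, missing, moved = detect_moved_files(args, new, missing, by_path)
for from_path, to_path in sorted(moved.items()):
    print 'moved', from_path, '->', to_path
    # assumed API: update the stored path in place instead of delete + re-add
    index.set(by_path[from_path]['rowid'], path=to_path)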
Example 6
#!/usr/bin/python
# -*- coding: utf-8 -*-
from os import path
from sys import argv

from lib.fs_utils import flat_walk, md5, stat
from lib.index import Index
from lib.config import should_index

home = argv[1]

with Index(path.join(home, 'pictures.db')) as index:
    # Walk the whole archive and index every file the config says to keep.
    all_files = flat_walk(home)

    for f in filter(should_index, all_files):
        file_md5 = md5(f)
        mtime, size = stat(f)
        pth = path.relpath(f, home)

        print 'indexing', pth, mtime, size

        index.add(origin=f, path=pth, mtime=mtime, filesize=size, md5=file_md5)
Example 7
from os import path

from lib.fs_utils import md5
from lib.index import Index

BACKUP_ROOT = '/home/jongman/data/pictures-backup'

with Index(path.join(BACKUP_ROOT, 'pictures.db'), autocommit=True) as index:
    rows = index.get()

    # Backfill md5_original for rows whose pre-rotation original
    # (<path>.original) is still on disk.
    for i, a in enumerate(sorted(rows, key=lambda p: p['path'])):
        if i % 100 == 99:
            print i, '/', len(rows), '..', a['path']
        pth = path.join(BACKUP_ROOT, a['path'] + '.original')
        if path.exists(pth) and a['md5_original'] is None:
            original_md5 = md5(pth)
            index.set(a['rowid'], md5_original=original_md5)
            print 'updated', pth
Example 8
from os import path

from lib.fs_utils import stat, md5
from lib.image_utils import autorotate
from lib.index import Index

BACKUP_ROOT = '/Volumes/Passport/pictures-backup'

with open('errors.txt', 'w') as errors:
    with Index(path.join(BACKUP_ROOT, 'pictures.db'), autocommit=True) as index:
        rows = index.get()

        # Rotate every indexed image in place; when an image was actually
        # rotated, refresh its mtime, size and hash in the index.
        for i, a in enumerate(sorted(rows, key=lambda p: p['path'])):
            if i % 100 == 99:
                print i, '/', len(rows), '..', a['path']
            pth = path.join(BACKUP_ROOT, a['path'])
            try:
                if autorotate(pth):
                    mtime, size = stat(pth)
                    file_md5 = md5(pth)
                    index.set(a['rowid'], mtime=mtime, filesize=size,
                              md5=file_md5)
                    print 'updated', pth
            except Exception:
                # log the failing path instead of aborting the whole run
                errors.write('%s\n' % pth.encode('utf-8'))