예제 #1
0
파일: dupes.py 프로젝트: dirkraft/backuper
def debug_redis(*args):
    """
    Execute an arbitrary Redis command and log its reply.

    A list reply is logged one member per ``log.err`` call; any other reply is logged as a
    single value.

    :param args: the command name followed by its arguments, passed straight to
        ``redis.execute_command``
    """
    result = redis.execute_command(*args)
    # isinstance (rather than comparing the type's name) also covers list subclasses and
    # cannot be fooled by an unrelated class that merely happens to be called "list".
    if isinstance(result, list):
        for member in result:
            log.err(member)
    else:
        log.err(result)
예제 #2
0
파일: dupes.py 프로젝트: dirkraft/backuper
def dupes():
    """
    Report files whose contents are identical.

    Iterates over every ``hash:*`` key in Redis in sorted order, reads the list of paths stored
    under it, and compares each pair of those paths byte-for-byte. Every confirmed duplicate
    pair is printed to stdout as the two paths separated by ``' \\t '``.

    A pair that cannot be compared because one of its files is missing or unreadable is logged
    and skipped, so one stale path does not abort the whole report.
    """
    for key in sorted(redis.keys('hash:*')):
        key = key.decode()
        collision_paths = redis.lrange(key, 0, -1)

        for collision_pair in combinations(collision_paths, 2):
            log.err('Checking', key, *collision_pair)
            file_name_0 = collision_pair[0].decode()
            file_name_1 = collision_pair[1].decode()
            try:
                # shallow=False forces a real content comparison instead of trusting os.stat.
                identical = filecmp.cmp(file_name_0, file_name_1, shallow=False)
            except OSError as e:
                # The recorded paths may be stale (file deleted, moved or unreadable).
                log.err('  ', file_name_0, file_name_1, e)
                continue

            if identical:
                print(file_name_0, '\t', file_name_1)
예제 #3
0
파일: dupes.py 프로젝트: dirkraft/backuper
def scan(dir_, min_size):
    """
    Hash every sufficiently large file under ``dir_`` and record the result in Redis.

    For each path yielded by ``files.scan_files`` that is at least ``min_size`` bytes and has no
    ``path:<path>`` key yet, the MD5 hex digest is stored under ``path:<path>``, the path is
    appended to the ``hash:<md5>`` list, and the path is printed to stdout.

    Files that cannot be stat'ed, opened or read are logged and skipped.

    :param dir_: directory to scan
    :param min_size: minimum file size in bytes; smaller files are ignored
    """
    for abspath in files.scan_files(dir_, progress_interval_sec=PROGRESS_INTERVAL_SEC):

        try:
            too_small = os.stat(abspath).st_size < min_size
        except OSError as e:
            # The file may have disappeared or become inaccessible since it was listed.
            log.err('  ', abspath, e)
            continue

        if too_small:
            continue

        if redis.get('path:' + abspath):
            # Already hashed
            continue

        log.err('Hashing', abspath)

        md5er = hashlib.md5()
        try:
            # open() sits inside the try so that e.g. a permission error skips this one file
            # instead of aborting the whole scan.
            with open(abspath, 'rb') as file:
                # Buffering is required for large files because a single read crashes python (maybe only on OSX).
                # I think that it should not and is a bug, but there is no consensus nor documentation to indicate
                # intended behavior.
                for buf in iter(partial(file.read, BUFFER_THRESH), b''):
                    md5er.update(buf)

        except OSError as e:
            log.err('  ', abspath, e)
            continue

        md5 = md5er.hexdigest()

        log.err('  ', os.path.relpath(abspath, start=os.getcwd()), md5)
        redis.set('path:' + abspath, md5)
        redis.rpush('hash:' + md5, abspath)
        print(abspath)
예제 #4
0
파일: files.py 프로젝트: dirkraft/backuper
def scan_files(dir_, progress_interval_sec=None, excludes=None):
    """
    Scan a directory recursively for real files.

    Symbolic links, and entries that are neither directories nor regular files, are skipped.
    Exclude patterns are tested against files only, at every depth of the tree.

    :param dir_: path to scan
    :param progress_interval_sec: how frequently to output scan progress messages on stderr, if at all
    :param excludes: optional collection of ``fnmatch`` patterns; a file whose full path matches
        any of them is not yielded
    :return: a generator which yields the path of each file, built by joining entry names onto
        ``dir_`` (so the paths are absolute when ``dir_`` is absolute)
    """
    global last_progress

    for filename in os.listdir(dir_):
        abspath = os.path.join(dir_, filename)

        # Output a note on progress?
        now_time = time.perf_counter()
        if progress_interval_sec and now_time > last_progress + progress_interval_sec:
            log.err('Scan progress', abspath)
            last_progress = now_time

        if os.path.islink(abspath):
            # Ignore links
            continue

        if os.path.isdir(abspath):
            # Recurse into directories, carrying the caller's options along so that progress
            # reporting and exclude patterns keep working below the top level.
            yield from scan_files(abspath, progress_interval_sec=progress_interval_sec, excludes=excludes)
            continue

        if not os.path.isfile(abspath):
            # Not a file (what is it?)
            continue

        if excludes and any(fnmatch.fnmatch(abspath, exclude) for exclude in excludes):
            # Matched exclude pattern
            continue

        yield abspath
예제 #5
0
파일: cli.py 프로젝트: dirkraft/backuper
def sync(preview, single_select):
    """
    Run ``aws s3 sync`` for each directory pair produced by ``iter_sync_dirs``.

    :param preview: when truthy, print each command line instead of executing it
    :param single_select: handed to ``iter_sync_dirs`` unchanged
    """

    log.err('')
    log.err(datetime.now().isoformat(), 'Starting sync')

    for entry in iter_sync_dirs(single_select):
        path_spec, base_excludes, local_path, dest = entry

        # Assemble: aws s3 sync <base excludes> [<per-path cli args>] <source> <destination>
        command = ['aws', 's3', 'sync'] + list(base_excludes)
        if 'cli_args' in path_spec:
            command += path_spec['cli_args']
        command += [local_path, dest]

        if preview:
            print(' '.join(command))
            continue

        print(local_path, ' > ', dest)
        subprocess.call(command)

    log.err(datetime.now().isoformat(), 'Finished sync')
예제 #6
0
파일: cli.py 프로젝트: dirkraft/backuper
def validate_config():
    """
    Load the configuration and print it as indented JSON.

    The value of ``cfg.CONFIG_PATH`` is logged first; the result of ``cfg.load()`` is then
    written to stdout with a two-space indent.
    """
    log.err('Config at', cfg.CONFIG_PATH)
    loaded = cfg.load()
    rendered = json.dumps(loaded, indent=2)
    print(rendered)
예제 #7
0
파일: cli.py 프로젝트: dirkraft/backuper
def set_config_path(path):
    """
    Set location of configuration.

    Hands ``path`` to ``cfg.set_config_path`` and then reports the new value through ``log.err``.

    :param path: configuration location to pass on to ``cfg.set_config_path``
    """
    cfg.set_config_path(path)
    log.err('Config path set to', path)