Example #1
0
def list_remote():
    """Return a list with the ``name`` of every bucket of a new resource.

    A resource is obtained from ``create_resource()`` on each call and its
    ``buckets.all()`` collection is iterated in the order it yields.
    """
    buckets = create_resource().buckets.all()
    return [bucket.name for bucket in buckets]
Example #2
0
def repository(repository_name):
    """Yield the bucket named ``repository_name``, emptied before and after.

    This is a generator: every object in the bucket is deleted, the bucket
    is yielded once, and when the generator is resumed every object is
    deleted again.
    NOTE(review): presumably wrapped as a fixture or context manager by a
    decorator outside this view -- confirm.
    """
    bucket = create_resource().Bucket(repository_name)

    def purge():
        # Delete every object currently stored in the bucket.
        bucket.objects.all().delete()

    purge()
    yield bucket
    purge()
Example #3
0
def init_worker(*fs):
    """Attach a newly created resource to each object in ``fs``.

    Every positional argument gets its own ``create_resource()`` result
    stored on its ``_resource`` attribute. With no arguments this is a no-op.
    """
    for target in fs:
        setattr(target, '_resource', create_resource())
Example #4
0
def download(repository_name, root=data_root()):
    """Bring the local copy of a repository in line with its remote contents.

    Local and remote objects are listed and compared by key:

    * objects that exist only locally have ``remove()`` called on them;
    * objects that exist only remotely are passed to ``downloader``;
    * objects that exist on both sides are passed, as ``(local, remote)``
      pairs, to ``changed``; for every pair reported as changed the remote
      object is passed to ``downloader`` as well.

    ``changed`` and ``downloader`` run in a 16-process pool whose workers
    are set up by ``init_worker``.

    Args:
        repository_name: Name of the repository; also used as the name of
            the directory created under ``root``.
        root: Base directory, either a ``str`` or a path object. The
            default is the value ``data_root()`` returned when this
            function was defined.

    Returns:
        The path ``root / repository_name``.
    """
    if isinstance(root, str):
        root = Path(root)

    # The pool is created before any resource exists in this process, as in
    # the original ordering; each worker builds its own via ``init_worker``.
    pool = mp.Pool(
        processes=16,
        initializer=init_worker,
        initargs=(loader, changed, downloader),
    )

    try:
        resource = create_resource()

        repository_path = root / repository_name
        repository_path.mkdir(parents=True, exist_ok=True)

        local_objects = list_local_objects(repository_name,
                                           root=root,
                                           resource=resource)
        remote_objects = list_remote_objects(repository_name,
                                             root=root,
                                             resource=resource)

        removed_object_keys = local_objects.keys() - remote_objects.keys()
        created_object_keys = remote_objects.keys() - local_objects.keys()
        potentially_updated_object_keys = (
            local_objects.keys() & remote_objects.keys())

        # Plain tuples instead of ``itemgetter(*keys)(mapping)``: itemgetter
        # raises TypeError for zero keys and returns a bare item (not a
        # tuple) for exactly one key, which broke the sorting and the tuple
        # concatenation below.
        removed_objects = tuple(
            local_objects[key] for key in removed_object_keys)
        created_objects = tuple(
            remote_objects[key] for key in created_object_keys)
        potentially_updated_objects = tuple(
            (local_objects[key], remote_objects[key])
            for key in potentially_updated_object_keys)

        # Removal happens in descending key order.
        # NOTE(review): presumably so nested entries go before their
        # parents -- confirm against the object implementation.
        for removed_object in sorted(removed_objects,
                                     key=methodcaller('key'),
                                     reverse=True):
            removed_object.remove()

        # FIXME remove duplication
        potentially_updated_changed = list(
            tqdm(
                pool.imap(changed, potentially_updated_objects, 2),
                desc='checking changes',
                total=len(potentially_updated_objects),
            ))

        # NOTE: the remote half of each pair is kept here (not the local
        # one), detached from its resource via ``with_resource(None)``.
        updated_objects = tuple(
            remote_object.with_resource(None)
            for is_changed, (_local_object, remote_object) in zip(
                potentially_updated_changed, potentially_updated_objects)
            if is_changed)

        sorted_created_updated_objects = sorted(
            created_objects + updated_objects,
            key=methodcaller('key'),
        )

        list(
            tqdm(
                pool.imap(downloader, sorted_created_updated_objects),
                desc='download',
                total=len(sorted_created_updated_objects),
            ))

        pool.close()
    except BaseException:
        # Do not leave worker processes running when any step fails.
        pool.terminate()
        raise
    finally:
        # Reap the workers after either close() or terminate().
        pool.join()

    return repository_path
Example #5
0
def upload(repository_name, root=data_root()):
    """Bring the remote bucket of a repository in line with its local copy.

    The bucket named ``repository_name`` is created on a best-effort basis,
    then local and remote objects are listed and compared by key:

    * objects that exist only remotely are passed to ``remover``;
    * objects that exist only locally are passed to ``uploader``;
    * objects that exist on both sides are passed, as ``(local, remote)``
      pairs, to ``changed``; for every pair reported as changed the local
      object is passed to ``uploader`` as well.

    ``remover``, ``changed`` and ``uploader`` run in a 16-process pool whose
    workers are set up by ``init_worker``.

    Args:
        repository_name: Name of the repository and of its bucket.
        root: Base directory, either a ``str`` or a path object. The
            default is the value ``data_root()`` returned when this
            function was defined.

    Returns:
        The bucket object obtained from ``resource.Bucket(repository_name)``.
    """
    # Import the submodule explicitly so ``botocore.exceptions`` is
    # guaranteed to be loaded; done first so a missing package cannot leak
    # an already started pool.
    import botocore.exceptions

    if isinstance(root, str):
        root = Path(root)

    # The pool is created before any resource exists in this process, as in
    # the original ordering; each worker builds its own via ``init_worker``.
    pool = mp.Pool(
        processes=16,
        initializer=init_worker,
        initargs=(loader, changed, remover, uploader),
    )

    try:
        resource = create_resource()
        repository_bucket = resource.Bucket(repository_name)

        try:
            repository_bucket.create()
        except botocore.exceptions.ClientError:
            # Best effort: the service rejected the request, most commonly
            # because the bucket already exists. A real problem resurfaces
            # when the bucket is listed below. Unlike a bare ``except`` this
            # no longer swallows KeyboardInterrupt or SystemExit.
            pass

        local_objects = list_local_objects(
            repository_name, root=root, resource=resource)
        remote_objects = list_remote_objects(
            repository_name, root=root, resource=resource)

        removed_object_keys = remote_objects.keys() - local_objects.keys()
        created_object_keys = local_objects.keys() - remote_objects.keys()
        potentially_updated_object_keys = (
            local_objects.keys() & remote_objects.keys())

        # Plain tuples instead of ``itemgetter(*keys)(mapping)``: itemgetter
        # raises TypeError for zero keys and returns a bare item (not a
        # tuple) for exactly one key, which broke the sorting and the tuple
        # concatenation below.
        removed_objects = tuple(
            remote_objects[key] for key in removed_object_keys)
        created_objects = tuple(
            local_objects[key] for key in created_object_keys)
        potentially_updated_objects = tuple(
            (local_objects[key], remote_objects[key])
            for key in potentially_updated_object_keys)

        # Removal happens in descending key order.
        sorted_removed_objects = sorted(
            removed_objects,
            key=methodcaller('key'),
            reverse=True,
        )

        list(tqdm(
            pool.imap(remover, sorted_removed_objects, 2),
            desc='remove',
            total=len(sorted_removed_objects),
        ))

        potentially_updated_changed = list(tqdm(
            pool.imap(changed, potentially_updated_objects, 2),
            desc='checking changes',
            total=len(potentially_updated_objects),
        ))

        # The local half of each changed pair is what gets uploaded,
        # detached from its resource via ``with_resource(None)``.
        updated_objects = tuple(
            local_object.with_resource(None)
            for is_changed, (local_object, _remote_object) in zip(
                potentially_updated_changed, potentially_updated_objects)
            if is_changed)

        sorted_created_updated_objects = sorted(
            created_objects + updated_objects,
            key=methodcaller('key'),
        )

        list(tqdm(
            pool.imap(uploader, sorted_created_updated_objects, 2),
            desc='upload',
            total=len(sorted_created_updated_objects),
        ))

        pool.close()
    except BaseException:
        # Do not leave worker processes running when any step fails.
        pool.terminate()
        raise
    finally:
        # Reap the workers after either close() or terminate().
        pool.join()

    return repository_bucket