def list_remote():
    """Return the names of all buckets visible through a fresh resource.

    Returns:
        A list with the ``name`` attribute of every item produced by
        ``resource.buckets.all()``, in iteration order.
    """
    bucket_collection = create_resource().buckets.all()
    return [bucket.name for bucket in bucket_collection]
def repository(repository_name):
    """Yield the bucket ``repository_name``, emptied before and after use.

    All objects in the bucket are deleted before it is yielded and again
    once the consumer is done with it.  The second deletion sits in a
    ``finally`` clause so that it also runs when an exception is thrown
    into the generator at the ``yield`` or when the generator is closed
    early; otherwise the leftovers of a failed run would stay behind.

    NOTE(review): no decorator is visible on this generator; presumably it
    is wrapped as a fixture or context manager elsewhere -- confirm.

    Args:
        repository_name: Name handed to ``resource.Bucket``.

    Yields:
        The object returned by ``resource.Bucket(repository_name)``.
    """
    bucket = create_resource().Bucket(repository_name)
    bucket.objects.all().delete()
    try:
        yield bucket
    finally:
        bucket.objects.all().delete()
def init_worker(*fs):
    """Attach a newly created resource to each of the given objects.

    Passed as the ``initializer`` of the worker pools built in this module.
    ``create_resource()`` is called once per element of ``fs`` and the
    result is stored on that element as ``_resource``.

    Args:
        *fs: Objects (the pool's ``initargs``) that receive the attribute.
    """
    for target in fs:
        setattr(target, '_resource', create_resource())
def download(repository_name, root=data_root()):
    """Bring the local copy of a repository in line with its remote side.

    Objects that exist only locally are removed, objects that exist only
    remotely are handed to ``downloader``, and objects present on both
    sides are handed to ``downloader`` only when ``changed`` reports a
    truthy result for the (local, remote) pair.

    Args:
        repository_name: Repository name; also the directory name created
            under ``root``.
        root: Directory holding local repositories, as ``str`` or path
            object.  The default is whatever ``data_root()`` returned when
            this function was defined.

    Returns:
        ``root / repository_name``, the local repository directory.
    """
    if isinstance(root, str):
        root = Path(root)

    # The context manager terminates the workers if anything below raises,
    # so a failed run does not leave 16 processes behind.
    with mp.Pool(
        processes=16,
        initializer=init_worker,
        initargs=(loader, changed, downloader),
    ) as pool:
        resource = create_resource()

        repository_path = root / repository_name
        repository_path.mkdir(parents=True, exist_ok=True)

        local_objects = list_local_objects(
            repository_name, root=root, resource=resource)
        remote_objects = list_remote_objects(
            repository_name, root=root, resource=resource)

        removed_object_keys = local_objects.keys() - remote_objects.keys()
        created_object_keys = remote_objects.keys() - local_objects.keys()
        potentially_updated_object_keys = (
            local_objects.keys() & remote_objects.keys())

        # Built explicitly instead of with itemgetter(*keys): itemgetter
        # raises TypeError for zero keys and returns a bare item (not a
        # tuple) for exactly one key.
        removed_objects = tuple(
            local_objects[key] for key in removed_object_keys)
        created_objects = tuple(
            remote_objects[key] for key in created_object_keys)
        potentially_updated_objects = tuple(
            (local_objects[key], remote_objects[key])
            for key in potentially_updated_object_keys
        )

        # Reverse key order; presumably so nested entries are removed
        # before their parents -- confirm.
        sorted_removed_objects = sorted(
            removed_objects,
            key=methodcaller('key'),
            reverse=True,
        )
        for removed_object in sorted_removed_objects:
            removed_object.remove()

        # FIXME remove duplication
        potentially_updated_changed = list(tqdm(
            pool.imap(changed, potentially_updated_objects, 2),
            desc='checking changes',
            total=len(potentially_updated_objects),
        ))

        # NOTE the remote half of each pair is what gets downloaded.
        updated_objects = tuple(
            remote_object.with_resource(None)
            for has_changed, (_, remote_object)
            in zip(potentially_updated_changed, potentially_updated_objects)
            if has_changed
        )

        sorted_created_updated_objects = sorted(
            created_objects + updated_objects,
            key=methodcaller('key'),
        )

        # Drain the iterator: the downloads are the side effect we want.
        for _ in tqdm(
            pool.imap(downloader, sorted_created_updated_objects),
            desc='download',
            total=len(sorted_created_updated_objects),
        ):
            pass

        pool.close()
        pool.join()

    return repository_path
def upload(repository_name, root=data_root()):
    """Bring the remote bucket of a repository in line with the local copy.

    Objects that exist only remotely are handed to ``remover``, objects
    that exist only locally are handed to ``uploader``, and objects present
    on both sides are handed to ``uploader`` only when ``changed`` reports
    a truthy result for the (local, remote) pair.

    Args:
        repository_name: Repository name; also used as the bucket name.
        root: Directory holding local repositories, as ``str`` or path
            object.  The default is whatever ``data_root()`` returned when
            this function was defined.

    Returns:
        The object returned by ``resource.Bucket(repository_name)``.
    """
    import botocore.exceptions

    if isinstance(root, str):
        root = Path(root)

    # The context manager terminates the workers if anything below raises,
    # so a failed run does not leave 16 processes behind.
    with mp.Pool(
        processes=16,
        initializer=init_worker,
        initargs=(loader, changed, remover, uploader),
    ) as pool:
        resource = create_resource()

        repository_bucket = resource.Bucket(repository_name)
        try:
            repository_bucket.create()
        except botocore.exceptions.ClientError:
            # Best effort, presumably because the bucket may already
            # exist.  Only service errors are ignored; interrupts and
            # programming errors now propagate.
            pass

        local_objects = list_local_objects(
            repository_name, root=root, resource=resource)
        remote_objects = list_remote_objects(
            repository_name, root=root, resource=resource)

        removed_object_keys = remote_objects.keys() - local_objects.keys()
        created_object_keys = local_objects.keys() - remote_objects.keys()
        potentially_updated_object_keys = (
            local_objects.keys() & remote_objects.keys())

        # Built explicitly instead of with itemgetter(*keys): itemgetter
        # raises TypeError for zero keys and returns a bare item (not a
        # tuple) for exactly one key.
        removed_objects = tuple(
            remote_objects[key] for key in removed_object_keys)
        created_objects = tuple(
            local_objects[key] for key in created_object_keys)
        potentially_updated_objects = tuple(
            (local_objects[key], remote_objects[key])
            for key in potentially_updated_object_keys
        )

        # Reverse key order; presumably so nested entries are removed
        # before their parents -- confirm.
        sorted_removed_objects = sorted(
            removed_objects,
            key=methodcaller('key'),
            reverse=True,
        )
        for _ in tqdm(
            pool.imap(remover, sorted_removed_objects, 2),
            desc='remove',
            total=len(sorted_removed_objects),
        ):
            pass

        potentially_updated_changed = list(tqdm(
            pool.imap(changed, potentially_updated_objects, 2),
            desc='checking changes',
            total=len(potentially_updated_objects),
        ))

        # The local half of each pair is what gets uploaded.
        updated_objects = tuple(
            local_object.with_resource(None)
            for has_changed, (local_object, _)
            in zip(potentially_updated_changed, potentially_updated_objects)
            if has_changed
        )

        sorted_created_updated_objects = sorted(
            created_objects + updated_objects,
            key=methodcaller('key'),
        )

        # Drain the iterator: the uploads are the side effect we want.
        for _ in tqdm(
            pool.imap(uploader, sorted_created_updated_objects, 2),
            desc='upload',
            total=len(sorted_created_updated_objects),
        ):
            pass

        pool.close()
        pool.join()

    return repository_bucket