def download_chunks_threaded(meta, cache, mip, cloudpaths, fn, fill_missing, progress, compress_cache, green=False):
  """Concurrently fetch the given chunk files and hand each decoded
  (image, bbox) pair to the callback ``fn``.

  Chunks already present in the on-disk cache are read locally and are
  not re-cached; the remainder are fetched from the remote dataset and
  cached only when the cache is enabled.
  """
  data_locations = cache.compute_data_locations(cloudpaths)
  local_storage_path = 'file://' + os.path.join(cache.path, meta.key(mip))

  def handle(srcpath, chunkfile, should_cache):
    # Fetch + decode a single chunk, then forward it to the user callback.
    image, bounds = download_chunk(
      meta, cache, srcpath, mip, chunkfile,
      fill_missing, should_cache, compress_cache
    )
    fn(image, bounds)

  def all_jobs():
    # Locally cached chunks first; they are read from disk and never re-cached.
    for chunkfile in data_locations['local']:
      yield partial(handle, local_storage_path, os.path.basename(chunkfile), False)
    # Remote chunks are written into the cache only if it is enabled.
    for chunkfile in data_locations['remote']:
      yield partial(handle, meta.cloudpath, chunkfile, cache.enabled)

  schedule_jobs(
    fns=all_jobs(),
    concurrency=DEFAULT_THREADS,
    progress=('Downloading' if progress else None),
    total=len(cloudpaths),
    green=green,
  )
def download_chunks_threaded(
  meta, cache, lru, mip, cloudpaths,
  fn, decode_fn, fill_missing, progress,
  compress_cache, green=False, secrets=None,
  background_color=0, decompress=True,
):
  """Concurrently download the given chunk files and invoke a callback on each.

  fn is the postprocess callback invoked as fn(labels, bbox) for every
  downloaded chunk. decode_fn is a decode fn handed to download_chunk to
  turn raw bytes into labels.

  Chunks found in the on-disk cache are read locally (without re-caching);
  the rest are fetched from meta.cloudpath, caching them when the cache is
  enabled. When an in-memory LRU is provided, fetches for chunks already
  resident in the LRU are scheduled first so fresh downloads do not evict
  them before they are served.
  """
  locations = cache.compute_data_locations(cloudpaths)
  cachedir = 'file://' + cache.path

  def process(cloudpath, filename, enable_cache):
    # Download + decode one chunk, then hand it to the user callback.
    labels, bbox = download_chunk(
      meta, cache, lru, cloudpath, mip, filename,
      fill_missing, enable_cache, compress_cache,
      secrets, background_color, decode_fn, decompress
    )
    fn(labels, bbox)

  # If there's an LRU sort the fetches so that the LRU ones are first
  # otherwise the new downloads can kick out the cached ones and make the
  # lru useless.
  if lru is not None and lru.size > 0:
    # 'remote' may be a lazy iterable; materialize it so it can be sorted.
    # NOTE(review): 'local' is sorted without the same coercion — presumably
    # compute_data_locations always returns a list there; confirm.
    if not isinstance(locations['remote'], list):
      locations['remote'] = list(locations['remote'])
    # reverse=True puts LRU-resident filenames at the front of each list.
    locations['local'].sort(key=lambda fname: fname in lru, reverse=True)
    locations['remote'].sort(key=lambda fname: fname in lru, reverse=True)

  # Cached files live under the mip key directory beneath cache.path.
  qualify = lambda fname: os.path.join(meta.key(mip), os.path.basename(fname))

  # Generators keep the partials lazy; local reads never re-cache,
  # remote reads cache only when the cache is enabled.
  local_downloads = (
    partial(process, cachedir, qualify(filename), False) for filename in locations['local']
  )
  remote_downloads = (
    partial(process, meta.cloudpath, filename, cache.enabled) for filename in locations['remote']
  )

  downloads = itertools.chain(local_downloads, remote_downloads)

  # A truthy non-string progress flag becomes the default progress label.
  if progress and not isinstance(progress, str):
    progress = "Downloading"

  schedule_jobs(
    fns=downloads,
    concurrency=DEFAULT_THREADS,
    progress=progress,
    total=len(cloudpaths),
    green=green,
  )
def threaded_upload_chunks(
  meta, cache, img, mip, chunk_ranges,
  compress, cdn_cache, progress,
  n_threads=DEFAULT_THREADS,
  delete_black_uploads=False, background_color=0,
  green=False, compress_level=None,
):
  """Encode each chunk of ``img`` described by ``chunk_ranges`` and upload
  it to the remote dataset, mirroring the write into the on-disk cache when
  the cache is enabled.

  When ``delete_black_uploads`` is set, chunks uniformly equal to
  ``background_color`` are deleted from storage instead of uploaded.
  """
  if cache.enabled:
    mkdir(cache.path)

  # Promote the image to 4D (x, y, z, channel) so slicing below is uniform.
  while img.ndim < 4:
    img = img[..., np.newaxis]

  remote = CloudFiles(meta.cloudpath, progress=progress)
  local = CloudFiles('file://' + cache.path, progress=progress)

  def store(chunk_img, path):
    # Encode once, write to the remote, and mirror into the cache if enabled.
    payload = chunks.encode(
      chunk_img, meta.encoding(mip),
      meta.compressed_segmentation_block_size(mip)
    )
    remote.put(
      path=path,
      content=payload,
      content_type=content_type(meta.encoding(mip)),
      compress=should_compress(meta.encoding(mip), compress, cache),
      compression_level=compress_level,
      cache_control=cdn_cache_control(cdn_cache),
    )
    if cache.enabled:
      local.put(
        path=path,
        content=payload,
        content_type=content_type(meta.encoding(mip)),
        compress=should_compress(meta.encoding(mip), compress, cache, iscache=True),
      )

  def remove(path):
    # Delete from the remote and, when enabled, from the cache as well.
    remote.delete(path)
    if cache.enabled:
      local.delete(path)

  def process(startpt, endpt, spt, ept):
    # Degenerate (zero-volume) ranges produce no chunk.
    if np.array_equal(spt, ept):
      return
    chunk_img = img[startpt.x:endpt.x, startpt.y:endpt.y, startpt.z:endpt.z, :]
    # Trim chunks that overhang the dataset boundary.
    clamp_ept = min2(ept, meta.bounds(mip).maxpt)
    size = clamp_ept - spt
    chunk_img = chunk_img[:size.x, :size.y, :size.z, :]
    filename = "{}-{}_{}-{}_{}-{}".format(
      spt.x, clamp_ept.x, spt.y, clamp_ept.y, spt.z, clamp_ept.z
    )
    path = meta.join(meta.key(mip), filename)
    if not delete_black_uploads:
      store(chunk_img, path)
    elif np.any(chunk_img != background_color):
      # Chunk contains foreground voxels: upload as usual.
      store(chunk_img, path)
    else:
      # All-background chunk: remove any existing file instead.
      remove(path)

  schedule_jobs(
    fns=(partial(process, *rng) for rng in chunk_ranges),
    concurrency=n_threads,
    progress=('Uploading' if progress else None),
    total=len(chunk_ranges),
    green=green,
  )