def test_cosem_ome():
    """Check COSEM group metadata derived from a two-level multiscale pyramid.

    Builds a 16x16x16 zero-filled ``DataArray`` whose coordinates are
    generated from a per-axis scale and translation, downsamples it, and
    asserts that ``COSEMGroupMetadata.fromDataArrays`` equals the metadata
    assembled by hand from ``ScaleMeta`` / ``MultiscaleMeta``.
    """
    transform_base = {
        "axes": ["z", "y", "x"],
        "units": ["nm", "m", "km"],
        "translate": [0, -8, 10],
        "scale": [1.0, 1.0, 10.0],
    }
    shape_base = (16, 16, 16)

    # One coordinate array per axis: index * scale + translate.
    per_axis = zip(
        shape_base,
        transform_base["axes"],
        transform_base["units"],
        transform_base["scale"],
        transform_base["translate"],
    )
    coords = []
    for size, axis, unit, step, offset in per_axis:
        values = (np.arange(size) * step) + offset
        coords.append(DataArray(values, dims=(axis,), attrs={"units": unit}))

    data = DataArray(np.zeros(shape_base), coords=coords, name="data")
    paths = ["s0", "s1"]
    multi = multiscale(data, windowed_mean, (2, 2, 2))[:2]

    observed = COSEMGroupMetadata.fromDataArrays(multi, paths=paths, name="data")

    scale_metas = []
    for level_path, level in zip(paths, multi):
        level_transform = SpatialTransform.fromDataArray(level)
        scale_metas.append(ScaleMeta(path=level_path, transform=level_transform))
    expected = COSEMGroupMetadata(
        multiscales=[MultiscaleMeta(name="data", datasets=scale_metas)]
    )

    assert observed == expected
def prepare_multiscale_storage(
    source: str,
    source_chunks: Union[Tuple[int, ...], ChunkMode],
    dest: str,
    dest_chunks: Union[Tuple[int, ...], ChunkMode],
    dest_access_mode: Union[WriteableAccessMode, Tuple[WriteableAccessMode, WriteableAccessMode]],
    downsampling_method: str,
    downsampling_factors: Tuple[int, ...],
    downsampling_levels: Tuple[int, ...],
    downsampling_chunks: Tuple[int, ...],
):
    """Build a multiscale pyramid from ``source`` and prepare storage for it at ``dest``.

    Args:
        source: Location handed to ``read_xarray``; also used as the array name.
        source_chunks: Chunking passed to ``read_xarray``.
        dest: Destination handed to ``Multiscales.store``.
        dest_chunks: Chunking passed to ``Multiscales.store``.
        dest_access_mode: Either a single access mode, which is duplicated
            into a pair, or an explicit pair of access modes.
        downsampling_method: ``"mean"`` or ``"mode"``; selects the reducer.
        downsampling_factors: Scale factors passed to ``multiscale``.
        downsampling_levels: Indices of the pyramid levels to keep. An empty
            tuple keeps every level that ``multiscale`` produced.
        downsampling_chunks: Chunking passed to ``multiscale``.

    Returns:
        The third element returned by ``Multiscales.store``.

    Raises:
        ValueError: If ``downsampling_method`` is not ``"mean"`` or ``"mode"``.
    """
    logger = logging.getLogger(__name__)

    # Resolve the reducer before touching storage so that an invalid method
    # fails immediately instead of after the source has been opened.
    if downsampling_method == "mean":
        reducer = windowed_mean
    elif downsampling_method == "mode":
        reducer = windowed_mode
    else:
        raise ValueError(
            f'Invalid downsampling method. Must be one of ("mean", "mode"), got {downsampling_method}'
        )

    chunk_mode = "minimum"
    source_xr = read_xarray(source, chunks=source_chunks, name=source)
    logger.info(f"Found array {source_xr} at {source}")

    # NOTE(review): if WriteableAccessMode is a typing alias (e.g. a Literal)
    # rather than a class, this isinstance check raises TypeError -- confirm
    # against its definition.
    if isinstance(dest_access_mode, WriteableAccessMode):
        access_modes = (dest_access_mode,) * 2
    else:
        access_modes = dest_access_mode

    arrays = multiscale(
        source_xr,
        reducer,
        scale_factors=downsampling_factors,
        chunks=downsampling_chunks,
        chunk_mode=chunk_mode,
    )

    # No explicit selection means "keep every level".
    if not downsampling_levels:
        downsampling_levels = tuple(range(len(arrays)))
    array_dict = {f"s{idx}": arrays[idx] for idx in downsampling_levels}
    logger.info(f"Prepared {len(array_dict)} arrays: {array_dict}")

    # NOTE(review): the multiscale name is hard-coded to "foo" -- confirm
    # whether it should be derived from the source or destination.
    ms = Multiscales(name="foo", arrays=array_dict)
    store_group, store_arrays, storage = ms.store(
        dest, chunks=dest_chunks, access_modes=access_modes
    )

    data_volume = dask.utils.memory_repr(sum(a.nbytes for a in store_arrays))
    logger.info("Preparing to write to arrays:")
    for array in store_arrays:
        logger.info(array.info)
    logger.info(f"Total data volume: {data_volume}")
    return storage
def make_multiscale(dataset: str, path: str, out_path: str, reference_path: str, dry: bool):
    """Downsample the array at ``path`` and, unless ``dry``, store the pyramid at ``out_path``.

    The array is read with fixed chunks, optionally given the coordinates of
    the array at ``reference_path``, reduced into at most five scale levels
    (``s0`` .. ``s4``) and written through a cluster obtained from
    ``get_cluster``. ``dataset`` is accepted but not used in this function.
    """
    read_chunks = (512,) * 3
    store_chunk_map = {'dense': (64,) * 3, 'sparse': (256,) * 3}
    scale_factors = (2,) * 3
    locking = False
    num_workers = 20
    to_flip = ()

    reference_coords = read_xarray(reference_path).coords if reference_path else None

    def to_upload(v):
        return True

    def to_skip(v):
        return False

    # Previously: source_arrays = get_upload_targets(read(path))
    source_arrays = {'labels/gt': path}

    # NOTE(review): the mapped value is never used below; the array is always
    # read from `path`. Identical for the single hard-coded entry -- confirm
    # before adding more entries.
    for source_name, _source in source_arrays.items():
        if not to_upload(source_name) or to_skip(source_name):
            click.echo(f'Skipping {source_name}')
            continue

        click.echo(f'Uploading {source_name} to {os.path.join(out_path, source_name)}')
        dest_name = source_name
        data = read_xarray(
            path,
            chunks=read_chunks,
            name=dest_name,
            storage_options={'normalize_keys': False},
        )

        if source_name in to_flip:
            click.echo('flipping')
            data = flip_y(data)

        if reference_coords:
            data = data.assign_coords(reference_coords)

        # Names that look like intensity data are mean-reduced into small
        # chunks; everything else is mode-reduced into larger chunks.
        looks_dense = source_name.endswith('pred') or any(
            tag in source_name for tag in ('fibsem', 'raw', 'em', 'lm')
        )
        if looks_dense:
            reducer, store_chunks = mean_reduce, store_chunk_map['dense']
        else:
            reducer, store_chunks = mode_reduce, store_chunk_map['sparse']

        pyramid = multiscale(data, reducer, scale_factors, chained=True)[:5]
        scales = {f's{level}': arr for level, arr in enumerate(pyramid)}
        ms = Multiscales(name=dest_name, arrays=scales)

        if not locking:
            for arr in ms.arrays.values():
                arr.data = ensure_minimum_chunksize(arr.data, store_chunks)

        if dry:
            continue

        lsf_kwargs = {'walltime': "24:00", "memory": '30GB'}
        click.echo(f'reducing with {reducer.__name__}, saving {tuple(scales.keys())} to storage with {store_chunks=}')
        with get_cluster(threads_per_worker=2, lsf_kwargs=lsf_kwargs) as clust, Client(clust) as cl:
            _group, _arrays, storage_op = ms.store(
                store=out_path,
                mode='a',
                storage_options={'normalize_keys': False, 'profile': 'COSEMPDSAdmin'},
                chunks=store_chunks,
                locking=locking,
                client=cl,
            )
            click.echo(cl.cluster.dashboard_link)
            cl.cluster.scale(num_workers)
            cl.compute(dask.delayed(storage_op), sync=True)
def add_multiscale(n5_path, data_set, downsampling_factors=(2,2,2),
                   downsampling_method=np.mean, thumbnail_size_yx=None):
    '''
    Given an n5 with "s0", generate downsampled versions s1, s2, etc.

    Levels are written up to and including the first one whose shape is
    less than or equal to thumbnail_size_yx along every axis
    (thumbnail_size_yx defaults to the chunk size of s0). If no generated
    level is that small, every generated level is written.

    Each saved level reuses the compressor of s0 and gets a
    "downsamplingFactors" attribute equal to the per-axis factor raised to
    the level index.
    '''
    print('Generating multiscale for', n5_path)
    store = zarr.N5Store(n5_path)

    # Find out what compression is used for s0, so we can use the same for the multiscale
    fullscale = f'{data_set}/s0'
    compressor = zarr.open(store=store, mode='r')[fullscale].compressor

    volume = da.from_zarr(store, component=fullscale)
    chunk_size = volume.chunksize
    thumbnail_size_yx = thumbnail_size_yx or chunk_size
    multi = multiscale(volume, downsampling_method, downsampling_factors, chunks=chunk_size)

    # Index of the first thumbnail-sized level. Falls back to the last level
    # instead of raising ValueError when none of them is small enough.
    cutoff = next(
        (idx for idx, m in enumerate(multi)
         if np.less_equal(m.shape, thumbnail_size_yx).all()),
        len(multi) - 1,
    )

    # Level 0 is the existing s0, so start writing at level 1.
    for idx, m in enumerate(multi[1:cutoff + 1], start=1):
        print(f'Saving level {idx}')
        component = f'{data_set}/s{idx}'
        m.data.to_zarr(store, component=component, overwrite=True, compressor=compressor)
        z = zarr.open(store, path=component, mode='a')
        # `**` stays in integer arithmetic for integer factors, unlike
        # math.pow which goes through floats.
        z.attrs["downsamplingFactors"] = tuple(int(f ** idx) for f in downsampling_factors)

    print("Added multiscale imagery to", n5_path)
def main(group_path: str):
    """Write COSEM transform metadata onto a multiscale group and its arrays.

    Opens the group at ``group_path`` in append mode, rebuilds the multiscale
    pyramid from ``s0`` using the group's ``pixelResolution`` and ``axes``
    attributes, then stores a ``transform`` attribute on each array whose
    dimensions are all larger than 1 and updates the group attributes with
    the COSEM group metadata. Always returns 1.
    """
    group = access(group_path, mode='a')

    def scale_index(item):
        # Array names look like "s<N>"; order them by N.
        return int(item[0][1:])

    arrays = dict(sorted(group.arrays(), key=scale_index))

    base_pixelRes = neuroglancer.PixelResolution(**group.attrs['pixelResolution'])
    base_transform = cosem.SpatialTransform(
        axes=group.attrs['axes'][::-1],
        units=(base_pixelRes.unit,) * 3,
        translate=[0.0,] * 3,
        scale=base_pixelRes.dimensions[::-1],
    )
    scale_factors = (2, 2, 2)

    # Keep only arrays with no dimension of length 1 or less.
    full_rank = {}
    for key, arr in arrays.items():
        if np.all(np.array(arr.shape) > 1):
            full_rank[key] = arr

    s0 = group['s0']
    base_data = DataArray(da.from_array(s0), coords=transform_to_coords(group['s0'], base_transform))
    pyramid = multiscale(base_data, da.mean, scale_factors=scale_factors)
    multi = pyramid[:len(full_rank)]

    multi_meta = [cosem.SpatialTransform.fromDataArray(level).dict() for level in multi]
    for arr, transform_meta in zip(full_rank.values(), multi_meta):
        arr.attrs.update({'transform': transform_meta})

    group_meta = cosem.COSEMGroupMetadata.fromDataArrays(
        multi, name='raw', paths=tuple(full_rank.keys())
    ).dict()
    group.attrs.update(group_meta)
    return 1
def test_neuroglancer_metadata():
    """Check Neuroglancer N5 group metadata derived from a four-level pyramid.

    The input has z/y/x coordinates with differing units and spacing; the
    expected metadata lists axes and units in x, y, z order.
    """
    index = np.arange(16)
    z_coord = DataArray(index + 0.5, dims=("z",), attrs={"units": "nm"})
    y_coord = DataArray(index + 1 / 3, dims=("y",), attrs={"units": "m"})
    x_coord = DataArray(10 + (index * 100.1), dims=("x",), attrs={"units": "km"})

    data = DataArray(np.zeros((16, 16, 16)), coords=[z_coord, y_coord, x_coord])
    multi = multiscale(data, windowed_mean, (2, 2, 2))[:4]

    expected = NeuroglancerN5GroupMetadata(
        axes=["x", "y", "z"],
        units=["km", "m", "nm"],
        scales=[[1, 1, 1], [2, 2, 2], [4, 4, 4], [8, 8, 8]],
        pixelResolution=PixelResolution(dimensions=[100.1, 1.0, 1.0], unit="km"),
    )
    neuroglancer_metadata = NeuroglancerN5GroupMetadata.fromDataArrays(multi)

    assert neuroglancer_metadata == expected
# Benchmark script: downsample a crop of a source volume and store the result
# through a cluster. `num_workers`, `tpw` and `chunk_locking` are read here but
# not defined in this part of the file.
name = f'lsf_nw-{num_workers}_tpw-{tpw}_chunk-locking-{chunk_locking}'
# Pyramid levels to keep (1 through 5); level 0 is not selected.
levels = list(range(1,6))
# Restrict the source to its first 8192 elements along each of three axes.
crop = (slice(8192),) * 3


def reducer(v, **kwargs):
    """Return the mean of ``v`` as float32, forwarding ``kwargs`` to ``np.mean``."""
    return np.mean(v, dtype='float32', **kwargs)


source_path = '/nrs/flyem/bench/Z0720_07m_BR.n5/render/Sec30/v1_acquire_trimmed_align___20210413_194018/s0'
target_path = '/nrs/flyem/bench/Z0720_07m_BR.n5/test_dask_down/'
# Chunking of the array as stored at source_path; used below as the minimum chunk size.
store_chunks = read(source_path, storage_options={'normalize_keys': False}).chunks
read_chunks=(1024,) * 3
data = read_xarray(source_path, storage_options={'normalize_keys': False}, chunks=read_chunks, name='test_data')[crop]
# Select the requested levels from the full pyramid.
multi = get(levels, multiscale(data, reducer, (2,2,2)))

if not chunk_locking:
    # Without chunk locking, rechunk each level via ensure_minimum_chunksize.
    # presumably so that concurrent tasks do not share a stored chunk -- TODO confirm
    for m in multi:
        m.data = ensure_minimum_chunksize(m.data, store_chunks)

multi_store = Multiscales(name, {f's{l}' : m for l,m in zip(levels, multi)})

if __name__ == '__main__':
    with get_cluster(threads_per_worker=tpw) as cluster, Client(cluster) as cl:
        print(cl.cluster.dashboard_link)
        cl.cluster.scale(num_workers)
        # Block until the requested number of workers has joined.
        cl.wait_for_workers(num_workers)
        with performance_report(filename=os.path.join(target_path, f'{name}_report.html')):
            # NOTE(review): storage_op is not computed within the visible part of this script.
            store_group, store_arrays, storage_op = multi_store.store(target_path, locking=chunk_locking, client=cl, mode='w')