def test_cosem_ome():
    transform_base = {
        "axes": ["z", "y", "x"],
        "units": ["nm", "m", "km"],
        "translate": [0, -8, 10],
        "scale": [1.0, 1.0, 10.0],
    }
    shape_base = (16, 16, 16)
    coords = [
        DataArray(
            (np.arange(shape_base[idx]) * transform_base["scale"][idx]) +
            transform_base["translate"][idx],
            dims=(transform_base["axes"][idx], ),
            attrs={"units": transform_base["units"][idx]},
        ) for idx in range(len(shape_base))
    ]

    data = DataArray(np.zeros(shape_base), coords=coords, name="data")
    multi = multiscale(data, windowed_mean, (2, 2, 2))[:2]
    paths = ["s0", "s1"]
    cosem_ome_group_metadata = COSEMGroupMetadata.fromDataArrays(multi,
                                                                 paths=paths,
                                                                 name="data")
    scale_metas = [
        ScaleMeta(path=p, transform=SpatialTransform.fromDataArray(m))
        for p, m in zip(paths, multi)
    ]
    assert cosem_ome_group_metadata == COSEMGroupMetadata(
        multiscales=[MultiscaleMeta(name="data", datasets=scale_metas)])
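
# Minimal usage sketch (not part of the original tests), assuming the same imports used
# throughout these examples (numpy as np, xarray.DataArray, and the multiscale /
# windowed_mean functions). It shows that `multiscale` returns a list of DataArrays whose
# shapes shrink by the scale factors at each level.
def example_pyramid_shapes():
    data = DataArray(np.zeros((16, 16, 16)), dims=("z", "y", "x"))
    pyramid = multiscale(data, windowed_mean, (2, 2, 2))
    # Shapes halve along every axis per level: (16, 16, 16), (8, 8, 8), (4, 4, 4), ...
    return [level.shape for level in pyramid]
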
def prepare_multiscale_storage(
    source: str,
    source_chunks: Union[Tuple[int, ...], ChunkMode],
    dest: str,
    dest_chunks: Union[Tuple[int, ...], ChunkMode],
    dest_access_mode: Union[WriteableAccessMode, Tuple[WriteableAccessMode,
                                                       WriteableAccessMode]],
    downsampling_method: str,
    downsampling_factors: Tuple[int, ...],
    downsampling_levels: Tuple[int, ...],
    downsampling_chunks: Tuple[int, ...],
):
    """
    Read the source array, build a multiscale pyramid with the requested reducer,
    and prepare (but do not execute) storage of the selected levels at `dest`.
    """
    chunk_mode = "minimum"
    source_xr = read_xarray(source, chunks=source_chunks, name=source)
    logger = logging.getLogger(__name__)
    logger.info(f"Found array {source_xr} at {source}")
    if downsampling_method == "mean":
        reducer = windowed_mean
    elif downsampling_method == "mode":
        reducer = windowed_mode
    else:
        raise ValueError(
            f'Invalid downsampling method. Must be one of ("mean", "mode"), got {downsampling_method}'
        )

    if isinstance(dest_access_mode, WriteableAccessMode):
        access_modes = (dest_access_mode, ) * 2
    else:
        access_modes = dest_access_mode

    arrays = multiscale(
        source_xr,
        reducer,
        scale_factors=downsampling_factors,
        chunks=downsampling_chunks,
        chunk_mode=chunk_mode,
    )

    if len(downsampling_levels) == 0:
        downsampling_levels = tuple(range(len(arrays)))
    arrays = [arrays[idx] for idx in downsampling_levels]
    array_dict = {
        f"s{idx}": array
        for idx, array in zip(downsampling_levels, arrays)
    }
    logger.info(f"Prepared {len(array_dict)} arrays: {array_dict}")
    ms = Multiscales(name="foo", arrays=array_dict)
    store_group, store_arrays, storage = ms.store(dest,
                                                  chunks=dest_chunks,
                                                  access_modes=access_modes)

    data_volume = dask.utils.memory_repr(sum(a.nbytes for a in store_arrays))
    logger.info("Preparing to write to arrays:")
    for array in store_arrays:
        logger.info(array.info)
    logger.info(f"Total data volume: {data_volume}")
    return storage
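
# Hypothetical invocation sketch (paths, chunk sizes, and the access-mode value below are
# placeholders, not taken from the original example):
#
#   storage = prepare_multiscale_storage(
#       source="data.n5/em/raw",                # any container readable by read_xarray
#       source_chunks=(256, 256, 256),
#       dest="pyramid.n5/em/raw",
#       dest_chunks=(64, 64, 64),
#       dest_access_mode=some_writeable_access_mode,  # a WriteableAccessMode value
#       downsampling_method="mean",             # "mode" is the alternative, for label data
#       downsampling_factors=(2, 2, 2),
#       downsampling_levels=(),                 # empty tuple keeps every generated level
#       downsampling_chunks=(64, 64, 64),
#   )
#
# The returned `storage` object represents the prepared (not yet executed) writes.
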
def make_multiscale(dataset: str, path: str, out_path: str, reference_path: str, dry: bool):
    read_chunks = (512,) * 3
    store_chunk_map = {'dense': (64,) * 3, 'sparse': (256,) * 3}
    scale_factors = (2,) * 3

    if reference_path:
        reference_coords = read_xarray(reference_path).coords
    else:
        reference_coords = None

    def to_upload(v):
        return True

    def to_skip(v):
        return False
    to_flip = ()

    locking = False

    num_workers = 20
    # source_arrays = get_upload_targets(read(path))
    source_arrays = {'labels/gt': path}

    for source_name, source in source_arrays.items():
        if not to_upload(source_name) or to_skip(source_name):
            click.echo(f'Skipping {source_name}')
        else:
            click.echo(f'Uploading {source_name} to {os.path.join(out_path, source_name)}')
            dest_name = source_name
            data = read_xarray(source, chunks=read_chunks, name=dest_name, storage_options={'normalize_keys': False})
            if source_name in to_flip:
                click.echo('flipping')
                data = flip_y(data)
            
            if reference_coords:
                data = data.assign_coords(reference_coords)
            
            if source_name.endswith('pred') or ('fibsem' in source_name) or ('raw' in source_name) or ('em' in source_name) or ('lm' in source_name):
                reducer = mean_reduce
                store_chunks = store_chunk_map['dense']
            else:
                reducer = mode_reduce
                store_chunks = store_chunk_map['sparse']
            
            scales = {f's{idx}': v for idx, v in enumerate(multiscale(data, reducer, scale_factors, chained=True)[:5])}
            ms = Multiscales(name=dest_name, arrays=scales)

            if not locking:
                for v in ms.arrays.values():
                    v.data = ensure_minimum_chunksize(v.data, store_chunks)

            if not dry:
                lsf_kwargs = {'walltime': '24:00', 'memory': '30GB'}
                click.echo(f'reducing with {reducer.__name__}, saving {tuple(scales.keys())} to storage with {store_chunks=}')
                with get_cluster(threads_per_worker=2, lsf_kwargs=lsf_kwargs) as clust, Client(clust) as cl:
                    storage_group, storage_arrays, storage_op = ms.store(
                        store=out_path,
                        mode='a',
                        storage_options={'normalize_keys': False, 'profile': 'COSEMPDSAdmin'},
                        chunks=store_chunks,
                        locking=locking,
                        client=cl)
                    click.echo(cl.cluster.dashboard_link)
                    cl.cluster.scale(num_workers)
                    results = cl.compute(dask.delayed(storage_op), sync=True)
def add_multiscale(n5_path, data_set, downsampling_factors=(2, 2, 2),
                   downsampling_method=np.mean, thumbnail_size_yx=None):
    '''
    Given an N5 dataset with an "s0" full-scale level, generate downsampled versions
    s1, s2, etc., stopping at (and including) the first level whose shape fits within
    thumbnail_size_yx (which defaults to the chunk size).
    '''
    print('Generating multiscale for', n5_path)
    store = zarr.N5Store(n5_path)

    # Find out what compression is used for s0, so we can use the same for the multiscale
    fullscale = f'{data_set}/s0'
    r = zarr.open(store=store, mode='r')
    compressor = r[fullscale].compressor

    volume = da.from_zarr(store, component=fullscale)
    chunk_size = volume.chunksize
    thumbnail_size_yx = thumbnail_size_yx or chunk_size
    multi = multiscale(volume,
                       downsampling_method,
                       downsampling_factors,
                       chunks=chunk_size)
    thumbnail_sized = [
        np.less_equal(m.shape, thumbnail_size_yx).all() for m in multi
    ]
    cutoff = thumbnail_sized.index(True)
    multi_to_save = multi[0:cutoff + 1]

    for idx, m in enumerate(multi_to_save):
        if idx == 0: continue
        print(f'Saving level {idx}')
        component = f'{data_set}/s{idx}'

        m.data.to_zarr(store,
                       component=component,
                       overwrite=True,
                       compressor=compressor)

        z = zarr.open(store, path=component, mode='a')
        z.attrs["downsamplingFactors"] = tuple(
            [int(math.pow(f, idx)) for f in downsampling_factors])

    print("Added multiscale imagery to", n5_path)
def main(group_path: str):
    group = access(group_path, mode='a')

    arrays = dict(sorted(group.arrays(), key=lambda kv: int(kv[0][1:])))
    base_pixelRes = neuroglancer.PixelResolution(**group.attrs['pixelResolution'])
    base_transform = cosem.SpatialTransform(axes=group.attrs['axes'][::-1],
                                            units=(base_pixelRes.unit,) * 3,
                                            translate=[0.0] * 3,
                                            scale=base_pixelRes.dimensions[::-1])
    scale_factors = (2,2,2)

    full_rank = {k: v for k,v in arrays.items() if np.all(np.array(v.shape) > 1)}

    base_data = DataArray(da.from_array(group['s0']),
                          coords=transform_to_coords(group['s0'], base_transform))
    multi = multiscale(base_data, da.mean, scale_factors=scale_factors)[:len(full_rank)]
    multi_meta = [cosem.SpatialTransform.fromDataArray(m).dict() for m in multi]
    for a, mmeta in zip(full_rank.values(), multi_meta):
        a.attrs.update({'transform': mmeta})
    group_meta = cosem.COSEMGroupMetadata.fromDataArrays(multi, name='raw',
                                                         paths=tuple(full_rank.keys())).dict()
    group.attrs.update(group_meta)
    return 1
def test_neuroglancer_metadata():
    coords = [
        DataArray(np.arange(16) + 0.5, dims=("z", ), attrs={"units": "nm"}),
        DataArray(np.arange(16) + 1 / 3, dims=("y", ), attrs={"units": "m"}),
        DataArray(10 + (np.arange(16) * 100.1),
                  dims=("x", ),
                  attrs={"units": "km"}),
    ]

    data = DataArray(np.zeros((16, 16, 16)), coords=coords)
    multi = multiscale(data, windowed_mean, (2, 2, 2))[:4]
    neuroglancer_metadata = NeuroglancerN5GroupMetadata.fromDataArrays(multi)

    assert neuroglancer_metadata == NeuroglancerN5GroupMetadata(
        axes=["x", "y", "z"],
        units=["km", "m", "nm"],
        scales=[[1, 1, 1], [2, 2, 2], [4, 4, 4], [8, 8, 8]],
        pixelResolution=PixelResolution(dimensions=[100.1, 1.0, 1.0],
                                        unit="km"),
    )

# Benchmark parameters. The original snippet uses num_workers, tpw (threads per worker),
# and chunk_locking without defining them; the values below are representative assumptions
# matching the settings used in make_multiscale above.
num_workers = 20
tpw = 2
chunk_locking = False

name = f'lsf_nw-{num_workers}_tpw-{tpw}_chunk-locking-{chunk_locking}'
levels = list(range(1, 6))
crop = (slice(8192),) * 3

def reducer(v, **kwargs):
    return np.mean(v, dtype='float32', **kwargs)

source_path = '/nrs/flyem/bench/Z0720_07m_BR.n5/render/Sec30/v1_acquire_trimmed_align___20210413_194018/s0'
target_path = '/nrs/flyem/bench/Z0720_07m_BR.n5/test_dask_down/'

store_chunks = read(source_path, storage_options={'normalize_keys': False}).chunks
read_chunks = (1024,) * 3

data = read_xarray(source_path, storage_options={'normalize_keys': False}, chunks=read_chunks, name='test_data')[crop]

multi = get(levels, multiscale(data, reducer, (2,2,2)))

if not chunk_locking:
    for m in multi:
        m.data = ensure_minimum_chunksize(m.data, store_chunks)

multi_store = Multiscales(name, {f's{lvl}': m for lvl, m in zip(levels, multi)})

if __name__ == '__main__':
    with get_cluster(threads_per_worker=tpw) as cluster, Client(cluster) as cl:
        print(cl.cluster.dashboard_link)
        cl.cluster.scale(num_workers)
        cl.wait_for_workers(num_workers)
        with performance_report(filename=os.path.join(target_path, f'{name}_report.html')):
            store_group, store_arrays, storage_op = multi_store.store(target_path, locking=chunk_locking, client=cl, mode='w')
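            # Plausible completion (not in the original snippet): execute the queued writes,
            # mirroring the cl.compute(dask.delayed(storage_op), sync=True) pattern used in
            # make_multiscale above.
            cl.compute(dask.delayed(storage_op), sync=True)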