Example #1
    def _create_position_array(self, pos):
        """
        maps all of the tiff data into a virtual zarr store in memory for a given position

        Parameters
        ----------
        pos:            (int) index of the position to create array under

        Returns
        -------

        """

        # initialize virtual zarr store and save it under positions
        timepoints, channels, slices = self._get_dimensions(pos)
        self.position_arrays[pos] = zarr.empty(
            shape=(timepoints, channels, slices, self.height, self.width),
            chunks=(1, 1, 1, self.height, self.width),
            dtype=self.dtype)
        # add all the images with these dimensions; images will be blank if the
        # dataset is incomplete
        for p, t, c, z in self.coord_map.keys():
            if p == pos:
                self.position_arrays[pos][t, c, z, :, :] = self.get_image(
                    pos, t, c, z)
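A minimal standalone sketch of the same pattern, with hypothetical dimensions standing in for `self._get_dimensions(pos)` (zarr v2 API assumed):

import numpy as np
import zarr

# Hypothetical dimensions; in the method above these come from self._get_dimensions(pos)
timepoints, channels, slices, height, width = 2, 3, 4, 256, 256

# One 2D image plane per chunk, matching the chunking used above
arr = zarr.empty(
    shape=(timepoints, channels, slices, height, width),
    chunks=(1, 1, 1, height, width),
    dtype="uint16",
)

# Write planes one at a time, as the (t, c, z) loop above does via get_image
plane = np.arange(height * width, dtype="uint16").reshape(height, width)
arr[0, 0, 0, :, :] = plane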
Example #2
def test_empty_like():
    z = empty(100, 10)
    z2 = empty_like(z)
    eq(z.shape, z2.shape)
    eq(z.chunks, z2.chunks)
    eq(z.dtype, z2.dtype)
    eq(z.cname, z2.cname)
    eq(z.clevel, z2.clevel)
    eq(z.shuffle, z2.shuffle)
    eq(z.fill_value, z2.fill_value)
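These assertions come from an early zarr test suite, in which `cname`, `clevel`, and `shuffle` were attributes of the array itself. In current zarr (v2, numcodecs-based) those settings live on a single compressor codec, so the equivalent check would look roughly like this sketch:

import zarr

z = zarr.empty(100, chunks=10, dtype="f8")
z2 = zarr.empty_like(z)
assert z.shape == z2.shape
assert z.chunks == z2.chunks
assert z.dtype == z2.dtype
assert z.compressor == z2.compressor  # replaces the old cname/clevel/shuffle attributes
assert z.fill_value == z2.fill_value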
Example #3
def _zarr_empty(shape, store_or_group, chunks, dtype, name=None, **kwargs):
    # wrapper that creates the array within a group when a name is given,
    # and directly against a store otherwise
    if name is not None:
        assert isinstance(store_or_group, zarr.hierarchy.Group)
        return store_or_group.empty(
            name, shape=shape, chunks=chunks, dtype=dtype, **kwargs
        )
    else:
        return zarr.empty(
            shape, chunks=chunks, dtype=dtype, store=store_or_group, **kwargs
        )
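A hedged usage sketch of both branches (in-memory group and store; the names are illustrative, zarr v2 API assumed):

import zarr

# With a name: the array is created inside a group
group = zarr.group()
a = _zarr_empty((100, 100), group, chunks=(10, 10), dtype="f8", name="data")

# Without a name: the array is created directly against a store
store = zarr.MemoryStore()
b = _zarr_empty((100, 100), store, chunks=(10, 10), dtype="f8")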
Example #4
    def submit_job(job_id):
        import s3fs

        client = cluster.get_client()
        fs = s3fs.S3FileSystem(
            use_ssl=True,
            client_kwargs=dict(
                endpoint_url="https://js2.jetstream-cloud.org:8001/",
                region_name="RegionOne",
            ),
        )
        store = s3fs.S3Map(root=f"gateway-results/{job_id}",
                           s3=fs)  # , check=False)
        z = zarr.empty(shape=(1000, 1000),
                       chunks=(100, 100),
                       dtype="f4",
                       store=store,
                       compression=None)
        x = da.random.random(size=z.shape, chunks=z.chunks).astype(z.dtype)
        x.store(z, lock=False)

        return "Submitted job {}\n".format(job_id)
Example #5
def rechunk_zarr2zarr_w_dask(source_array,
                             target_chunks,
                             max_mem,
                             target_store,
                             temp_store=None,
                             source_storage_options={},
                             temp_storage_options={},
                             target_storage_options={}):

    shape = source_array.shape
    source_chunks = source_array.chunks
    dtype = source_array.dtype
    itemsize = dtype.itemsize

    read_chunks, int_chunks, write_chunks = rechunking_plan(
        shape, source_chunks, target_chunks, itemsize, max_mem)

    source_read = dsa.from_zarr(source_array,
                                chunks=read_chunks,
                                storage_options=source_storage_options)

    # create target
    target_array = zarr.empty(shape,
                              chunks=target_chunks,
                              dtype=dtype,
                              store=target_store)
    target_array.attrs.update(source_array.attrs)

    if int_chunks == target_chunks:
        target_store_delayed = dsa.store(source_read,
                                         target_array,
                                         lock=False,
                                         compute=False)
        print("One step rechunking plan")
        return target_store_delayed

    else:
        # do intermediate store
        assert temp_store is not None
        int_array = zarr.empty(shape,
                               chunks=int_chunks,
                               dtype=dtype,
                               store=temp_store)
        intermediate_store_delayed = dsa.store(source_read,
                                               int_array,
                                               lock=False,
                                               compute=False)

        int_read = dsa.from_zarr(int_array,
                                 chunks=write_chunks,
                                 storage_options=temp_storage_options)
        target_store_delayed = dsa.store(int_read,
                                         target_array,
                                         lock=False,
                                         compute=False)

        # now do some hacking to chain these together into a single graph.
        # get the two graphs as dicts
        int_dsk = dask.utils.ensure_dict(intermediate_store_delayed.dask)
        target_dsk = dask.utils.ensure_dict(target_store_delayed.dask)

        # find the root store key representing the read
        root_keys = []
        for key in target_dsk:
            if isinstance(key, str):
                if key.startswith('from-zarr'):
                    root_keys.append(key)
        assert len(root_keys) == 1
        root_key = root_keys[0]

        # now rewrite the graph
        target_dsk[root_key] = (lambda a, *b: a, target_dsk[root_key],
                                *int_dsk[intermediate_store_delayed.key])
        target_dsk.update(int_dsk)

        # fuse
        dsk_fused, deps = fuse(target_dsk)
        delayed_fused = Delayed(target_store_delayed.key, dsk_fused)

        print("Two step rechunking plan")
        return delayed_fused
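A hedged usage sketch of the function above. The store paths, shapes, and `max_mem` budget are illustrative, and the helper's own imports (`dsa` as `dask.array`, `rechunking_plan`, `fuse`, `Delayed`) are assumed to be in scope:

import zarr

# Source array with long, thin chunks; the target flips the chunking
source = zarr.open_array("source.zarr", mode="a",
                         shape=(4000, 4000), chunks=(4000, 40), dtype="f4")

delayed = rechunk_zarr2zarr_w_dask(
    source,
    target_chunks=(40, 4000),
    max_mem=256_000_000,        # per-task memory budget in bytes (illustrative)
    target_store="target.zarr",
    temp_store="temp.zarr",     # only used if a two-step plan is required
)
delayed.compute()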
Example #6
for block, info in a.iter_write():
    block[:] = content[info.slice]
acratio = a.cratio
if persistent:
    del a
t1 = time()
print("Time for filling array (caterva, iter): %.3fs ; CRatio: %.1fx" % ((t1 - t0), acratio))

# Create and fill a zarr array
t0 = time()
compressor = numcodecs.Blosc(cname=cname, clevel=clevel, shuffle=filter, blocksize=blocksize)
numcodecs.blosc.set_nthreads(nthreads)
if persistent:
    z = zarr.open(fname_zarr, mode='w', shape=shape, chunks=chunkshape, dtype=dtype, compressor=compressor)
else:
    z = zarr.empty(shape=shape, chunks=chunkshape, dtype=dtype, compressor=compressor)
z[:] = content
zratio = z.nbytes / z.nbytes_stored
if persistent:
    del z
t1 = time()
print("Time for filling array (zarr): %.3fs ; CRatio: %.1fx" % ((t1 - t0), zratio))

# Create and fill a hdf5 array
t0 = time()
filters = tables.Filters(complevel=clevel, complib="blosc:%s" % cname, shuffle=True)
tables.set_blosc_max_threads(nthreads)
if persistent:
    h5f = tables.open_file(fname_h5, 'w')
else:
    h5f = tables.open_file(fname_h5, 'w', driver='H5FD_CORE', driver_core_backing_store=0)
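The snippet cuts off in the HDF5 section. For reference, a self-contained sketch of just the zarr leg of the benchmark, with in-memory storage and illustrative parameters:

from time import time

import numcodecs
import numpy as np
import zarr

shape, chunkshape = (10_000, 1_000), (1_000, 100)
content = np.arange(np.prod(shape), dtype="f8").reshape(shape)

t0 = time()
compressor = numcodecs.Blosc(cname="lz4", clevel=5, shuffle=numcodecs.Blosc.SHUFFLE)
z = zarr.empty(shape=shape, chunks=chunkshape, dtype=content.dtype, compressor=compressor)
z[:] = content
zratio = z.nbytes / z.nbytes_stored  # uncompressed size over stored size
print("Time for filling array (zarr): %.3fs ; CRatio: %.1fx" % ((time() - t0), zratio))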
Example #7
def test_empty():
    z = empty(100, 10)
    eq((100,), z.shape)
    eq((10,), z.chunks)