def _create_position_array(self, pos):
    """
    Maps all of the tiff data into a virtual zarr store in memory for a given position

    Parameters
    ----------
    pos:    (int) index of the position to create array under

    Returns
    -------
    """

    # initialize virtual zarr store and save it under positions
    timepoints, channels, slices = self._get_dimensions(pos)
    self.position_arrays[pos] = zarr.empty(shape=(timepoints, channels, slices, self.height, self.width),
                                           chunks=(1, 1, 1, self.height, self.width),
                                           dtype=self.dtype)

    # add all the images with this specific dimension. Will be blank images if dataset
    # is incomplete
    for p, t, c, z in self.coord_map.keys():
        if p == pos:
            self.position_arrays[pos][t, c, z, :, :] = self.get_image(pos, t, c, z)
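# Hypothetical usage sketch for the method above, assuming the enclosing reader
# object exposes `position_arrays` and `get_image` as shown; the `reader`
# variable and the indices are illustrative only.
reader._create_position_array(0)
plane = reader.position_arrays[0][0, 0, 0]   # (height, width) image at t=0, c=0, z=0
print(plane.shape, plane.dtype)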
def test_empty_like():
    z = empty(100, 10)
    z2 = empty_like(z)
    eq(z.shape, z2.shape)
    eq(z.chunks, z2.chunks)
    eq(z.dtype, z2.dtype)
    eq(z.cname, z2.cname)
    eq(z.clevel, z2.clevel)
    eq(z.shuffle, z2.shuffle)
    eq(z.fill_value, z2.fill_value)
def _zarr_empty(shape, store_or_group, chunks, dtype, name=None, **kwargs):
    # wrapper that maybe creates the array within a group
    if name is not None:
        assert isinstance(store_or_group, zarr.hierarchy.Group)
        return store_or_group.empty(
            name, shape=shape, chunks=chunks, dtype=dtype, **kwargs
        )
    else:
        return zarr.empty(
            shape, chunks=chunks, dtype=dtype, store=store_or_group, **kwargs
        )
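# Minimal sketch of calling the wrapper above both ways, assuming the zarr v2
# API; the shapes, chunks, and the array name "data" are illustrative only.
import zarr

# plain store: a dict acts as an in-memory store, producing an anonymous array
arr = _zarr_empty((100, 100), {}, (10, 10), "f4")

# group: the array is created by name inside the group hierarchy
grp = zarr.group()
named = _zarr_empty((100, 100), grp, (10, 10), "f4", name="data")
print(arr.shape, named.name)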
def submit_job(job_id):
    import s3fs

    client = cluster.get_client()
    fs = s3fs.S3FileSystem(
        use_ssl=True,
        client_kwargs=dict(
            endpoint_url="https://js2.jetstream-cloud.org:8001/",
            region_name="RegionOne",
        ),
    )
    store = s3fs.S3Map(root=f"gateway-results/{job_id}", s3=fs)  # , check=False)
    z = zarr.empty(shape=(1000, 1000), chunks=(100, 100), dtype="f4",
                   store=store, compression=None)
    x = da.random.random(size=z.shape, chunks=z.chunks).astype(z.dtype)
    x.store(z, lock=False)
    return "Submitted job {}\n".format(job_id)
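# Hypothetical follow-up sketch: reading the result of submit_job back from the
# same object store. The endpoint and bucket layout mirror the function above;
# `job_id` is a placeholder and real credentials/network access are assumed.
import s3fs
import zarr

job_id = "example-job"  # placeholder
fs = s3fs.S3FileSystem(
    use_ssl=True,
    client_kwargs=dict(
        endpoint_url="https://js2.jetstream-cloud.org:8001/",
        region_name="RegionOne",
    ),
)
store = s3fs.S3Map(root=f"gateway-results/{job_id}", s3=fs)
result = zarr.open_array(store, mode="r")
print(result.shape, result.chunks)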
def rechunk_zarr2zarr_w_dask(source_array, target_chunks, max_mem,
                             target_store, temp_store=None,
                             source_storage_options={},
                             temp_storage_options={},
                             target_storage_options={}):
    shape = source_array.shape
    source_chunks = source_array.chunks
    dtype = source_array.dtype
    itemsize = dtype.itemsize

    read_chunks, int_chunks, write_chunks = rechunking_plan(
        shape, source_chunks, target_chunks, itemsize, max_mem)

    source_read = dsa.from_zarr(source_array, chunks=read_chunks,
                                storage_options=source_storage_options)

    # create target
    target_array = zarr.empty(shape, chunks=target_chunks, dtype=dtype,
                              store=target_store)
    target_array.attrs.update(source_array.attrs)

    if int_chunks == target_chunks:
        target_store_delayed = dsa.store(source_read, target_array,
                                         lock=False, compute=False)
        print("One step rechunking plan")
        return target_store_delayed
    else:
        # do intermediate store
        assert temp_store is not None
        int_array = zarr.empty(shape, chunks=int_chunks, dtype=dtype,
                               store=temp_store)
        intermediate_store_delayed = dsa.store(source_read, int_array,
                                               lock=False, compute=False)

        int_read = dsa.from_zarr(int_array, chunks=write_chunks,
                                 storage_options=temp_storage_options)
        target_store_delayed = dsa.store(int_read, target_array,
                                         lock=False, compute=False)

        # now do some hacking to chain these together into a single graph.
        # get the two graphs as dicts
        int_dsk = dask.utils.ensure_dict(intermediate_store_delayed.dask)
        target_dsk = dask.utils.ensure_dict(target_store_delayed.dask)

        # find the root store key representing the read
        root_keys = []
        for key in target_dsk:
            if isinstance(key, str):
                if key.startswith('from-zarr'):
                    root_keys.append(key)
        assert len(root_keys) == 1
        root_key = root_keys[0]

        # now rewrite the graph
        target_dsk[root_key] = (lambda a, *b: a,
                                target_dsk[root_key],
                                *int_dsk[intermediate_store_delayed.key])
        target_dsk.update(int_dsk)

        # fuse
        dsk_fused, deps = fuse(target_dsk)
        delayed_fused = Delayed(target_store_delayed.key, dsk_fused)

        print("Two step rechunking plan")
        return delayed_fused
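# Hypothetical usage sketch for the rechunking helper above. The store paths,
# chunk sizes, and memory budget are illustrative, and rechunking_plan / dsa /
# dask are assumed to be imported by the surrounding module as in the function.
import zarr

source = zarr.open_array("source.zarr", mode="r")
plan = rechunk_zarr2zarr_w_dask(
    source,
    target_chunks=(1000, 100),
    max_mem=256_000_000,          # bytes per task, illustrative
    target_store="target.zarr",
    temp_store="temp.zarr",
)
plan.compute()                    # executes the fused one- or two-step graph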
for block, info in a.iter_write():
    block[:] = content[info.slice]
acratio = a.cratio
if persistent:
    del a
t1 = time()
print("Time for filling array (caterva, iter): %.3fs ; CRatio: %.1fx" % ((t1 - t0), acratio))

# Create and fill a zarr array
t0 = time()
compressor = numcodecs.Blosc(cname=cname, clevel=clevel, shuffle=filter, blocksize=blocksize)
numcodecs.blosc.set_nthreads(nthreads)
if persistent:
    z = zarr.open(fname_zarr, mode='w', shape=shape, chunks=chunkshape, dtype=dtype, compressor=compressor)
else:
    z = zarr.empty(shape=shape, chunks=chunkshape, dtype=dtype, compressor=compressor)
z[:] = content
zratio = z.nbytes / z.nbytes_stored
if persistent:
    del z
t1 = time()
print("Time for filling array (zarr): %.3fs ; CRatio: %.1fx" % ((t1 - t0), zratio))

# Create and fill a hdf5 array
t0 = time()
filters = tables.Filters(complevel=clevel, complib="blosc:%s" % cname, shuffle=True)
tables.set_blosc_max_threads(nthreads)
if persistent:
    h5f = tables.open_file(fname_h5, 'w')
else:
    h5f = tables.open_file(fname_h5, 'w', driver='H5FD_CORE', driver_core_backing_store=0)
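# Standalone sketch of the zarr leg of the benchmark above, assuming an
# in-memory array; the shape, chunk shape, and codec settings are illustrative.
import numpy as np
import numcodecs
import zarr

shape, chunkshape = (4000, 4000), (500, 500)
content = np.arange(np.prod(shape), dtype="f8").reshape(shape)
compressor = numcodecs.Blosc(cname="lz4", clevel=5, shuffle=numcodecs.Blosc.SHUFFLE)

z = zarr.empty(shape=shape, chunks=chunkshape, dtype=content.dtype, compressor=compressor)
z[:] = content
print("CRatio: %.1fx" % (z.nbytes / z.nbytes_stored))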
def test_empty():
    z = empty(100, 10)
    eq((100,), z.shape)
    eq((10,), z.chunks)