def __init__(self, path: str, mode: str = 'r', storage_type: str = None) -> None:
    """
    Open a Zarr file; with mode == 'w' an empty one is created at `path`,
    with any other mode the existing file at `path` is loaded.

    path : `str`, location of the zarr object.
    mode : `str`, 'w' creates a new file. Any other value is forwarded to
        `zarr.open`; 'a' additionally converts a ZipStore into a
        NestedDirectoryStore via `_to_directory` before opening.
    storage_type : `str`, 'ZipStore' or 'NestedDirectoryStore'. Only consulted
        when creating a file; `None` means 'NestedDirectoryStore'.
    """
    self.store = self.root = None
    if storage_type is None:
        storage_type = 'NestedDirectoryStore'

    if mode != 'w':
        # Existing file: a directory on disk is a nested store, anything
        # else is treated as a zip archive.
        if os.path.isdir(path):
            self.store = zarr.NestedDirectoryStore(path)
        else:
            self.store = zarr.ZipStore(path, mode='r')
        if mode == 'a' and isinstance(self.store, zarr.ZipStore):
            self._to_directory()
        self.root = zarr.open(self.store, mode=mode)
        return

    # New file: clear whatever is at `path`, then build the requested store.
    check_and_remove_existing_path(path)
    if storage_type == 'ZipStore':
        self.store = zarr.ZipStore(path, mode='w')
    else:
        self.store = zarr.NestedDirectoryStore(path)
    self.root = zarr.group(self.store, overwrite=True)
def do_GET(self):
    """Serve a GET request for the info file or a single chunk of volume data.

    The handling is selected by the module-level ``args.format``:

    * raw format, or any path starting with ``/socket``: delegated to the
      parent request handler.
    * any path containing ``/info``: the local file ``info`` is streamed back.
    * tiff: the chunk is read from ``<path>.tiff``.
    * zarr / blockfs / ngff: the path is ``<level>/<chunk>``; the chunk start
      coordinates come from ``self.parse_path`` and the chunk end is the start
      plus the array's chunk size, clamped to the array shape (zarr, ngff).

    Requests whose backing file or level directory does not exist fall back
    to the parent handler.

    :raises ValueError: if ``args.format`` is not one of the known formats
    """
    if args.format == FORMAT_RAW or self.path.startswith("/socket"):
        print("Handing request to simple http request handler")
        super(RequestHandler, self).do_GET()
    elif self.path.find("/info") >= 0:
        size = pathlib.Path("info").stat().st_size
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", 'application/octet-stream')
        self.send_header("Content-Length", str(size))
        self.end_headers()
        with open("info", "rb") as fd:
            self.copyfile(fd, self.wfile)
    elif args.format == FORMAT_TIFF:
        import tifffile
        path = self.path[1:] + ".tiff"
        print(path)
        if not os.path.exists(path):
            super(RequestHandler, self).do_GET()
            return
        chunk = tifffile.imread(path)
        self.send_chunk(chunk)
    elif args.format == FORMAT_ZARR:
        import zarr
        level, path = self.path[1:].split('/')
        if not os.path.exists(level):
            super(RequestHandler, self).do_GET()
            return
        x0, y0, z0 = self.parse_path(path)
        store = zarr.NestedDirectoryStore(level)
        z_arr = zarr.open(store, mode='r')
        # parse_path only yields the chunk origin; derive the far corner from
        # the array's own chunking, the same way the NGFF branch does.
        zs, ys, xs = z_arr.chunks
        z1 = min(z_arr.shape[0], z0 + zs)
        y1 = min(z_arr.shape[1], y0 + ys)
        x1 = min(z_arr.shape[2], x0 + xs)
        chunk = z_arr[z0:z1, y0:y1, x0:x1]
        self.send_chunk(chunk)
    elif args.format == FORMAT_BLOCKFS:
        level, path = self.path[1:].split('/')
        if not os.path.exists(level):
            super(RequestHandler, self).do_GET()
            return
        x0, y0, z0 = self.parse_path(path)
        directory = Directory.open(
            os.path.join(level, "precomputed.blockfs"))
        chunk = directory.read_block(x0, y0, z0)
        self.send_chunk(chunk)
    elif args.format == FORMAT_NGFF:
        import zarr
        level, path = self.path[1:].split('/')
        x0, y0, z0 = self.parse_path(path)
        store = zarr.NestedDirectoryStore(".")
        group = zarr.group(store)
        # The level name is "<lx>_<ly>_<lz>"; the dataset key inside the
        # group is log2 of the x downsampling factor.
        lx, ly, lz = [int(_) for _ in level.split("_")]
        llevel = int(np.round(np.log2(lx), 0))
        a = group[llevel]
        _, _, zs, ys, xs = a.chunks
        z1 = min(a.shape[2], z0 + zs)
        # Use the y chunk size for the y extent (previously the z size).
        y1 = min(a.shape[3], y0 + ys)
        x1 = min(a.shape[4], x0 + xs)
        chunk = a[0, 0, z0:z1, y0:y1, x0:x1]
        self.send_chunk(chunk)
    else:
        raise ValueError('Invalid format specified')
def main(args=sys.argv[1:]):
    """Build a precomputed mipmap stack from the command line.

    Parses ``args``, constructs the stack class that matches ``--format``
    (zarr, blockfs, ngff, or the default stack), writes the info file and then
    writes level 1 followed by levels 2..``--levels``.

    :param args: command-line arguments, excluding the program name
    """
    args = parse_args(args)
    logging.basicConfig(level=getattr(logging, args.log.upper()))
    ptype = PType.SEGMENTATION if args.segmentation else PType.IMAGE

    # Keyword arguments for the stack constructor.
    stack_kwargs = {}
    if args.chunk_size is not None:
        # The command line gives x,y,z; the stacks take (z, y, x).
        cx, cy, cz = [int(_) for _ in args.chunk_size.split(",")]
        stack_kwargs["chunk_size"] = (cz, cy, cx)

    if args.format == 'zarr':
        if args.source.endswith(('.tif', '.tiff')):
            stack = ZarrStack(args.source, args.dest, **stack_kwargs)
        else:
            store = zarr.NestedDirectoryStore(args.source)
            stack = ZarrStack(store, args.dest)
    elif args.format == 'blockfs':
        stack = BlockfsStack(args.source, args.dest, ptype=ptype,
                             **stack_kwargs)
    elif args.format == 'ngff':
        stack = NGFFStack(args.source, args.dest, **stack_kwargs)
        stack.create()
    else:
        stack = Stack(args.source, args.dest, ptype=ptype, **stack_kwargs)

    # Keyword arguments for the write_level_* calls. These are kept separate
    # from the constructor kwargs so that chunk_size can never leak into the
    # writers (it previously did for a zarr-store source).
    write_kwargs = {}
    if args.format != 'zarr' and args.n_cores is not None:
        write_kwargs["n_cores"] = args.n_cores

    voxel_size = [int(float(_) * 1000) for _ in args.voxel_size.split(",")]
    stack.write_info_file(args.levels, voxel_size)
    stack.write_level_1(**write_kwargs)
    for level in range(2, args.levels + 1):
        stack.write_level_n(level, **write_kwargs)
def test_copysegmentation_dvid_to_zarr(setup_dvid_to_zarr):
    """Run the workflow with a 2-level pyramid and compare every written
    scale against a locally downsampled copy of the input volume."""
    template_dir, config, volume, dvid_address, repo_uuid, output_file = setup_dvid_to_zarr

    # Ask for pyramid scales, and pick a bounding box that stays
    # brick-aligned even at scale 2 (keeps the comparison simple).
    bounding_box = [[0, 0, 0], [256, 256, 256]]
    config["input"]["geometry"]["bounding-box"] = bounding_box
    config["copysegmentation"]["pyramid-depth"] = 2

    dumper = YAML()
    dumper.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as config_file:
        dumper.dump(config, config_file)

    execution_dir, _workflow = launch_flow(template_dir, 1)

    bounding_box = np.array(bounding_box)

    # Expected volumes: the input region, then two successive 2x label
    # downsamplings of it.
    expected = [volume[box_to_slicing(*bounding_box)]]
    for _ in range(2):
        expected.append(downsample_labels(expected[-1], 2, True))

    store = zarr.NestedDirectoryStore(f"{execution_dir}/{output_file}")
    root = zarr.open(store, 'r')
    written = [
        root[key][box_to_slicing(*(bounding_box // factor))]
        for key, factor in (('s0', 1), ('s1', 2), ('s2', 4))
    ]

    assert (written[0] == expected[0]).all(), \
        "Scale 0: Written vol does not match expected"
    assert (written[1] == expected[1]).all(), \
        "Scale 1: Written vol does not match expected"
    assert (written[2] == expected[2]).all(), \
        "Scale 2: Written vol does not match expected"
def nifti_to_zarr_ngff(nifti_file: str) -> str:
    """Convert the nifti file on disk to a Zarr NGFF store.

    The Zarr store will have the same path with '.zarr' appended. If the store
    already exists, it will not be re-created.

    The result is always a ``str``: celery tasks must return a serializable
    type, and that holds for the "already exists" shortcut as well.
    """
    import itk
    import spatial_image_multiscale
    import spatial_image_ngff
    import zarr

    store_path = convert_to_store_path(nifti_file)
    if store_path.exists():
        # Return a string here too, not the path object.
        return str(store_path)

    image = itk.imread(str(nifti_file))
    da = itk.xarray_from_image(image)
    da.name = 'image'

    scale_factors = [2, 2, 2, 2]
    multiscale = spatial_image_multiscale.to_multiscale(da, scale_factors)

    store_location = str(nifti_file) + '.zarr'
    store_path = Path(store_location)
    store = zarr.NestedDirectoryStore(store_location)
    spatial_image_ngff.imwrite(multiscale, store)

    # celery tasks must return a serializable type; using string here
    return str(store_path)
def _to_directory(self):
    """Replace the current ZipStore with a NestedDirectoryStore copy.

    The archive is expected to carry a '.zip' suffix; when it does not, it is
    first renamed to '<path>.zip' and reopened read-only. Its contents are
    then copied into a directory store at the path without the suffix, the
    archive is deleted, and `self.store` / `self.root` are re-pointed at the
    directory store.
    """
    source_path = self.store.path
    if source_path.endswith('.zip'):
        archive_path = source_path
    else:
        # Free the un-suffixed name for the directory by moving the archive.
        self.store.close()
        archive_path = source_path + '.zip'
        check_and_remove_existing_path(archive_path)
        os.replace(source_path, archive_path)
        self.store = zarr.ZipStore(archive_path, mode='r')

    # Strip the 4-character '.zip' suffix to get the directory path.
    directory_path = archive_path[:-4]
    check_and_remove_existing_path(directory_path)
    directory_store = zarr.NestedDirectoryStore(directory_path)
    zarr.copy_store(self.store, directory_store)

    self.store.close()
    os.remove(archive_path)

    self.store = directory_store
    self.root = zarr.open_group(self.store)
    logger.info(
        f"Converted ZipStore zarr file {source_path} to NestedDirectoryStore {directory_path}."
    )
def access_zarr(dir_path: Union[str, Path], container_path: Union[str, Path],
                **kwargs) -> Any:
    """Open the array or group at `container_path` inside a zarr container.

    A `str` or `Path` `dir_path` is wrapped in a `zarr.NestedDirectoryStore`;
    any other object is handed to `zarr.open` unchanged. The optional `attrs`
    keyword is removed from `kwargs` and, unless the mode is "r", merged into
    the attributes of the opened node. All remaining keywords go to
    `zarr.open`.
    """
    if isinstance(dir_path, (str, Path)):
        store = zarr.NestedDirectoryStore(str(dir_path))
    else:
        store = dir_path
    component = str(container_path)
    attrs = kwargs.pop("attrs", {})
    requested_mode = kwargs.get("mode")

    # zarr is extremely slow to delete existing directories, so we do it ourselves
    if requested_mode == "w":
        probe_kwargs = {**kwargs, "mode": "a"}
        existing = zarr.open(store, path=component, **probe_kwargs)
        # todo: move this logic to methods on the stores themselves
        directory_backed = (zarr.N5Store, zarr.DirectoryStore,
                            zarr.NestedDirectoryStore)
        if isinstance(existing.store, directory_backed):
            logger.info(f'Beginning parallel rmdir of {existing.path}...')
            started = time.time()
            delete_zbranch(existing)
            finished = time.time()
            logger.info(
                f'Completed parallel rmdir of {existing.path} in {finished - started}s.')

    array_or_group = zarr.open(store, path=component, **kwargs)
    if requested_mode != "r" and len(attrs) > 0:
        array_or_group.attrs.update(attrs)
    return array_or_group
def netcdf_to_zarr(src, dst, axis=None, mode='serial', nested=False):
    """Write the entries of `src` into a single zarr group hierarchy.

    Args:
        src: Iterable of source names. For the first entry, metadata and
            variables are created; for every later entry, variables are
            appended to what the first entry created.
        dst: Destination. A `str` is opened as a directory store at that
            path; anything else is used directly as the zarr store.
        axis (None, optional): Passed to the append helper for entries after
            the first.
        mode (str, optional): Passed to the variable-writing helpers.
        nested (bool, optional): When `dst` is a `str`, choose
            `zarr.NestedDirectoryStore` instead of `zarr.DirectoryStore`.
    """
    if not isinstance(dst, str):
        local_store = dst
    else:
        store_cls = zarr.NestedDirectoryStore if nested else zarr.DirectoryStore
        local_store = store_cls(dst)

    root = zarr.group(store=local_store, overwrite=True)

    for file_index, dname in enumerate(src):
        is_first_source = file_index == 0
        # cycling over groups, the first one is the root.
        for group_index, gname in enumerate(__get_groups(dname)):
            if group_index == 0:
                group, ds = root, ''
            else:
                group, ds = __set_group(gname, root), dname

            if is_first_source:
                __set_meta(ds + gname, group)
                __set_vars(ds + gname, group, mode)
            else:
                __append_vars(gname, group, axis, mode)
def write_level_1(self, silent=False):
    """Write mipmap level 1 into the "1_1_1" zarr array under `self.dest`.

    Data comes from the tiff planes in `self.files` when that is set,
    otherwise from the existing array `self.z_arr`; it is copied one z-slab
    at a time.

    :param silent: disable the progress bar
    """
    level_dir = os.path.join(self.dest, "1_1_1")
    level_store = zarr.NestedDirectoryStore(level_dir)
    level_array = zarr.open(
        level_store,
        mode='w',
        chunks=(64, 64, 64),
        dtype=self.dtype,
        shape=(self.z_extent, self.y_extent, self.x_extent),
        compression=self.compressor)
    z_starts = self.z0(1)
    z_ends = self.z1(1)
    _y_starts = self.y0(1)
    y_ends = self.y1(1)
    _x_starts = self.x0(1)
    x_ends = self.x1(1)

    def slabs():
        # One (start, stop) z-range per block row, with a progress bar.
        return tqdm.tqdm(zip(z_starts, z_ends),
                         total=len(z_starts),
                         disable=silent)

    if self.files is not None:
        for start, stop in slabs():
            planes = np.zeros((stop - start, y_ends[-1], x_ends[-1]),
                              self.dtype)
            for offset, z in enumerate(range(start, stop)):
                planes[offset] = tifffile.imread(self.files[z])
            level_array[start:stop] = planes
    elif self.z_arr is not None:
        # need to decompress to re-chunk the original store
        for start, stop in slabs():
            level_array[start:stop] = self.z_arr[start:stop]
def get_ngff_group_from_url(url: str) -> zarr.Group:
    """Return the Zarr group stored at the location named by a NGFF url.

    The filesystem path is the url's network location joined with its
    percent-decoded path component.
    """
    parsed = urlparse(url)
    location = os.path.join(parsed.netloc, unquote(parsed.path))
    return zarr.group(zarr.NestedDirectoryStore(location))
def test_zarr(self):
    """Running main with --format zarr produces a full-size level-1 array."""
    volume_shape = (128, 128, 128)
    with make_case(np.uint16, volume_shape, return_path=True) \
            as (glob_expr, dest, volume):
        cli_args = [
            "--source", glob_expr,
            "--dest", dest,
            "--levels", "2",
            "--format", "zarr",
        ]
        main(cli_args)
        level_1_store = zarr.NestedDirectoryStore(
            os.path.join(dest, "1_1_1"))
        level_1 = zarr.open(level_1_store, "r")
        self.assertSequenceEqual(level_1.shape, (128, 128, 128))
def create(self, mode="w", compressor=numcodecs.Blosc("zstd", 5)):
    """
    Create the zarr group at `self.dest`, or open it for append

    :param mode: "w" overwrites whatever group is already stored there,
        any other value leaves existing contents in place
    :param compressor: the compressor, remembered on the stack as
        `self.compressor`
    """
    overwrite_existing = mode == "w"
    group_store = zarr.NestedDirectoryStore(self.dest)
    self.zgroup = zarr.group(group_store, overwrite=overwrite_existing)
    self.compressor = compressor
def test_write_level_1(self):
    """Level 1 written by ZarrStack matches the source volume block by block."""
    with make_case(np.uint16, (100, 200, 300), klass=ZarrStack) \
            as (stack, npstack):
        stack.write_level_1()
        level_dir = os.path.join(stack.dest, "1_1_1")
        written = zarr.open(zarr.NestedDirectoryStore(level_dir), mode='r')

        # Spot-check one block that runs to the end of the x axis.
        edge_block = (slice(None, 64), slice(64, 128), slice(256, None))
        np.testing.assert_equal(written[edge_block], npstack[edge_block])

        # Then compare every block of the level-1 grid.
        x_spans = zip(stack.x0(1), stack.x1(1))
        y_spans = zip(stack.y0(1), stack.y1(1))
        z_spans = zip(stack.z0(1), stack.z1(1))
        for (x0, x1), (y0, y1), (z0, z1) in itertools.product(
                x_spans, y_spans, z_spans):
            region = (slice(z0, z1), slice(y0, y1), slice(x0, x1))
            np.testing.assert_equal(written[region], npstack[region])
def test_write_level_2(self):
    """Level 2 holds the floor-mean of each 2x2x2 neighbourhood of level 1,
    including the truncated neighbourhood at the odd-sized y edge."""
    with make_case(np.uint16, (100, 201, 300), klass=ZarrStack) \
            as (stack, npstack):
        stack.write_level_1()
        stack.write_level_n(2)
        level_dir = os.path.join(stack.dest, "2_2_2")
        level_2 = zarr.open(zarr.NestedDirectoryStore(level_dir), "r")
        block = level_2[0:50, 64:101, 128:150]
        self.assertSequenceEqual(block.shape, (50, 101 - 64, 150 - 128))

        # Widen before summing so eight uint16 values cannot overflow.
        wide = npstack.astype(np.uint32)

        # Block origin (0, 64, 128) at level 2 is (0, 128, 256) at level 1;
        # all eight source voxels exist there.
        first = wide[0:2, 128:130, 256:258].sum() // 8
        self.assertEqual(block[0, 0, 0], first)

        # The y extent is odd (201), so the last level-2 voxel only has a
        # single source plane in y: four contributing voxels.
        last = wide[-2:, -1, -2:].sum() // 4
        self.assertEqual(block[-1, -1, -1], last)
def new_zarr(path, shape, chunks, dtype, in_memory=False, **kwargs):
    """
    Create a new Zarr array, on disk at `path` or purely in memory.

    **NOTE:** The on-disk array is opened with mode 'w'. To avoid data loss,
    be careful when calling `new_zarr` on a path with an existing array.

    Parameters
    ----------
    path : str
        Location of the NestedDirectoryStore backing the array
        (unused when `in_memory` is true)
    shape : tuple
        Overall shape of the zarr array
    chunks : tuple
        Shape of each chunk for the zarr array
    dtype : str
        Data type of the zarr array
    in_memory : bool
        If true, build the array with `zarr.zeros` instead of opening a store
    kwargs : dict
        Extra keyword args forwarded to `zarr.zeros` / `zarr.open`

    Returns
    -------
    arr : zarr Array
        Reference to the new zarr array

    """
    codec = Blosc(cname='zstd', clevel=1, shuffle=Blosc.BITSHUFFLE)

    if in_memory:
        return zarr.zeros(shape=shape,
                          chunks=chunks,
                          dtype=dtype,
                          compressor=codec,
                          **kwargs)

    return zarr.open(zarr.NestedDirectoryStore(path),
                     mode='w',
                     shape=shape,
                     chunks=chunks,
                     dtype=dtype,
                     compressor=codec,
                     **kwargs)
def generate_spec(json_path: str, create_source: bool):
    """Write a MultiscaleStorageSpec for a test volume to `json_path` as JSON.

    When `create_source` is true, the source array is first (re)created in the
    test container and filled with ones.
    """
    source_root = '/groups/cosem/cosem/bennettd/scratch/test.zarr'
    source_component = 'source'
    dest_component = 'dest'
    rank = 3

    def cube(edge):
        # Same edge length along every one of the `rank` axes.
        return (edge, ) * rank

    if create_source:
        source_store = zarr.NestedDirectoryStore(source_root)
        source_array = zarr.open(store=source_store,
                                 path=source_component,
                                 mode='w',
                                 shape=cube(2048),
                                 chunks=cube(64),
                                 dtype='uint8')
        source_array[:] = 1

    source = ReadableArrayStore(url=f'{source_root}/{source_component}',
                                storage_options={},
                                chunks=cube(128))

    dest = WriteableArrayStore(url=f'{source_root}/{dest_component}',
                               storage_options={},
                               chunks=cube(64),
                               access_mode=('w', 'w'))

    downsampling_spec = DownsamplingSpec(method='mean',
                                         factors=cube(2),
                                         levels=(0, 1, 3, 4),
                                         chunks=cube(128))

    worker_spec = WorkerSpec(num_workers=1, num_cores=10, memory='15GB')
    cluster_spec = ClusterSpec(deployment='dask_lsf', worker=worker_spec)

    spec = MultiscaleStorageSpec(
        source=source,
        destination=dest,
        downsampling_spec=downsampling_spec,
        cluster_spec=cluster_spec,
        logging_dir='/groups/scicompsoft/home/bennettd/logs')

    serialized = spec.json(indent=2)
    with open(json_path, mode='w') as fh:
        fh.write(serialized)
def nifti_to_zarr_ngff(nifti_file: Union[str, Path]) -> Path:
    """Write a multiscale Zarr NGFF store next to a nifti file.

    The store lives at the nifti path with '.zarr' appended. An existing
    store is returned as-is, without re-running the conversion.
    """
    store_location = str(nifti_file) + '.zarr'
    store_path = Path(store_location)
    if store_path.exists():
        return store_path

    image = itk.imread(str(nifti_file))
    data_array = itk.xarray_from_image(image)
    data_array.name = 'image'

    # Four successive 2x downsamplings.
    scale_factors = [2, 2, 2, 2]
    multiscale = spatial_image_multiscale.to_multiscale(
        data_array, scale_factors)

    spatial_image_ngff.imwrite(
        multiscale, zarr.NestedDirectoryStore(store_location))
    return store_path
def open(path, nested=True, mode='a'):
    """
    Open the persistent Zarr array or group located at `path`.

    Parameters
    ----------
    path : str
        Path to the Zarr container
    nested : bool
        If true, `path` is wrapped in a NestedDirectoryStore before opening;
        otherwise `path` is passed to `zarr.open` directly
    mode : str
        Read / write permissions mode

    Returns
    -------
    arr : zarr Array
        Whatever `zarr.open` returns for the container

    """
    target = zarr.NestedDirectoryStore(path) if nested else path
    return zarr.open(target, mode=mode)
def volume_setup():
    """Create a random test volume in a fresh temporary zarr container.

    Returns the service config pointing at the container together with the
    volume that was written into it.
    """
    container_path = f"{tempfile.mkdtemp()}/test_zarr_service_testvol.zarr"
    dataset_name = "/some/volume"

    config = {
        "zarr": {
            "path": container_path,
            "dataset": dataset_name,
            "global-offset": [1000, 2000, 3000]
        },
        "geometry": {}
    }

    volume = np.random.randint(100, size=(512, 256, 128))

    root = zarr.open(store=zarr.NestedDirectoryStore(container_path),
                     mode='w')
    root.create_dataset(dataset_name,
                        data=volume,
                        chunks=(64, 64, 64),
                        compressor=None)

    return config, volume
def _saveChunkEV(ev, dir_name, chunks):
    """
    Save EventList in chunks on disk.

    The store at ``dir_name`` is overwritten and gets two groups:
    ``main_data`` (times, energy, pi_channel, gti) and ``meta_data``
    (dt, ncounts, notes, mjdref). An attribute is skipped when it is unset
    or empty; ``mjdref`` is always written.

    Parameters
    ----------
    ev: :class:`stingray.events.EventList` object
        EventList to be saved

    dir_name: string
        Top Level diretory name where EventList is to be saved

    chunks: int
        The number of elements per chunk
    """
    # Creating a Nested Store and multiple groups for temporary saving
    root = zarr.group(store=zarr.NestedDirectoryStore(dir_name),
                      overwrite=True)
    main_data_group = root.create_group("main_data", overwrite=True)
    meta_data_group = root.create_group("meta_data", overwrite=True)

    compressor = Blosc(cname="lz4", clevel=1, shuffle=-1)

    def save_meta(name, data):
        meta_data_group.create_dataset(name=name,
                                       data=data,
                                       compressor=compressor,
                                       overwrite=True)

    # Chunked 1-d columns: dataset name -> EventList attribute.
    chunked_columns = (
        ("times", "time"),
        ("energy", "energy"),
        ("pi_channel", "pi"),
    )
    for dataset_name, attribute in chunked_columns:
        values = getattr(ev, attribute)
        if values is None:
            continue
        if values.all() or values.size != 0:
            main_data_group.create_dataset(
                name=dataset_name,
                data=values,
                compressor=compressor,
                overwrite=True,
                chunks=(chunks, ),
            )

    gti = ev.gti
    if gti is not None and (gti.all() or gti.shape[0] != 0):
        # gti is stored flattened and without a compressor argument.
        main_data_group.create_dataset(name="gti",
                                       data=gti.flatten(),
                                       overwrite=True)

    if ev.dt != 0:
        save_meta("dt", ev.dt)

    if ev.ncounts:
        save_meta("ncounts", ev.ncounts)

    if ev.notes:
        save_meta("notes", ev.notes)

    save_meta("mjdref", ev.mjdref)
def read_chunk(url, x0, x1, y0, y1, z0, z1, level=1, format="tiff"):
    """Read an arbitrary chunk of data

    The requested region is assembled from every stored chunk that overlaps
    it; each stored chunk is trimmed to the region before being copied in.

    :param url: Base URL of the precomputed data source
    :param x0: starting X coordinate, in the level's coordinate space
    :param x1: ending X coordinate (non-inclusive)
    :param y0: starting Y coordinate
    :param y1: ending Y cooridinate
    :param z0: starting Z coordinate
    :param z1: ending Z coordinate
    :param level: mipmap level
    :param format: the read format if it's a file URL. Defaults to "tiff";
        "blockfs", "ngff" and "zarr" are also handled. Ignored for non-file
        URLs, where each chunk is fetched as raw bytes.
    :return: a Numpy array containing the data
    :raises NotImplementedError: for a file URL with an unknown format
    """
    is_file = urlparse(url).scheme.lower() == "file"
    info = get_info(url)
    scale = info.get_scale(level)
    result = np.zeros((z1 - z0, y1 - y0, x1 - x0), info.data_type)
    shape = np.array(scale.shape)
    offset = np.array(scale.offset)
    stride = np.array(scale.chunk_sizes)
    end = offset + shape

    # Extent of the stored chunks that overlap the request.
    x0d = _chunk_start(x0, offset[0], stride[0])
    x1d = _chunk_end(x1, offset[0], stride[0], end[0])
    y0d = _chunk_start(y0, offset[1], stride[1])
    y1d = _chunk_end(y1, offset[1], stride[1], end[1])
    z0d = _chunk_start(z0, offset[2], stride[2])
    z1d = _chunk_end(z1, offset[2], stride[2], end[2])

    for x0c, y0c, z0c in itertools.product(range(x0d, x1d, stride[0]),
                                           range(y0d, y1d, stride[1]),
                                           range(z0d, z1d, stride[2])):
        x1c = min(x1d, x0c + stride[0])
        y1c = min(y1d, y0c + stride[1])
        z1c = min(z1d, z0c + stride[2])
        chunk_url = url + "/" + scale.key + "/%d-%d_%d-%d_%d-%d" % (
            x0c, x1c, y0c, y1c, z0c, z1c)
        if is_file:
            if format == "tiff":
                chunk_url += ".tiff"
                with urlopen(chunk_url) as fd:
                    chunk = tifffile.imread(fd)
            elif format == "blockfs":
                from blockfs import Directory
                from .blockfs_stack import BlockfsStack
                directory_url = url + "/" + scale.key + "/" +\
                    BlockfsStack.DIRECTORY_FILENAME
                directory_parse = urlparse(directory_url)
                directory_path = os.path.join(directory_parse.netloc,
                                              unquote(directory_parse.path))
                directory = Directory.open(directory_path)
                chunk = directory.read_block(x0c, y0c, z0c)
            elif format == 'ngff':
                group = get_ngff_group_from_url(url)
                key = str(int(np.log2(level)))
                dataset = group[key]
                dataset.read_only = True
                chunk = dataset[0, 0, z0c:z1c, y0c:y1c, x0c:x1c]
            elif format == 'zarr':
                zarr_url = url + "/" + scale.key
                zarr_parse = urlparse(zarr_url)
                zarr_path = os.path.join(zarr_parse.netloc,
                                         unquote(zarr_parse.path))
                storage = zarr.NestedDirectoryStore(zarr_path)
                dataset = zarr.Array(storage)
                chunk = dataset[z0c:z1c, y0c:y1c, x0c:x1c]
            else:
                raise NotImplementedError("Can't read %s yet" % format)
        else:
            # Close the HTTP response once the payload has been read, as the
            # tiff branch already does.
            with urlopen(chunk_url) as response:
                data = response.read()
            chunk = np.frombuffer(data, info.data_type).reshape(
                (z1c - z0c, y1c - y0c, x1c - x0c))

        # Trim the stored chunk to the requested region, axis by axis. The
        # start coordinate is updated first because the end trim is expressed
        # relative to the (possibly moved) start.
        if z0c < z0:
            chunk = chunk[z0 - z0c:]
            z0c = z0
        if z1c > z1:
            chunk = chunk[:z1 - z0c]
            z1c = z1
        if y0c < y0:
            chunk = chunk[:, y0 - y0c:]
            y0c = y0
        if y1c > y1:
            chunk = chunk[:, :y1 - y0c]
            y1c = y1
        if x0c < x0:
            chunk = chunk[:, :, x0 - x0c:]
            x0c = x0
        if x1c > x1:
            chunk = chunk[:, :, :x1 - x0c]
            x1c = x1
        result[z0c - z0:z0c - z0 + chunk.shape[0],
               y0c - y0:y0c - y0 + chunk.shape[1],
               x0c - x0:x0c - x0 + chunk.shape[2]] = chunk
    return result
def _octet_stream_response(start_response, data):
    """Start a 200 response carrying raw chunk bytes; return the WSGI body."""
    start_response(
        "200 OK",
        [("Content-type", "application/octet-stream"),
         ("Content-Length", str(len(data))),
         ('Access-Control-Allow-Origin', '*')])
    return [data]


def serve_precomputed(environ, start_response, config_file):
    """WSGI handler serving precomputed volumes described by `config_file`.

    The request path is ``/<source name>/<filename>``. ``/`` lists the
    configured sources; ``<source>/info`` returns the source's info file as
    JSON; any other filename is read as one chunk in the source's configured
    format (tiff, zarr, blockfs or ngff) and returned as C-ordered raw bytes.

    A "file not found" response is produced for ``mesh/info``, for missing
    files, for unparseable chunk filenames and for paths that match no
    configured source.

    :param environ: the WSGI environment; only ``PATH_INFO`` is read
    :param start_response: the WSGI start_response callable
    :param config_file: path of the configuration passed to ``get_config``
    :return: the WSGI response body
    """
    config = get_config(config_file)
    path_info = environ["PATH_INFO"]
    if path_info == "/":
        return serve_directory(start_response, config_file)
    for source in config:
        if path_info[1:].startswith(source["name"] + "/"):
            try:
                # Skip the leading "/", the source name and the "/" after it.
                filename = path_info[2 + len(source["name"]):]
                dest = os.path.join(source["directory"])
                if filename == "mesh/info":
                    return file_not_found(filename, start_response)
                elif filename == "info":
                    logging.info("Serving %s" % source["name"])
                    destpath = os.path.join(dest, "info")
                    if not os.path.exists(destpath):
                        return file_not_found(destpath, start_response)
                    with open(destpath, "rb") as fd:
                        data = fd.read()
                    start_response(
                        "200 OK",
                        [("Content-type", "application/json"),
                         ("Content-Length", str(len(data))),
                         ('Access-Control-Allow-Origin', '*')])
                    return [data]
                elif source["format"] == "tiff":
                    import tifffile
                    path = os.path.join(dest, filename + ".tiff")
                    if not os.path.exists(path):
                        return file_not_found(path, start_response)
                    img = tifffile.imread(path)
                    # tobytes replaces ndarray.tostring, which NumPy has
                    # deprecated and removed in 2.0.
                    return _octet_stream_response(start_response,
                                                  img.tobytes("C"))
                elif source["format"] == "zarr":
                    import zarr
                    filename, x0, x1, y0, y1, z0, z1 = \
                        parse_filename(dest, filename)
                    if not os.path.exists(filename):
                        return file_not_found(filename, start_response)
                    store = zarr.NestedDirectoryStore(filename)
                    z_arr = zarr.open(store, mode='r')
                    chunk = z_arr[z0:z1, y0:y1, x0:x1]
                    return _octet_stream_response(start_response,
                                                  chunk.tobytes("C"))
                elif source["format"] == "blockfs":
                    from blockfs import Directory
                    filename, x0, x1, y0, y1, z0, z1 = \
                        parse_filename(dest, filename)
                    filename = os.path.join(filename, "precomputed.blockfs")
                    directory = Directory.open(filename)
                    chunk = directory.read_block(x0, y0, z0)
                    return _octet_stream_response(start_response,
                                                  chunk.tobytes("C"))
                elif source["format"] == "ngff":
                    import zarr
                    filename, x0, x1, y0, y1, z0, z1 = \
                        parse_filename(dest, filename)
                    # The last path component is "<lx>_<ly>_<lz>"; the dataset
                    # key in the group is log2 of the x downsampling factor.
                    root, level = os.path.split(filename)
                    lx, ly, lz = [int(_) for _ in level.split("_")]
                    llevel = int(np.round(np.log2(lx), 0))
                    store = zarr.NestedDirectoryStore(root)
                    group = zarr.group(store)
                    a = group[llevel]
                    # The chunk end is recomputed from the array's chunking,
                    # clamped to the array shape.
                    _, _, zs, ys, xs = a.chunks
                    z1 = min(a.shape[2], z0 + zs)
                    y1 = min(a.shape[3], y0 + ys)
                    x1 = min(a.shape[4], x0 + xs)
                    chunk = a[0, 0, z0:z1, y0:y1, x0:x1]
                    return _octet_stream_response(start_response,
                                                  chunk.tobytes("C"))
            except ParseFilenameError:
                return file_not_found(path_info, start_response)
    else:
        # No source produced a response for this path.
        return file_not_found(path_info, start_response)
def test_connectedcomponents(setup_connectedcomponents_hdf5_zarr, disable_auto_retry):
    """Run the workflow and check that labels 0, 2 and 4 are untouched while
    labels 1 and 3 are split into new labels, both in the output volume and
    in the relabeling CSV."""
    template_dir, _config, input_vol = setup_connectedcomponents_hdf5_zarr

    execution_dir, workflow = launch_flow(template_dir, 1)
    zarr_config = workflow.config["output"]["zarr"]
    output_path = zarr_config["path"]
    dset_name = zarr_config["dataset"]

    container = zarr.open(store=zarr.NestedDirectoryStore(output_path),
                          mode='r')
    output_vol = container[dset_name][:]
    assert output_vol.shape == input_vol.shape

    final_labels = pd.unique(output_vol.reshape(-1))

    def same_mask(label):
        # True when `label` covers exactly the same voxels before and after.
        return ((input_vol == label) == (output_vol == label)).all()

    # Never change label 0
    assert 0 in final_labels
    assert same_mask(0)

    # Single-component objects
    assert 2 in final_labels
    assert 4 in final_labels
    assert same_mask(2)
    assert same_mask(4)

    # Split objects
    assert 1 not in final_labels
    assert 3 not in final_labels

    block_shape = (1, 4, 4)
    for corner in map(np.array, ndrange((0, 0, 0), (1, 8, 8), block_shape)):
        box = (corner, corner + block_shape)
        input_block = extract_subvol(input_vol, box)
        output_block = extract_subvol(output_vol, box)

        for orig_label in [1, 3]:
            if orig_label not in input_block:
                continue

            positions = (input_block == orig_label)
            assert (input_block[positions] != output_block[positions]).all(), \
                f"original label {orig_label} was not split!"

            assert (output_block[positions] > input_vol.max()).all(), \
                f"original label {orig_label} was not split!"

            # This block-based assertion is not generally true for all possible input,
            # but our test data blocks are set up so that this is a valid check.
            # (No block happens to contain more than one final CC that came from the same original label.)
            assert (output_block[positions] == output_block[positions][0]).all(), \
                f"original label {orig_label} ended up over-segmentated"

    # Check CSV output
    df = pd.read_csv(f'{execution_dir}/relabeled-objects.csv')

    expected_row_counts = (
        ('orig_label == 0', 0),
        ('orig_label == 1', 3),
        ('orig_label == 2', 0),
        ('orig_label == 3', 2),
        ('orig_label == 4', 0),
    )
    for query, expected_rows in expected_row_counts:
        assert len(df.query(query)) == expected_rows

    assert not df['final_label'].duplicated().any()
    assert (df['final_label'] > input_vol.max()).all()
from pathlib import Path

import numpy as np
import zarr
from numcodecs import Zlib, GZip, BZ2

# Build a small nested-directory zarr container under ~/tmp containing one
# group with a single uint8 array.

# Nested directory store
nested_test_path = Path.home().joinpath('tmp', 'zarr-test-nested.zarr')
group_path = 'test/data'
nested_store = zarr.NestedDirectoryStore(str(nested_test_path))

# overwrite=True discards whatever an earlier run left in the store.
nested_root = zarr.group(store=nested_store, overwrite=True)
nested_group = nested_root.create_group(group_path)

# NOTE: despite the "3x2" in the names, the array has shape (2, 3) and is
# stored as one (2, 3) chunk.
array_3x2_c = np.arange(6).reshape((2, 3))
nested_group.array(name='3x2_c_|u1',
                   data=array_3x2_c,
                   dtype='|u1',
                   chunks=(2, 3),
                   overwrite=True)
def _saveFITSZarr(f_name, dir_name, chunks):
    """
    Read a FITS file and save it for further processing.

    From the "EVENTS" extension the TIME column, the PI / PHA columns (when
    present) and the TSTART, TSTOP and MJDREF header values are stored; from
    the "GTI" extension the START / STOP pairs are stored flattened.

    Parameters
    ----------
    f_name: string
        The name of file with which object was saved

    dir_name: string
        The name of the top level directory where the file is to be stored

    chunks: int
        The number of elements per chunk
    """
    compressor = Blosc(cname="lz4", clevel=1, shuffle=-1)

    root = zarr.group(store=zarr.NestedDirectoryStore(dir_name),
                      overwrite=True)
    main_data_group = root.create_group("main_data", overwrite=True)
    meta_data_group = root.create_group("meta_data", overwrite=True)

    with fits.open(f_name, memmap=True) as hdus:
        for hdu in hdus:
            if hdu.name == "EVENTS":
                times = hdu.data["TIME"]
                # Never ask for chunks longer than the column itself.
                chunks = chunks if chunks <= times.size else times.size

                main_data_group.create_dataset(
                    name="times",
                    data=times,
                    compressor=compressor,
                    overwrite=True,
                    chunks=(chunks, ),
                )

                available_columns = hdu.data.columns.names
                for col in ["PI", "PHA"]:
                    if col not in available_columns:
                        continue
                    main_data_group.create_dataset(
                        name=f"{col.lower()}_channel",
                        data=hdu.data[col],
                        compressor=compressor,
                        overwrite=True,
                        chunks=(chunks, ),
                    )

                for dataset_name, keyword in (("tstart", "TSTART"),
                                              ("tstop", "TSTOP")):
                    meta_data_group.create_dataset(
                        name=dataset_name,
                        data=hdu.header[keyword],
                        compressor=compressor,
                        overwrite=True,
                    )

                meta_data_group.create_dataset(
                    name="mjdref",
                    data=high_precision_keyword_read(hdu.header, "MJDREF"),
                    compressor=compressor,
                    overwrite=True,
                )

            elif hdu.name == "GTI":
                # TODO: Needs to be generalized
                start, stop = hdu.data["START"], hdu.data["STOP"]
                gti = np.array(list(zip(start, stop)))

                main_data_group.create_dataset(
                    name="gti",
                    data=gti.flatten(),
                    compressor=compressor,
                    overwrite=True,
                )
def write_level_n(self, level, silent=False):
    """Write mipmap level `level` by 2x downsampling the previous level.

    The previous level's zarr array (under `self.dest`, in the directory
    named after its resolution) is read block by block. Every destination
    voxel receives the floor of the mean of the source voxels that map onto
    it: up to 2x2x2 of them, fewer where the source ends.

    :param level: the level to write; level - 1 must already be on disk
    :param silent: disable the progress bar
    """
    src_resolution = self.resolution(level - 1)
    dest_resolution = self.resolution(level)
    src = os.path.join(
        self.dest,
        "%d_%d_%d" % (src_resolution, src_resolution, src_resolution))
    dest = os.path.join(
        self.dest,
        "%d_%d_%d" % (dest_resolution, dest_resolution, dest_resolution))
    src_store = zarr.NestedDirectoryStore(src)
    src_zarr = zarr.open(src_store, mode='r')
    dest_store = zarr.NestedDirectoryStore(dest)
    dest_zarr = zarr.open(dest_store,
                          mode='w',
                          chunks=(64, 64, 64),
                          dtype=self.dtype,
                          shape=(self.z1(level)[-1],
                                 self.y1(level)[-1],
                                 self.x1(level)[-1]),
                          compression=self.compressor)
    z0s = self.z0(level - 1)  # source block coordinates
    z1s = self.z1(level - 1)
    y0s = self.y0(level - 1)
    y1s = self.y1(level - 1)
    x0s = self.x0(level - 1)
    x1s = self.x1(level - 1)
    z0d = self.z0(level)  # dest block coordinates
    z1d = self.z1(level)
    y0d = self.y0(level)
    y1d = self.y1(level)
    x0d = self.x0(level)
    x1d = self.x1(level)
    for xidx, yidx, zidx in tqdm.tqdm(
            list(
                itertools.product(range(self.n_x(level)),
                                  range(self.n_y(level)),
                                  range(self.n_z(level)))),
            disable=silent
    ):  # looping over destination block indices (fewer blocks than source)
        # `block` accumulates the sum of contributing source voxels in
        # uint64; `hits` counts how many voxels contributed to each cell.
        block = np.zeros((z1d[zidx] - z0d[zidx], y1d[yidx] - y0d[yidx],
                          x1d[xidx] - x0d[xidx]), np.uint64)
        hits = np.zeros((z1d[zidx] - z0d[zidx], y1d[yidx] - y0d[yidx],
                         x1d[xidx] - x0d[xidx]), np.uint64)
        for xsi1, ysi1, zsi1 in itertools.product(
                (0, 1), (0, 1),
                (0, 1)):  # looping over source blocks for this destination
            xsi = xsi1 + xidx * 2
            if xsi == self.n_x(
                    level - 1
            ):  # Check for any source blocks that are out-of-bounds
                continue
            ysi = ysi1 + yidx * 2
            if ysi == self.n_y(level - 1):
                continue
            zsi = zsi1 + zidx * 2
            if zsi == self.n_z(level - 1):
                continue
            src_block = src_zarr[z0s[zsi]:z1s[zsi], y0s[ysi]:y1s[ysi],
                                 x0s[xsi]:x1s[xsi]]
            # Each (offz, offy, offx) picks one of the eight interleaved
            # sub-lattices of the source block; adding all eight sums every
            # 2x2x2 neighbourhood into a single destination cell.
            for offx, offy, offz in \
                    itertools.product((0, 1), (0, 1), (0,1)):
                dsblock = src_block[offz::2, offy::2, offx::2]
                block[zsi1*32:zsi1*32 + dsblock.shape[0],
                      ysi1*32:ysi1*32 + dsblock.shape[1],
                      xsi1*32:xsi1*32 + dsblock.shape[2]] += \
                    dsblock.astype(block.dtype)  # 32 is half-block size of source
                hits[zsi1 * 32:zsi1 * 32 + dsblock.shape[0],
                     ysi1 * 32:ysi1 * 32 + dsblock.shape[1],
                     xsi1 * 32:xsi1 * 32 + dsblock.shape[2]] += 1
        # Turn sums into floor-means; cells nothing contributed to stay 0.
        block[hits > 0] = block[hits > 0] // hits[hits > 0]
        dest_zarr[z0d[zidx]:z1d[zidx], y0d[yidx]:y1d[yidx],
                  x0d[xidx]:x1d[xidx]] = block
def _saveChunkLC(lc, dir_name, chunks):
    """
    Save Lightcurve in chunks on disk.

    The store at ``dir_name`` is overwritten and gets two groups:
    ``main_data`` (times, counts, optionally count_err, gti) and
    ``meta_data`` (dt, err_dist, mjdref).

    Parameters
    ----------
    lc: :class:`stingray.Lightcurve` object
        Lightcurve to be saved

    dir_name: string
        Top Level diretory name where Lightcurve is to be saved

    chunks: int
        The number of elements per chunk
    """
    # Creating a Nested Store and multiple groups for temporary saving
    root = zarr.group(store=zarr.NestedDirectoryStore(dir_name),
                      overwrite=True)
    main_data_group = root.create_group("main_data", overwrite=True)
    meta_data_group = root.create_group("meta_data", overwrite=True)

    compressor = Blosc(cname="lz4", clevel=1, shuffle=-1)  # Optimal

    def save_chunked(name, data):
        main_data_group.create_dataset(
            name=name,
            data=data,
            compressor=compressor,
            overwrite=True,
            chunks=(chunks, ),
        )

    def save_meta(name, data):
        meta_data_group.create_dataset(
            name=name,
            data=data,
            compressor=compressor,
            overwrite=True,
        )

    save_chunked("times", lc.time)
    save_chunked("counts", lc.counts)

    # Errors are only stored when they were explicitly set on the lightcurve.
    if lc._counts_err is not None:
        save_chunked("count_err", lc.counts_err)

    # gti is stored flattened and without a compressor argument.
    main_data_group.create_dataset(name="gti",
                                   data=lc.gti.flatten(),
                                   overwrite=True)

    save_meta("dt", lc.dt)
    save_meta("err_dist", lc.err_dist)
    save_meta("mjdref", lc.mjdref)