Code Example #1
File: zarr_utils.py Project: slowkow/pegasusio
    def __init__(self,
                 path: str,
                 mode: str = 'r',
                 storage_type: str = None) -> None:
        """ Initialize a Zarr file, if mode == 'w', create an empty one, otherwise, load from path
        path : `str`, path for the zarr object.
        storage_type : `str`, currently only support 'ZipStore' and 'NestedDirectoryStore'. If None, use 'NestedDirectoryStore' by default.
        """
        self.store = self.root = None

        if storage_type is None:
            storage_type = 'NestedDirectoryStore'

        if mode == 'w':
            # Create a new zarr file
            check_and_remove_existing_path(path)
            self.store = zarr.ZipStore(
                path, mode='w'
            ) if storage_type == 'ZipStore' else zarr.NestedDirectoryStore(
                path)
            self.root = zarr.group(self.store, overwrite=True)
        else:
            # Load existing zarr file
            self.store = zarr.NestedDirectoryStore(path) if os.path.isdir(
                path) else zarr.ZipStore(path, mode='r')
            if mode == 'a' and isinstance(self.store, zarr.ZipStore):
                self._to_directory()
            self.root = zarr.open(self.store, mode=mode)
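A minimal usage sketch for the constructor above, assuming the enclosing class is named ZarrFile and that 'data.zarr' is a hypothetical path:

# Create a new NestedDirectoryStore-backed file, then reopen it read-only;
# on reopen the store type is inferred from what exists on disk.
zf = ZarrFile('data.zarr', mode='w')
zf_read = ZarrFile('data.zarr', mode='r')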
Code Example #2
    def do_GET(self):
        if args.format == FORMAT_RAW or self.path.startswith("/socket"):
            print("Handing request to simple http request handler")
            super(RequestHandler, self).do_GET()
        elif self.path.find("/info") >= 0:
            size = pathlib.Path("info").stat().st_size
            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", 'application/octet-stream')
            self.send_header("Content-Length", str(size))
            self.end_headers()
            with open("info", "rb") as fd:
                self.copyfile(fd, self.wfile)

        elif args.format == FORMAT_TIFF:
            import tifffile
            path = self.path[1:] + ".tiff"
            print(path)
            if not os.path.exists(path):
                super(RequestHandler, self).do_GET()
                return
            chunk = tifffile.imread(path)
            self.send_chunk(chunk)
        elif args.format == FORMAT_ZARR:
            import zarr
            level, path = self.path[1:].split('/')
            if not os.path.exists(level):
                super(RequestHandler, self).do_GET()
                return
            x0, y0, z0 = self.parse_path(path)
            store = zarr.NestedDirectoryStore(level)
            z_arr = zarr.open(store, mode='r')
            # Clip the chunk end coordinates to the array shape,
            # as in the NGFF branch below.
            zs, ys, xs = z_arr.chunks
            z1 = min(z_arr.shape[0], z0 + zs)
            y1 = min(z_arr.shape[1], y0 + ys)
            x1 = min(z_arr.shape[2], x0 + xs)
            chunk = z_arr[z0:z1, y0:y1, x0:x1]
            self.send_chunk(chunk)
        elif args.format == FORMAT_BLOCKFS:
            level, path = self.path[1:].split('/')
            if not os.path.exists(level):
                super(RequestHandler, self).do_GET()
                return
            x0, y0, z0 = self.parse_path(path)
            directory = Directory.open(
                os.path.join(level, "precomputed.blockfs"))
            chunk = directory.read_block(x0, y0, z0)
            self.send_chunk(chunk)
        elif args.format == FORMAT_NGFF:
            import zarr
            level, path = self.path[1:].split('/')
            x0, y0, z0 = self.parse_path(path)
            store = zarr.NestedDirectoryStore(".")
            group = zarr.group(store)
            lx, ly, lz = [int(_) for _ in level.split("_")]
            llevel = int(np.round(np.log2(lx), 0))
            a = group[llevel]
            _, _, zs, ys, xs = a.chunks
            z1 = min(a.shape[2], z0 + zs)
            y1 = min(a.shape[3], y0 + ys)
            x1 = min(a.shape[4], x0 + xs)
            chunk = a[0, 0, z0:z1, y0:y1, x0:x1]
            self.send_chunk(chunk)
        else:
            raise ValueError('Invalid format specified')
Code Example #3
File: main.py Project: chunglabmit/precomputed-tif
def main(args=sys.argv[1:]):
    args = parse_args(args)
    logging.basicConfig(level=getattr(logging, args.log.upper()))
    ptype = PType.SEGMENTATION if args.segmentation else PType.IMAGE
    kwargs = {}
    if args.chunk_size is not None:
        cx, cy, cz = [int(_) for _ in args.chunk_size.split(",")]
        kwargs["chunk_size"] = (cz, cy, cx)
    if args.format == 'zarr':
        if args.source.endswith('.tif') or args.source.endswith('.tiff'):
            stack = ZarrStack(args.source, args.dest, **kwargs)
            kwargs = {}
        else:
            store = zarr.NestedDirectoryStore(args.source)
            stack = ZarrStack(store, args.dest)
    elif args.format == 'blockfs':
        stack = BlockfsStack(args.source, args.dest, ptype=ptype, **kwargs)
    elif args.format == 'ngff':
        stack = NGFFStack(args.source, args.dest, **kwargs)
        stack.create()
    else:
        stack = Stack(args.source, args.dest, ptype=ptype, **kwargs)
    if args.format != 'zarr':
        if args.n_cores is None:
            kwargs = {}
        else:
            kwargs = dict(n_cores=args.n_cores)

    voxel_size = [int(float(_) * 1000) for _ in args.voxel_size.split(",")]
    stack.write_info_file(args.levels, voxel_size)
    stack.write_level_1(**kwargs)
    for level in range(2, args.levels + 1):
        stack.write_level_n(level, **kwargs)
Code Example #4
def test_copysegmentation_dvid_to_zarr(setup_dvid_to_zarr):
    template_dir, config, volume, dvid_address, repo_uuid, output_file = setup_dvid_to_zarr

    # Modify the config from above to compute pyramid scales,
    # and choose a bounding box that is aligned with the bricks even at scale 2
    # (just for easier testing).
    box_zyx = [[0, 0, 0], [256, 256, 256]]
    config["input"]["geometry"]["bounding-box"] = box_zyx
    config["copysegmentation"]["pyramid-depth"] = 2

    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, _workflow = launch_flow(template_dir, 1)

    box_zyx = np.array(box_zyx)

    scale_0_vol = volume[box_to_slicing(*box_zyx)]
    scale_1_vol = downsample_labels(scale_0_vol, 2, True)
    scale_2_vol = downsample_labels(scale_1_vol, 2, True)

    store = zarr.NestedDirectoryStore(f"{execution_dir}/{output_file}")
    f = zarr.open(store, 'r')
    output_0_vol = f['s0'][box_to_slicing(*(box_zyx // 1))]
    output_1_vol = f['s1'][box_to_slicing(*(box_zyx // 2))]
    output_2_vol = f['s2'][box_to_slicing(*(box_zyx // 4))]

    assert (output_0_vol == scale_0_vol).all(), \
        "Scale 0: Written vol does not match expected"
    assert (output_1_vol == scale_1_vol).all(), \
        "Scale 1: Written vol does not match expected"
    assert (output_2_vol == scale_2_vol).all(), \
        "Scale 2: Written vol does not match expected"
Code Example #5
def nifti_to_zarr_ngff(nifti_file: str) -> str:
    """Convert the nifti file on disk to a Zarr NGFF store.

    The Zarr store will have the same path with '.zarr' appended.

    If the store already exists, it will not be re-created.
    """
    import itk
    import spatial_image_multiscale
    import spatial_image_ngff
    import zarr

    store_path = convert_to_store_path(nifti_file)
    if store_path.exists():
        return str(store_path)
    image = itk.imread(str(nifti_file))
    da = itk.xarray_from_image(image)
    da.name = 'image'

    scale_factors = [2, 2, 2, 2]
    multiscale = spatial_image_multiscale.to_multiscale(da, scale_factors)

    store_path = Path(str(nifti_file) + '.zarr')
    store = zarr.NestedDirectoryStore(str(nifti_file) + '.zarr')
    spatial_image_ngff.imwrite(multiscale, store)

    # celery tasks must return a serializable type; using string here
    return str(store_path)
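A hedged usage sketch for the function above; 'scan.nii.gz' is an assumed input file and the packages imported inside the function must be installed:

# Converts scan.nii.gz to scan.nii.gz.zarr on the first call; subsequent
# calls return the existing store path without re-converting.
zarr_path = nifti_to_zarr_ngff('scan.nii.gz')
print(zarr_path)  # 'scan.nii.gz.zarr'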
Code Example #6
File: zarr_utils.py Project: slowkow/pegasusio
    def _to_directory(self):
        orig_path = self.store.path

        if not orig_path.endswith('.zip'):
            self.store.close()
            zip_path = orig_path + '.zip'
            check_and_remove_existing_path(zip_path)
            os.replace(orig_path, zip_path)
            self.store = zarr.ZipStore(zip_path, mode='r')
        else:
            zip_path = orig_path

        dest_path = zip_path[:-4]
        check_and_remove_existing_path(dest_path)
        dir_store = zarr.NestedDirectoryStore(dest_path)
        zarr.copy_store(self.store, dir_store)
        self.store.close()
        os.remove(zip_path)

        self.store = dir_store
        self.root = zarr.open_group(self.store)

        logger.info(
            f"Converted ZipStore zarr file {orig_path} to NestedDirectoryStore {dest_path}."
        )
Code Example #7
def access_zarr(dir_path: Union[str, Path], container_path: Union[str, Path],
                **kwargs) -> Any:
    if isinstance(dir_path, Path):
        dir_path = str(dir_path)

    if isinstance(dir_path, str):
        dir_path = zarr.NestedDirectoryStore(dir_path)

    if isinstance(container_path, Path):
        container_path = str(container_path)

    attrs = kwargs.pop("attrs", {})

    # zarr is extremely slow to delete existing directories, so we do it ourselves
    if kwargs.get("mode") == "w":
        tmp_kwargs = kwargs.copy()
        tmp_kwargs["mode"] = "a"
        tmp = zarr.open(dir_path, path=str(container_path), **tmp_kwargs)
        # todo: move this logic to methods on the stores themselves
        if isinstance(
                tmp.store,
            (zarr.N5Store, zarr.DirectoryStore, zarr.NestedDirectoryStore)):
            logger.info(f'Beginning parallel rmdir of {tmp.path}...')
            pre = time.time()
            delete_zbranch(tmp)
            post = time.time()
            logger.info(
                f'Completed parallel rmdir of {tmp.path} in {post - pre}s.')
    array_or_group = zarr.open(dir_path, path=str(container_path), **kwargs)
    if kwargs.get("mode") != "r" and len(attrs) > 0:
        array_or_group.attrs.update(attrs)
    return array_or_group
Code Example #8
File: convert.py Project: serapred/netcdf-to-zarr
def netcdf_to_zarr(src, dst, axis=None, mode='serial', nested=False):
    """Summary

    Args:
        src (TYPE): Description
        dst (TYPE): Description
        axis (None, optional): Description
        mode (str, optional): Description
        nested (bool, optional): Description
    """
    if isinstance(dst, str):
        if nested:
            local_store = zarr.NestedDirectoryStore(dst)
        else:
            local_store = zarr.DirectoryStore(dst)
    else:
        local_store = dst

    root = zarr.group(store=local_store, overwrite=True)

    for i, dname in enumerate(src):
        # cycling over groups, the first one is the root.
        for j, gname in enumerate(__get_groups(dname)):
            if j == 0:
                group = root
                ds = ''
            else:
                group = __set_group(gname, root)
                ds = dname
            if i == 0:
                __set_meta(ds + gname, group)
                __set_vars(ds + gname, group, mode)
            else:
                __append_vars(gname, group, axis, mode)
Code Example #9
    def write_level_1(self, silent=False):
        """Write the first mipmap level, loading from tiff planes"""
        dest_lvl1 = os.path.join(self.dest, "1_1_1")
        store = zarr.NestedDirectoryStore(dest_lvl1)

        z_arr_1 = zarr.open(store,
                            mode='w',
                            chunks=(64, 64, 64),
                            dtype=self.dtype,
                            shape=(self.z_extent, self.y_extent,
                                   self.x_extent),
                            compression=self.compressor)

        z0 = self.z0(1)
        z1 = self.z1(1)
        y0 = self.y0(1)
        y1 = self.y1(1)
        x0 = self.x0(1)
        x1 = self.x1(1)

        if self.files is not None:
            for z0a, z1a in tqdm.tqdm(zip(z0, z1),
                                      total=len(z0),
                                      disable=silent):
                img = np.zeros((z1a - z0a, y1[-1], x1[-1]), self.dtype)
                for z in range(z0a, z1a):
                    img[z - z0a] = tifffile.imread(self.files[z])
                z_arr_1[z0a:z1a] = img
        elif self.z_arr is not None:  # need to decompress to re-chunk the original store
            for z0a, z1a in tqdm.tqdm(zip(z0, z1),
                                      total=len(z0),
                                      disable=silent):
                z_arr_1[z0a:z1a] = self.z_arr[z0a:z1a]
Code Example #10
def get_ngff_group_from_url(url: str) -> zarr.Group:
    """Open the Zarr group from a NGFF file url
    """
    ngff_parse = urlparse(url)
    ngff_path = os.path.join(ngff_parse.netloc, unquote(ngff_parse.path))
    storage = zarr.NestedDirectoryStore(ngff_path)
    group = zarr.group(storage)
    return group
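A small sketch of calling the helper above; the file URL is an assumption and must point at an existing NGFF directory store:

group = get_ngff_group_from_url("file:///data/sample.ngff")
# The multiscale levels are exposed as child arrays of the returned group.
print(list(group.array_keys()))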
Code Example #11
    def test_zarr(self):
        with make_case(np.uint16, (128, 128, 128),
                       return_path=True) as (glob_expr, dest, volume):
            main([
                "--source", glob_expr, "--dest", dest, "--levels", "2",
                "--format", "zarr"
            ])
            store = zarr.NestedDirectoryStore(os.path.join(dest, "1_1_1"))
            z_arr = zarr.open(store, "r")
            self.assertSequenceEqual(z_arr.shape, (128, 128, 128))
Code Example #12
    def create(self, mode="w", compressor=numcodecs.Blosc("zstd", 5)):
        """
        Create or open for append a dataset

        :param n_channels: # of channels in the zarr
        :param current_channel: the channel to be written
        """
        store = zarr.NestedDirectoryStore(
            self.dest)
        self.zgroup = zarr.group(store,
                                 overwrite=(mode == "w"))
        self.compressor = compressor
Code Example #13
    def test_write_level_1(self):
        with make_case(np.uint16, (100, 200, 300), klass=ZarrStack) \
                as (stack, npstack):
            stack.write_level_1()
            dest_lvl1 = os.path.join(stack.dest, "1_1_1")
            store = zarr.NestedDirectoryStore(dest_lvl1)

            z_arr = zarr.open(store, mode='r')
            block_0_64_256 = z_arr[:64, 64:128, 256:]
            np.testing.assert_equal(block_0_64_256, npstack[:64, 64:128, 256:])
            for (x0, x1), (y0, y1), (z0, z1) in itertools.product(
                    zip(stack.x0(1), stack.x1(1)), zip(stack.y0(1),
                                                       stack.y1(1)),
                    zip(stack.z0(1), stack.z1(1))):
                block = z_arr[z0:z1, y0:y1, x0:x1]
                np.testing.assert_equal(block, npstack[z0:z1, y0:y1, x0:x1])
Code Example #14
    def test_write_level_2(self):
        with make_case(np.uint16, (100, 201, 300), klass=ZarrStack) \
                as (stack, npstack):
            stack.write_level_1()
            stack.write_level_n(2)
            dest_lvl2 = os.path.join(stack.dest, "2_2_2")
            store = zarr.NestedDirectoryStore(dest_lvl2)
            z_arr = zarr.open(store, "r")
            block = z_arr[0:50, 64:101, 128:150]
            self.assertSequenceEqual(block.shape, (50, 101 - 64, 150 - 128))
            s32 = npstack.astype(np.uint32)
            first = (s32[0, 128, 256] + s32[1, 128, 256] + s32[0, 129, 256] +
                     s32[1, 129, 256] + s32[0, 128, 257] + s32[1, 128, 257] +
                     s32[0, 129, 257] + s32[1, 129, 257]) // 8
            self.assertEqual(block[0, 0, 0], first)
            last = (s32[-2, -1, -2] + s32[-1, -1, -2] + s32[-2, -1, -1] +
                    s32[-1, -1, -1]) // 4
            self.assertEqual(block[-1, -1, -1], last)
Code Example #15
File: io.py Project: chunglabmit/scout
def new_zarr(path, shape, chunks, dtype, in_memory=False, **kwargs):
    """
    Create new Zarr NestedDirectoryStore at `path`.

    **NOTE:** Persistent Zarr arrays are stored on disk. To avoid data loss, be careful when calling `new_zarr`
    on a path with an existing array.

    Parameters
    ----------
    path : str
        Path to new zarr array
    shape : tuple
        Overall shape of the zarr array
    chunks : tuple
        Shape of each chunk for the zarr array
    dtype : str
        Data type of the zarr array
    in_memory : bool
        If True, create the array in memory rather than on disk at `path`
    kwargs : dict
        Keyword args to pass to zarr.open()

    Returns
    -------
    arr : zarr Array
        Reference to open zarr array
    """
    compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.BITSHUFFLE)
    if in_memory:
        z_arr_out = zarr.zeros(shape=shape,
                               chunks=chunks,
                               dtype=dtype,
                               compressor=compressor,
                               **kwargs)
    else:
        store = zarr.NestedDirectoryStore(path)
        z_arr_out = zarr.open(store,
                              mode='w',
                              shape=shape,
                              chunks=chunks,
                              dtype=dtype,
                              compressor=compressor,
                              **kwargs)
    return z_arr_out
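A minimal sketch of calling new_zarr; the path, shape, and chunk size below are assumptions:

arr = new_zarr('/tmp/example.zarr', shape=(128, 128, 128),
               chunks=(64, 64, 64), dtype='uint16')
arr[:64, :64, :64] = 42  # fills exactly one 64**3 chunk on disk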
Code Example #16
def generate_spec(json_path: str, create_source: bool):
    source_root = '/groups/cosem/cosem/bennettd/scratch/test.zarr'
    source_component = 'source'
    dest_component = 'dest'
    rank = 3
    if create_source:
        source_data = zarr.open(store=zarr.NestedDirectoryStore(source_root),
                                path=source_component,
                                mode='w',
                                shape=(2048, ) * rank,
                                chunks=(64, ) * rank,
                                dtype='uint8')
        source_data[:] = 1
    source = ReadableArrayStore(url=f'{source_root}/{source_component}',
                                storage_options={},
                                chunks=(128, ) * rank)

    dest = WriteableArrayStore(url=f'{source_root}/{dest_component}',
                               storage_options={},
                               chunks=(64, ) * rank,
                               access_mode=('w', 'w'))

    downsampling_spec = DownsamplingSpec(method='mean',
                                         factors=(2, ) * rank,
                                         levels=(0, 1, 3, 4),
                                         chunks=(128, ) * rank)

    cluster_spec = ClusterSpec(deployment='dask_lsf',
                               worker=WorkerSpec(num_workers=1,
                                                 num_cores=10,
                                                 memory='15GB'))

    spec = MultiscaleStorageSpec(
        source=source,
        destination=dest,
        downsampling_spec=downsampling_spec,
        cluster_spec=cluster_spec,
        logging_dir='/groups/scicompsoft/home/bennettd/logs')

    with open(json_path, mode='w') as fh:
        fh.write(spec.json(indent=2))
Code Example #17
File: nifti_to_zarr_ngff.py Project: thewtex/miqa
def nifti_to_zarr_ngff(nifti_file: Union[str, Path]) -> Path:
    """Convert the nifti file on disk to a Zarr NGFF store.

    The Zarr store will have the same path with '.zarr' appended.

    If the store already exists, it will not be re-created.
    """
    store_path = Path(str(nifti_file) + '.zarr')
    if store_path.exists():
        return store_path
    image = itk.imread(str(nifti_file))
    da = itk.xarray_from_image(image)
    da.name = 'image'

    scale_factors = [2, 2, 2, 2]
    multiscale = spatial_image_multiscale.to_multiscale(da, scale_factors)

    store_path = Path(str(nifti_file) + '.zarr')
    store = zarr.NestedDirectoryStore(str(nifti_file) + '.zarr')
    spatial_image_ngff.imwrite(multiscale, store)

    return store_path
Code Example #18
File: io.py Project: chunglabmit/scout
def open(path, nested=True, mode='a'):
    """
    Opens a persistent Zarr array or NestedDirectoryStore located at `path`.

    Parameters
    ----------
    path : str
        Path to Zarr array or NestedDirectoryStore
    nested : bool
        Flag to indicate if path is for flat Zarr array or NestedDirectoryStore
    mode : str
        Read / write permissions mode

    Returns
    -------
    arr : zarr Array
        Reference to open Zarr array
    """
    if nested:
        store = zarr.NestedDirectoryStore(path)
        return zarr.open(store, mode=mode)
    else:
        return zarr.open(path, mode=mode)
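A usage sketch for the open helper above, assuming a nested Zarr array was previously written at the hypothetical path '/tmp/example.zarr':

arr = open('/tmp/example.zarr', nested=True, mode='r')
print(arr.shape, arr.dtype)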
Code Example #19
def volume_setup():
    tmpdir = tempfile.mkdtemp()
    path = f"{tmpdir}/test_zarr_service_testvol.zarr"
    dataset = "/some/volume"

    config = {
        "zarr": {
            "path": path,
            "dataset": dataset,
            "global-offset": [1000, 2000, 3000]
        },
        "geometry": {}
    }

    volume = np.random.randint(100, size=(512, 256, 128))

    store = zarr.NestedDirectoryStore(path)
    f = zarr.open(store=store, mode='w')
    f.create_dataset(dataset,
                     data=volume,
                     chunks=(64, 64, 64),
                     compressor=None)

    return config, volume
Code Example #20
File: largememory.py Project: godslayer201/stingray
def _saveChunkEV(ev, dir_name, chunks):
    """
    Save EventList in chunks on disk.

    Parameters
    ----------
    ev: :class:`stingray.events.EventList` object
        EventList to be saved

    dir_name: string
        Top-level directory name where the EventList is to be saved

    chunks: int
        The number of elements per chunk

    Raises
    ------
    ValueError
        If there is no data being saved
    """
    # Creating a Nested Store and multiple groups for temporary saving
    store = zarr.NestedDirectoryStore(dir_name)
    ev_data_group = zarr.group(store=store, overwrite=True)
    main_data_group = ev_data_group.create_group("main_data", overwrite=True)
    meta_data_group = ev_data_group.create_group("meta_data", overwrite=True)

    compressor = Blosc(cname="lz4", clevel=1, shuffle=-1)

    if ev.time is not None and (ev.time.all() or ev.time.size != 0):
        main_data_group.create_dataset(
            name="times",
            data=ev.time,
            compressor=compressor,
            overwrite=True,
            chunks=(chunks, ),
        )

    if ev.energy is not None and (ev.energy.all() or ev.energy.size != 0):
        main_data_group.create_dataset(
            name="energy",
            data=ev.energy,
            compressor=compressor,
            overwrite=True,
            chunks=(chunks, ),
        )

    if ev.pi is not None and (ev.pi.all() or ev.pi.size != 0):
        main_data_group.create_dataset(
            name="pi_channel",
            data=ev.pi,
            compressor=compressor,
            overwrite=True,
            chunks=(chunks, ),
        )

    if ev.gti is not None and (ev.gti.all() or ev.gti.shape[0] != 0):
        main_data_group.create_dataset(name="gti",
                                       data=ev.gti.flatten(),
                                       overwrite=True)

    if ev.dt != 0:
        meta_data_group.create_dataset(name="dt",
                                       data=ev.dt,
                                       compressor=compressor,
                                       overwrite=True)

    if ev.ncounts:
        meta_data_group.create_dataset(
            name="ncounts",
            data=ev.ncounts,
            compressor=compressor,
            overwrite=True,
        )

    if ev.notes:
        meta_data_group.create_dataset(name="notes",
                                       data=ev.notes,
                                       compressor=compressor,
                                       overwrite=True)

    meta_data_group.create_dataset(name="mjdref",
                                   data=ev.mjdref,
                                   compressor=compressor,
                                   overwrite=True)
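A sketch of reading the saved data back with plain zarr calls; 'ev_dir' is a hypothetical directory previously passed to _saveChunkEV:

import zarr

root = zarr.open(zarr.NestedDirectoryStore('ev_dir'), mode='r')
times = root['main_data/times'][:]      # event arrival times
mjdref = root['meta_data/mjdref'][...]  # scalar metadata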
Code Example #21
def read_chunk(url, x0, x1, y0, y1, z0, z1, level=1, format="tiff"):
    """Read an arbitrary chunk of data

    :param url: Base URL of the precomputed data source
    :param x0: starting X coordinate, in the level's coordinate space
    :param x1: ending X coordinate (non-inclusive)
    :param y0: starting Y coordinate
    :param y1: ending Y coordinate (non-inclusive)
    :param z0: starting Z coordinate
    :param z1: ending Z coordinate
    :param level: mipmap level
    :param format: the read format if it's a file URL. Defaults to "tiff", but
    "blockfs", "zarr" and "ngff" are also supported
    :return: a Numpy array containing the data
    """
    is_file = urlparse(url).scheme.lower() == "file"
    info = get_info(url)
    scale = info.get_scale(level)
    result = np.zeros((z1 - z0, y1 - y0, x1 - x0), info.data_type)
    shape = np.array(scale.shape)
    offset = np.array(scale.offset)
    stride = np.array(scale.chunk_sizes)
    end = offset + shape

    x0d = _chunk_start(x0, offset[0], stride[0])
    x1d = _chunk_end(x1, offset[0], stride[0], end[0])
    y0d = _chunk_start(y0, offset[1], stride[1])
    y1d = _chunk_end(y1, offset[1], stride[1], end[1])
    z0d = _chunk_start(z0, offset[2], stride[2])
    z1d = _chunk_end(z1, offset[2], stride[2], end[2])
    for x0c, y0c, z0c in itertools.product(range(x0d, x1d, stride[0]),
                                           range(y0d, y1d, stride[1]),
                                           range(z0d, z1d, stride[2])):
        x1c = min(x1d, x0c + stride[0])
        y1c = min(y1d, y0c + stride[1])
        z1c = min(z1d, z0c + stride[2])
        chunk_url = url + "/" + scale.key + "/%d-%d_%d-%d_%d-%d" % (
            x0c, x1c, y0c, y1c, z0c, z1c)
        if is_file:
            if format == "tiff":
                chunk_url += ".tiff"
                with urlopen(chunk_url) as fd:
                    chunk = tifffile.imread(fd)
            elif format == "blockfs":
                from blockfs import Directory
                from .blockfs_stack import BlockfsStack
                directory_url = url + "/" + scale.key + "/" +\
                                BlockfsStack.DIRECTORY_FILENAME
                directory_parse = urlparse(directory_url)
                directory_path = os.path.join(directory_parse.netloc,
                                              unquote(directory_parse.path))
                directory = Directory.open(directory_path)
                chunk = directory.read_block(x0c, y0c, z0c)
            elif format == 'ngff':
                group = get_ngff_group_from_url(url)
                key = str(int(np.log2(level)))
                dataset = group[key]
                dataset.read_only = True
                chunk = dataset[0, 0, z0c:z1c, y0c:y1c, x0c:x1c]
            elif format == 'zarr':
                zarr_url = url + "/" + scale.key
                zarr_parse = urlparse(zarr_url)
                zarr_path = os.path.join(zarr_parse.netloc,
                                         unquote(zarr_parse.path))
                storage = zarr.NestedDirectoryStore(zarr_path)
                dataset = zarr.Array(storage)
                chunk = dataset[z0c:z1c, y0c:y1c, x0c:x1c]
            else:
                raise NotImplementedError("Can't read %s yet" % format)
        else:
            response = urlopen(chunk_url)
            data = response.read()
            chunk = np.frombuffer(data, info.data_type).reshape(
                (z1c - z0c, y1c - y0c, x1c - x0c))
        if z0c < z0:
            chunk = chunk[z0 - z0c:]
            z0c = z0
        if z1c > z1:
            chunk = chunk[:z1 - z0c]
            z1c = z1
        if y0c < y0:
            chunk = chunk[:, y0 - y0c:]
            y0c = y0
        if y1c > y1:
            chunk = chunk[:, :y1 - y0c]
            y1c = y1
        if x0c < x0:
            chunk = chunk[:, :, x0 - x0c:]
            x0c = x0
        if x1c > x1:
            chunk = chunk[:, :, :x1 - x0c]
            x1c = x1
        result[z0c - z0:z0c - z0 + chunk.shape[0],
               y0c - y0:y0c - y0 + chunk.shape[1],
               x0c - x0:x0c - x0 + chunk.shape[2]] = chunk
    return result
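A hypothetical call to read_chunk against a local precomputed source; the URL and coordinates are assumptions:

block = read_chunk("file:///data/precomputed", 0, 64, 0, 64, 0, 64,
                   level=1, format="tiff")
print(block.shape)  # (64, 64, 64), ordered (z, y, x)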
Code Example #22
def serve_precomputed(environ, start_response, config_file):
    config = get_config(config_file)
    path_info = environ["PATH_INFO"]
    if path_info == "/":
        return serve_directory(start_response, config_file)
    for source in config:
        if path_info[1:].startswith(source["name"] + "/"):
            try:
                filename = path_info[2 + len(source["name"]):]
                dest = os.path.join(source["directory"])
                if filename == "mesh/info":
                    return file_not_found(filename, start_response)
                elif filename == "info":
                    logging.info("Serving %s" % source["name"])
                    destpath = os.path.join(dest, "info")
                    if not os.path.exists(destpath):
                        return file_not_found(destpath, start_response)
                    with open(destpath, "rb") as fd:
                        data = fd.read()
                    start_response("200 OK",
                                   [("Content-type", "application/json"),
                                    ("Content-Length", str(len(data))),
                                    ('Access-Control-Allow-Origin', '*')])
                    return [data]
                elif source["format"] == "tiff":
                    import tifffile
                    path = os.path.join(dest, filename + ".tiff")
                    if not os.path.exists(path):
                        return file_not_found(path, start_response)
                    img = tifffile.imread(path)
                    data = img.tostring("C")
                    start_response(
                        "200 OK",
                        [("Content-type", "application/octet-stream"),
                         ("Content-Length", str(len(data))),
                         ('Access-Control-Allow-Origin', '*')])
                    return [data]
                elif source["format"] == "zarr":
                    import zarr
                    filename, x0, x1, y0, y1, z0, z1 = \
                        parse_filename(dest, filename)
                    if not os.path.exists(filename):
                        return file_not_found(filename, start_response)
                    store = zarr.NestedDirectoryStore(filename)
                    z_arr = zarr.open(store, mode='r')
                    chunk = z_arr[z0:z1, y0:y1, x0:x1]
                    data = chunk.tostring("C")
                    start_response(
                        "200 OK",
                        [("Content-type", "application/octet-stream"),
                         ("Content-Length", str(len(data))),
                         ('Access-Control-Allow-Origin', '*')])
                    return [data]
                elif source["format"] == "blockfs":
                    from blockfs import Directory
                    filename, x0, x1, y0, y1, z0, z1 = \
                        parse_filename(dest, filename)
                    filename = os.path.join(filename, "precomputed.blockfs")
                    directory = Directory.open(filename)
                    chunk = directory.read_block(x0, y0, z0)
                    data = chunk.tostring("C")
                    start_response(
                        "200 OK",
                        [("Content-type", "application/octet-stream"),
                         ("Content-Length", str(len(data))),
                         ('Access-Control-Allow-Origin', '*')])
                    return [data]
                elif source["format"] == "ngff":
                    import zarr
                    filename, x0, x1, y0, y1, z0, z1 = \
                        parse_filename(dest, filename)
                    root, level = os.path.split(filename)
                    lx, ly, lz = [int(_) for _ in level.split("_")]
                    llevel = int(np.round(np.log2(lx), 0))
                    store = zarr.NestedDirectoryStore(root)
                    group = zarr.group(store)
                    a = group[llevel]
                    _, _, zs, ys, xs = a.chunks
                    z1 = min(a.shape[2], z0 + zs)
                    y1 = min(a.shape[3], y0 + ys)
                    x1 = min(a.shape[4], x0 + xs)
                    chunk = a[0, 0, z0:z1, y0:y1, x0:x1]
                    data = chunk.tostring("C")
                    start_response(
                        "200 OK",
                        [("Content-type", "application/octet-stream"),
                         ("Content-Length", str(len(data))),
                         ('Access-Control-Allow-Origin', '*')])
                    return [data]
            except ParseFilenameError:
                return file_not_found(path_info, start_response)
    else:
        return file_not_found(path_info, start_response)
Code Example #23
def test_connectedcomponents(setup_connectedcomponents_hdf5_zarr,
                             disable_auto_retry):
    template_dir, _config, input_vol = setup_connectedcomponents_hdf5_zarr

    execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    output_path = final_config["output"]["zarr"]["path"]
    dset_name = final_config["output"]["zarr"]["dataset"]

    store = zarr.NestedDirectoryStore(output_path)
    f = zarr.open(store=store, mode='r')
    output_vol = f[dset_name][:]
    assert output_vol.shape == input_vol.shape

    final_labels = pd.unique(output_vol.reshape(-1))

    # Never change label 0
    assert 0 in final_labels
    assert ((input_vol == 0) == (output_vol == 0)).all()

    # Single-component objects
    assert 2 in final_labels
    assert 4 in final_labels

    assert ((input_vol == 2) == (output_vol == 2)).all()
    assert ((input_vol == 4) == (output_vol == 4)).all()

    # Split objects
    assert 1 not in final_labels
    assert 3 not in final_labels

    for corner in map(np.array, ndrange((0, 0, 0), (1, 8, 8), (1, 4, 4))):
        box = (corner, corner + (1, 4, 4))
        input_block = extract_subvol(input_vol, box)
        output_block = extract_subvol(output_vol, box)

        for orig_label in [1, 3]:
            if orig_label in input_block:
                positions = (input_block == orig_label)

                assert (input_block[positions] != output_block[positions]).all(), \
                    f"original label {orig_label} was not split!"

                assert (output_block[positions] > input_vol.max()).all(), \
                    f"original label {orig_label} was not split!"

                # This block-based assertion is not generally true for all possible input,
                # but our test data blocks are set up so that this is a valid check.
                # (No block happens to contain more than one final CC that came from the same original label.)
                assert (output_block[positions] == output_block[positions][0]).all(), \
                    f"original label {orig_label} ended up over-segmentated"

    # Check CSV output
    df = pd.read_csv(f'{execution_dir}/relabeled-objects.csv')

    assert len(df.query('orig_label == 0')) == 0
    assert len(df.query('orig_label == 1')) == 3
    assert len(df.query('orig_label == 2')) == 0
    assert len(df.query('orig_label == 3')) == 2
    assert len(df.query('orig_label == 4')) == 0

    assert not df['final_label'].duplicated().any()
    assert (df['final_label'] > input_vol.max()).all()
Code Example #24
from pathlib import Path
import numpy as np
import zarr
from numcodecs import Zlib, GZip, BZ2

# Nested directory store
nested_test_path = Path.home() / 'tmp' / 'zarr-test-nested.zarr'
group_path = 'test/data'

nested_store = zarr.NestedDirectoryStore(str(nested_test_path))
nested_root = zarr.group(store=nested_store, overwrite=True)
nested_group = nested_root.create_group(group_path)

array_3x2_c = np.arange(0, 3 * 2).reshape(2, 3)

nested_group.array(name='3x2_c_|u1',
                   dtype='|u1',
                   data=array_3x2_c,
                   chunks=(2, 3),
                   overwrite=True)
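A short follow-up sketch reading the array back; the point of NestedDirectoryStore is that chunk files land in nested sub-directories (e.g. '0/0') rather than flat '0.0' files:

reopened = zarr.open(zarr.NestedDirectoryStore(str(nested_test_path)), mode='r')
print(reopened['test/data/3x2_c_|u1'][:])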
Code Example #25
File: largememory.py Project: godslayer201/stingray
def _saveFITSZarr(f_name, dir_name, chunks):
    """
    Read a FITS file and save it for further processing.

    Parameters
    ----------
    f_name: string
        The name of the FITS file to be read

    dir_name: string
        The name of the top level directory where the file is to be stored

    chunks: int
        The number of elements per chunk
    """

    compressor = Blosc(cname="lz4", clevel=1, shuffle=-1)

    store = zarr.NestedDirectoryStore(dir_name)
    fits_data_group = zarr.group(store=store, overwrite=True)
    main_data_group = fits_data_group.create_group("main_data", overwrite=True)
    meta_data_group = fits_data_group.create_group("meta_data", overwrite=True)

    with fits.open(f_name, memmap=True) as fits_data:
        for HDUList in fits_data:
            if HDUList.name == "EVENTS":
                times = HDUList.data["TIME"]
                chunks = times.size if times.size < chunks else chunks

                main_data_group.create_dataset(
                    name="times",
                    data=times,
                    compressor=compressor,
                    overwrite=True,
                    chunks=(chunks, ),
                )

                for col in ["PI", "PHA"]:
                    if col in HDUList.data.columns.names:
                        main_data_group.create_dataset(
                            name=f"{col.lower()}_channel",
                            data=HDUList.data[col],
                            compressor=compressor,
                            overwrite=True,
                            chunks=(chunks, ),
                        )

                meta_data_group.create_dataset(
                    name="tstart",
                    data=HDUList.header["TSTART"],
                    compressor=compressor,
                    overwrite=True,
                )

                meta_data_group.create_dataset(
                    name="tstop",
                    data=HDUList.header["TSTOP"],
                    compressor=compressor,
                    overwrite=True,
                )

                meta_data_group.create_dataset(
                    name="mjdref",
                    data=high_precision_keyword_read(HDUList.header, "MJDREF"),
                    compressor=compressor,
                    overwrite=True,
                )

            elif HDUList.name == "GTI":
                # TODO: Needs to be generalized
                start, stop = HDUList.data["START"], HDUList.data["STOP"]
                gti = np.array(list(zip(start, stop)))
                main_data_group.create_dataset(
                    name="gti",
                    data=gti.flatten(),
                    compressor=compressor,
                    overwrite=True,
                )
Code Example #26
    def write_level_n(self, level, silent=False):
        src_resolution = self.resolution(level - 1)
        dest_resolution = self.resolution(level)

        src = os.path.join(
            self.dest,
            "%d_%d_%d" % (src_resolution, src_resolution, src_resolution))
        dest = os.path.join(
            self.dest,
            "%d_%d_%d" % (dest_resolution, dest_resolution, dest_resolution))

        src_store = zarr.NestedDirectoryStore(src)
        src_zarr = zarr.open(src_store, mode='r')

        dest_store = zarr.NestedDirectoryStore(dest)
        dest_zarr = zarr.open(dest_store,
                              mode='w',
                              chunks=(64, 64, 64),
                              dtype=self.dtype,
                              shape=(self.z1(level)[-1], self.y1(level)[-1],
                                     self.x1(level)[-1]),
                              compression=self.compressor)

        z0s = self.z0(level - 1)  # source block coordinates
        z1s = self.z1(level - 1)
        y0s = self.y0(level - 1)
        y1s = self.y1(level - 1)
        x0s = self.x0(level - 1)
        x1s = self.x1(level - 1)
        z0d = self.z0(level)  # dest block coordinates
        z1d = self.z1(level)
        y0d = self.y0(level)
        y1d = self.y1(level)
        x0d = self.x0(level)
        x1d = self.x1(level)

        for xidx, yidx, zidx in tqdm.tqdm(
                list(
                    itertools.product(range(self.n_x(level)),
                                      range(self.n_y(level)),
                                      range(self.n_z(level)))),
                disable=silent
        ):  # looping over destination block indicies (fewer blocks than source)
            block = np.zeros((z1d[zidx] - z0d[zidx], y1d[yidx] - y0d[yidx],
                              x1d[xidx] - x0d[xidx]), np.uint64)
            hits = np.zeros((z1d[zidx] - z0d[zidx], y1d[yidx] - y0d[yidx],
                             x1d[xidx] - x0d[xidx]), np.uint64)
            for xsi1, ysi1, zsi1 in itertools.product(
                (0, 1), (0, 1),
                (0, 1)):  # looping over source blocks for this destination
                xsi = xsi1 + xidx * 2
                if xsi == self.n_x(
                        level - 1
                ):  # Check for any source blocks that are out-of-bounds
                    continue
                ysi = ysi1 + yidx * 2
                if ysi == self.n_y(level - 1):
                    continue
                zsi = zsi1 + zidx * 2
                if zsi == self.n_z(level - 1):
                    continue

                src_block = src_zarr[z0s[zsi]:z1s[zsi], y0s[ysi]:y1s[ysi],
                                     x0s[xsi]:x1s[xsi]]

                for offx, offy, offz in \
                        itertools.product((0, 1), (0, 1), (0,1)):
                    dsblock = src_block[offz::2, offy::2, offx::2]
                    block[zsi1*32:zsi1*32 + dsblock.shape[0],
                          ysi1*32:ysi1*32 + dsblock.shape[1],
                          xsi1*32:xsi1*32 + dsblock.shape[2]] += \
                        dsblock.astype(block.dtype)  # 32 is half-block size of source
                    hits[zsi1 * 32:zsi1 * 32 + dsblock.shape[0],
                         ysi1 * 32:ysi1 * 32 + dsblock.shape[1],
                         xsi1 * 32:xsi1 * 32 + dsblock.shape[2]] += 1
            block[hits > 0] = block[hits > 0] // hits[hits > 0]

            dest_zarr[z0d[zidx]:z1d[zidx], y0d[yidx]:y1d[yidx],
                      x0d[xidx]:x1d[xidx]] = block
Code Example #27
File: largememory.py Project: godslayer201/stingray
def _saveChunkLC(lc, dir_name, chunks):
    """
    Save Lightcurve in chunks on disk.

    Parameters
    ----------
    lc: :class:`stingray.Lightcurve` object
        Lightcurve to be saved

    dir_name: string
        Top-level directory name where the Lightcurve is to be saved

    chunks: int
        The number of elements per chunk
    """
    # Creating a Nested Store and multiple groups for temporary saving
    store = zarr.NestedDirectoryStore(dir_name)
    lc_data_group = zarr.group(store=store, overwrite=True)
    main_data_group = lc_data_group.create_group("main_data", overwrite=True)
    meta_data_group = lc_data_group.create_group("meta_data", overwrite=True)

    compressor = Blosc(cname="lz4", clevel=1, shuffle=-1)  # Optimal

    main_data_group.create_dataset(
        name="times",
        data=lc.time,
        compressor=compressor,
        overwrite=True,
        chunks=(chunks, ),
    )

    main_data_group.create_dataset(
        name="counts",
        data=lc.counts,
        compressor=compressor,
        overwrite=True,
        chunks=(chunks, ),
    )

    if lc._counts_err is not None:
        main_data_group.create_dataset(
            name="count_err",
            data=lc.counts_err,
            compressor=compressor,
            overwrite=True,
            chunks=(chunks, ),
        )

    main_data_group.create_dataset(name="gti",
                                   data=lc.gti.flatten(),
                                   overwrite=True)

    meta_data_group.create_dataset(name="dt",
                                   data=lc.dt,
                                   compressor=compressor,
                                   overwrite=True)

    meta_data_group.create_dataset(
        name="err_dist",
        data=lc.err_dist,
        compressor=compressor,
        overwrite=True,
    )

    meta_data_group.create_dataset(name="mjdref",
                                   data=lc.mjdref,
                                   compressor=compressor,
                                   overwrite=True)