Example no. 1
0
def write_multiscale(pyramid: List, group: zarr.Group) -> None:
    """Write an image pyramid with v0.1 multiscale metadata to disk.

    Parameters
    ----------
    pyramid: List
        Image data for each resolution level, largest level first.
    group: zarr.Group
        The zarr group the datasets and metadata are written into.
    """
    paths = []
    for path, dataset in enumerate(pyramid):
        # Use the enumerated ``dataset`` directly instead of re-indexing
        # ``pyramid[path]`` — same object, clearer intent.
        group.create_dataset(str(path), data=dataset)
        paths.append({"path": str(path)})

    multiscales = [{"version": "0.1", "datasets": paths}]
    group.attrs["multiscales"] = multiscales
Example no. 2
0
 def write_array(self, group: zarr.Group, name: str,
                 array: np.ndarray) -> None:
     """Persist *array* under *name* inside *group*.

     Structured (dtype kind ``'V'``) arrays are delegated to
     ``self.write_dataframe``; everything else is written as a
     compressed dataset, mapping object dtypes to ``str``.
     """
     if array.dtype.kind == 'V':
         # Record/structured arrays are stored as data frames.
         self.write_dataframe(group, name, array)
         return

     target_dtype = array.dtype if array.dtype.kind != 'O' else str
     group.create_dataset(
         name,
         data=array,
         shape=array.shape,
         chunks=calc_chunk(array.shape),
         dtype=target_dtype,
         compressor=COMPRESSOR,
         overwrite=True,
     )
Example no. 3
0
def write_multiscale(
    pyramid: List,
    group: zarr.Group,
    chunks: Union[Tuple[Any, ...], int, None] = None,
) -> None:
    """Write a pyramid with v0.2 multiscale metadata to disk.

    Parameters
    ----------
    pyramid: List
        Image data for each resolution level, largest level first.
    group: zarr.Group
        The zarr group the datasets and metadata are written into.
    chunks: tuple of int, int, or None
        Chunk shape forwarded to ``create_dataset``; ``None`` lets zarr
        choose a default.
    """
    paths = []
    for path, dataset in enumerate(pyramid):
        # TODO: chunks here could be different per layer
        group.create_dataset(str(path), data=dataset, chunks=chunks)
        paths.append({"path": str(path)})

    # "0.2" is the multiscales spec version recorded in the group attrs.
    multiscales = [{"version": "0.2", "datasets": paths}]
    group.attrs["multiscales"] = multiscales
Example no. 4
0
def create_zarr_obj_array(g: zarr.Group,
                          name: str,
                          data,
                          dtype: Union[str, Any] = None,
                          overwrite: bool = True) -> zarr.hierarchy:
    """
    Creates and returns a Zarr object array.

    A Zarr object array can contain any type of object.
    https://zarr.readthedocs.io/en/stable/tutorial.html#object-arrays

    Args:
        g (zarr.hierarchy): Group to create the dataset in.
        name (str): Name of the dataset.
        data: Values to store; converted with ``np.array``.
        dtype (Union[str, Any]): Target dtype. When ``None`` or
            ``object``, a fixed-width unicode dtype wide enough for the
            longest ``str()`` representation is derived from the data.
        overwrite (bool): Whether an existing dataset may be replaced.

    Returns:
        A Zarr object Array.
    """
    data = np.array(data)
    if dtype is None or dtype == object:
        # ``default=1`` keeps this from raising ValueError on empty input.
        dtype = 'U' + str(max((len(str(x)) for x in data), default=1))
    if np.issubdtype(data.dtype, np.dtype('S')):
        # Byte strings are decoded to unicode; the decoded dtype wins.
        data = data.astype('U')
        dtype = data.dtype
    return g.create_dataset(name,
                            data=data,
                            chunks=(100000, ),
                            shape=len(data),
                            dtype=dtype,
                            overwrite=overwrite)
Example no. 5
0
def create_zarr_dataset(g: zarr.Group,
                        name: str,
                        chunks: tuple,
                        dtype: Any,
                        shape: Tuple,
                        overwrite: bool = True) -> zarr.hierarchy:
    """
    Create and return an empty, Blosc/LZ4-compressed Zarr array.

    Args:
        g (zarr.hierarchy): Group to create the dataset in.
        name (str): Name of the dataset.
        chunks (tuple): Chunk shape.
        dtype (Any): Element dtype of the array.
        shape (Tuple): Overall shape of the array.
        overwrite (bool): Whether an existing dataset may be replaced.

    Returns:
        A Zarr Array.
    """
    from numcodecs import Blosc

    return g.create_dataset(
        name,
        chunks=chunks,
        dtype=dtype,
        shape=shape,
        compressor=Blosc(cname='lz4', clevel=5, shuffle=Blosc.BITSHUFFLE),
        overwrite=overwrite,
    )
Example no. 6
0
    def write_series(self, group: zarr.Group, name: str, array: np.ndarray,
                     data_type: str) -> None:
        """Write a 1-D series into *group* under *name*.

        For ``data_type == 'data_frame'``, low-cardinality string data is
        converted to a categorical and stored as a codes dataset plus a
        shared ``_categories`` key dataset; anything else falls through to
        a plain compressed dataset write.
        """
        if data_type == 'data_frame':
            # Convert repetitive string columns (but never the index) to
            # categoricals, only when it yields a worthwhile reduction.
            if not is_categorical_dtype(
                    array) and name != '_index' and is_string_dtype(array):
                keywords = set(array)
                if len(keywords
                       ) <= array.size / 10.0:  # at least 10x reduction
                    array = pd.Categorical(array,
                                           categories=natsorted(keywords))

            if is_categorical_dtype(array):
                # write category keys
                categories = group.require_group('_categories')
                values = array.categories.values
                if isinstance(values[0], bytes):
                    # Decode byte categories so they serialize as strings.
                    values = np.array([x.decode() for x in values],
                                      dtype=object)
                dtype = str if values.dtype.kind == 'O' else values.dtype
                categories.create_dataset(name,
                                          data=values,
                                          shape=values.shape,
                                          chunks=calc_chunk(values.shape),
                                          dtype=dtype,
                                          compressor=COMPRESSOR,
                                          overwrite=True)
                # write codes
                codes_arr = group.create_dataset(name,
                                                 data=array.codes,
                                                 shape=array.codes.shape,
                                                 chunks=calc_chunk(
                                                     array.codes.shape),
                                                 dtype=array.codes.dtype,
                                                 compressor=COMPRESSOR,
                                                 overwrite=True)
                # Preserve the categorical's ordering flag for readers.
                codes_arr.attrs['ordered'] = array.ordered

                return None

        # Fallback: write the raw array, mapping object dtypes to str.
        dtype = str if array.dtype.kind == 'O' else array.dtype
        group.create_dataset(name,
                             data=array,
                             shape=array.shape,
                             chunks=calc_chunk(array.shape),
                             dtype=dtype,
                             compressor=COMPRESSOR,
                             overwrite=True)
Example no. 7
0
def write_multiscale(
    pyramid: List,
    group: zarr.Group,
    chunks: Union[Tuple[Any, ...], int] = None,
    fmt: Format = CurrentFormat(),
    axes: Union[str, List[str]] = None,
) -> None:
    """
    Write an image pyramid plus multiscale metadata into a zarr group.

    Parameters
    ----------
    pyramid: List of np.ndarray
      Resolution levels to save, largest first. Arrays are at most
      5-dimensional, with dimensions ordered (t, c, z, y, x).
    group: zarr.Group
      Zarr group that receives one dataset per level.
    chunks: int or tuple of ints
      Chunk size used when storing each level.
    fmt: Format
      ome_zarr format version to write. Defaults to the most current.
    axes: str or list of str
      Axis names, e.g. "tczyx". Not needed for v0.1 or v0.2, or for
      v0.3 when the data is 2D or 5D; otherwise it must be provided.
    """

    ndim = len(pyramid[0].shape)
    axes = _validate_axes_names(ndim, axes, fmt)

    dataset_paths = []
    for level, image in enumerate(pyramid):
        # TODO: chunks here could be different per layer
        level_path = str(level)
        group.create_dataset(level_path, data=image, chunks=chunks)
        dataset_paths.append(level_path)
    write_multiscales_metadata(group, dataset_paths, fmt, axes)
Example no. 8
0
def create_zarr_obj_array(
    g: zarr.Group,
    name: str,
    data,
    dtype: Union[str, Any] = None,
    overwrite: bool = True,
    chunk_size: int = 100000,
) -> zarr.hierarchy:
    """
    Creates and returns a compressed Zarr object array.

    A Zarr object array can contain any type of object.
    https://zarr.readthedocs.io/en/stable/tutorial.html#object-arrays

    Args:
        g (zarr.hierarchy): Group to create the dataset in.
        name (str): Name of the dataset.
        data: Values to store; converted with ``np.array``.
        dtype (Union[str, Any]): Target dtype. When ``None`` or
            ``object``, a fixed-width unicode dtype wide enough for the
            longest ``str()`` representation is derived from the data.
        overwrite (bool): Whether an existing dataset may be replaced.
        chunk_size (int): Chunk length; ``None``/``False`` disables
            chunking.

    Returns:
        A Zarr object Array.
    """

    from numcodecs import Blosc

    compressor = Blosc(cname="lz4", clevel=5, shuffle=Blosc.BITSHUFFLE)

    data = np.array(data)
    if dtype is None or dtype == object:
        # ``default=1`` keeps this from raising ValueError on empty input.
        dtype = "U" + str(max((len(str(x)) for x in data), default=1))
    if np.issubdtype(data.dtype, np.dtype("S")):
        # Byte strings are decoded to unicode; the decoded dtype wins.
        data = data.astype("U")
        dtype = data.dtype
    if chunk_size is None or chunk_size is False:
        chunks = False
    else:
        chunks = (chunk_size, )
    return g.create_dataset(
        name,
        data=data,
        chunks=chunks,
        shape=len(data),
        dtype=dtype,
        overwrite=overwrite,
        compressor=compressor,
    )