def main(cfg):
    dat = HubmapDataset(cfg["data_dir"], cfg["out_dir"])
    store = zarr.DirectoryStore(dat.path.out / "zarr" / cfg["version"] /
                                "db.zarr")
    database = zarr.group(store=store, overwrite=False)

    for id_, _ in tqdm(list(dat.get_inf("train").iterrows())):

        database.create_group(id_)
        img = dat.get_img(id_)
        msk = dat.get_msk(id_, "target")

        shape = dat.get_shape(id_)
        rescale = A.Resize(height=int(shape[0] / cfg["scale"]),
                           width=int(shape[1] / cfg["scale"]),
                           p=1.0)

        transformed = rescale(image=img, mask=msk)
        img, msk = transformed["image"], transformed["mask"]
        del transformed
        gc.collect()

        database[id_]["img"] = zarr.array(img,
                                          chunks=(cfg["chunk_size"],
                                                  cfg["chunk_size"], 3))
        database[id_]["target"] = zarr.array(msk,
                                             chunks=(cfg["chunk_size"],
                                                     cfg["chunk_size"]))
        del img, msk
        gc.collect()
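
A minimal read-back sketch for the database written above; the store path and the
image id "0a1b2c" are hypothetical, and slicing the zarr array loads only the
chunks that the tile touches.

import zarr

store = zarr.DirectoryStore("out/zarr/v1/db.zarr")   # assumed output location
database = zarr.open_group(store=store, mode="r")
tile = database["0a1b2c"]["img"][0:512, 0:512, :]    # lazy, chunk-aligned read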
Example #2

def convert_to_zarr(df, store_type, chunks):
    """Convert a DataFrame to a zarr array in a temporary store."""

    path = _get_temp_path(".zarr")
    adj_chunks = (min(df.shape[0], chunks[0]), min(df.shape[1], chunks[1]))
    store = getattr(zarr, store_type)(path)
    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the replacement
    zarr.array(df.to_numpy(), store=store, chunks=adj_chunks, dtype='f4')

    return path
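
A hedged usage sketch for convert_to_zarr; store_type names any zarr store class
(DirectoryStore here), and _get_temp_path is assumed to hand back a writable path.

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(1000, 8))
path = convert_to_zarr(df, store_type="DirectoryStore", chunks=(100, 8))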
Example #3

def classo_to_dir(problem):
    result = CLASSOProblemDirectoryFormat()

    # only do it for PATH for now
    solPATH_classo, solPATH_file = problem.solution.PATH, result.PATH
    if not isinstance(solPATH_classo, str):
        if isinstance(solPATH_classo.SIGMAS, str):
            data = zarr.array(np.array([solPATH_classo.LAMBDAS, solPATH_classo.BETAS]))
        else:
            data = zarr.array(np.array([solPATH_classo.LAMBDAS, solPATH_classo.BETAS, solPATH_classo.SIGMAS]))

        with solPATH_file.open() as fh:
            data.write(fh, format=SolutionFormat)  # is it how it is supposed to be done ??

    return result
Example #4

def save_results(conn, image, data, dataset, path):
    filename, file_extension = os.path.splitext(image.getName())
    # Save the probabilities file as an image
    print("Saving Probabilities as zarr file attached to the original Image")
    name = filename + "_Probabilities_zarr.zip"
    desc = "ilastik probabilities from Image:%s" % image.getId()
    # Re-organise array from tzyxc to zctyx order expected by OMERO
    # data = data.swapaxes(0, 1).swapaxes(3, 4).swapaxes(2, 3).swapaxes(1, 2)
    namespace = "ilastik.zarr.demo"
    fp = os.path.join(path, name)
    with zarr.ZipStore(fp, mode='w') as store:
        zarr.array(data, store=store, dtype='int16',
                   compressor=zarr.Blosc(cname='zstd'))
    ann = conn.createFileAnnfromLocalFile(fp, mimetype="application/zip",
                                          ns=namespace, desc=desc)
    image.linkAnnotation(ann)
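
A hedged sketch of reading the attached probabilities back out of the zip; the
filename is hypothetical.

import zarr

with zarr.ZipStore("image_Probabilities_zarr.zip", mode="r") as store:
    probs = zarr.open_array(store, mode="r")[:]   # materialize as numpy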
Example #5

    def read_box(
        self,
        bBox: BBox,
        outer_points: Union[bool, int] = False,
        aszarr: bool = False,
    ) -> Union[np.ndarray, zarr.Array]:
        """Reads a boxed sections of the geotiff to a zarr/numpy array

        Args:
            bBox (BBox): A bounding box
            outer_points (Union[bool, int]): Takes an int (n) that gets extra n layers of points/pixels that directly surround the bBox. Defaults to False.
            safe (bool): If True, returns a zarr array. If False, forces a returns as a numpy array by putting the data into memory.  Defaults to False.



        Returns:
            np.ndarray: zarr array of the geotiff file
        """
        ((x_min, y_min), (x_max,
                          y_max)) = self.get_int_box(bBox,
                                                     outer_points=outer_points)
        tiff_array = self.read()
        boxed_array = tiff_array[y_min:y_max, x_min:x_max]
        if aszarr:
            return zarr.array(boxed_array)
        return np.array(boxed_array)
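
A hedged usage sketch; the GeoTiff constructor and the
((x_min, y_min), (x_max, y_max)) bounding-box layout are assumptions here.

from geotiff import GeoTiff

gtf = GeoTiff("example.tif")
bbox = ((138.63, -32.44), (138.65, -32.42))
lazy = gtf.read_box(bbox, outer_points=2, aszarr=True)   # zarr.Array, stays on disk
dense = gtf.read_box(bbox)                               # np.ndarray in memory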
Example #6

    def array(self, data, expectedlen=None, **kwargs):

        # setup
        data = _util.ensure_array_like(data)
        kwargs = self._set_defaults(kwargs)

        # determine chunks
        kwargs.setdefault('chunks', default_chunks(data, expectedlen))

        # determine object codec
        if data.dtype == object:
            # peek at first value
            peek = data[0]
            if isinstance(peek, bytes):
                object_codec = numcodecs.VLenBytes()
            elif isinstance(peek, str):
                object_codec = numcodecs.VLenUTF8()
            else:
                object_codec = numcodecs.MsgPack()
            kwargs.setdefault('object_codec', object_codec)

        # create
        z = zarr.array(data, **kwargs)

        return z
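
A self-contained sketch of the codec selection above, applied directly with
zarr.array; object-dtype arrays need an explicit object_codec.

import numcodecs
import numpy as np
import zarr

strings = np.array(["foo", "bar", "baz"], dtype=object)
z = zarr.array(strings, object_codec=numcodecs.VLenUTF8())
assert z[1] == "bar"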
Example #7

    def get_doc_cluster_matrix(self,
                               l: int = 0,
                               normalize: bool = False) -> da.Array:
        if "cluster" not in self.data[l]:
            path = common.PROJDIR / "hSBM" / self.name / "model.pkl"
            if path.is_file():
                model = dill.load(path.open("rb"))
            else:
                raise RuntimeError("File not found")

            def get_clusters(model: "sbmtm", l: int = 0) -> da.array:
                # rewrite from _sbmtm to use dask
                D = model.get_D()

                g = model.g
                state = model.state
                state_l = state.project_level(l).copy(overlap=True)
                state_l_edges = state_l.get_edge_blocks()  # labeled half-edges

                # count labeled half-edges, group-memberships
                B = state_l.get_B()

                id_d = np.zeros(g.edge_index_range, dtype=np.dtype(int))
                id_b = np.zeros(g.edge_index_range, dtype=np.dtype(int))
                weig = np.zeros(g.edge_index_range, dtype=np.dtype(int))

                for i, e in enumerate(g.edges()):
                    id_b[i], _ = state_l_edges[e]
                    id_d[i] = int(e.source())
                    weig[i] = g.ep["count"][e]

                n_db = sparse.COO(
                    [id_d, id_b], weig, shape=(D, B), fill_value=0
                )  # number of half-edges incident on document-node d and labeled as cluster

                del weig
                del id_b
                del id_d

                #####
                ind_d = np.where(np.sum(n_db, axis=0) > 0)[0]
                n_db = n_db[:, ind_d]
                del ind_d

                # Mixture of clusters into documents P(d | c)
                p_td_d = n_db / np.sum(n_db, axis=0).todense()[np.newaxis, :]

                return da.array(p_td_d).map_blocks(lambda b: b.todense(),
                                                   dtype=np.dtype(float))

            self.data[l]["cluster"] = Raw.from_dask_array(
                self.path / f"clusters{l}.zarr.zip", get_clusters(model, l))
            self.save()

        doc_cluster = self.data[l]["cluster"].get()
        if normalize:
            return zarr.array(doc_cluster /
                              (doc_cluster[:].sum(axis=1))[:, np.newaxis])
        return doc_cluster
Example #8

    def _write_existing_rootgroup(self, xarr: xr.Dataset, data_loc_copy: Union[list, np.ndarray], var_name: str, dims_of_arrays: dict,
                                  chunksize: tuple, timlength: int, timaxis: int, startingshp: tuple):
        """
        A slightly different operation than _write_new_dataset_rootgroup.  To write to an existing rootgroup array,
        we use the data_loc as an index and create a new zarr array from the xarray Dataarray.  The data_loc is only
        used if the var is a time based array.

        Parameters
        ----------
        xarr
            data to write to zarr
        data_loc_copy
            either [start time index, end time index] for xarr, ex: [0,1000] if xarr time dimension is 1000 long,
            or np.array([4,5,6,7,1,2...]) for when data might not be continuous and we need to use a boolean mask
        var_name
            variable name
        dims_of_arrays
            where keys are array names and values list of dims/shape.  Example: 'beampointingangle': [['time', 'sector', 'beam'], (5000, 3, 400)]
        chunksize
            chunk shape used to create the zarr array
        timlength
            Length of the time dimension for the input xarray Dataset
        timaxis
            index of the time dimension
        startingshp
            desired shape for the rootgroup array, might be modified later for total beams if necessary.  If finalsize
            is None (the case when this is not the first write in a set of distributed writes), this is still provided but not used.
        """

        # array to be written
        xarr_data = xarr[var_name].values
        if startingshp is not None:
            startingshp = self._write_adjust_max_beams(startingshp)
            self.rootgroup[var_name].resize(startingshp)

        if isinstance(data_loc_copy, list):  # [start index, end index]
            # the last write will often be less than the block size.  This is allowed in the zarr store, but we
            #    need to correct the index for it.
            if timlength != data_loc_copy[1] - data_loc_copy[0]:
                data_loc_copy[1] = data_loc_copy[0] + timlength

            # location for new data, assume constant chunksize (as we are doing this outside of this function)
            chunk_time_range = slice(data_loc_copy[0], data_loc_copy[1])
            # use the chunk_time_range for writes unless this variable is a non-time dim array (beam for example)
            chunk_idx = tuple(
                chunk_time_range if dims_of_arrays[var_name][1].index(i) == timaxis else slice(0, i) for i in
                dims_of_arrays[var_name][1])
            self.rootgroup[var_name][chunk_idx] = zarr.array(xarr_data, shape=dims_of_arrays[var_name][1], chunks=chunksize)
        else:  # np.array([4,5,6,1,2,3,8,9...]), indices of the new data, might not be sorted
            sorted_order = data_loc_copy.argsort()
            xarr_data = xarr_data[sorted_order]
            data_loc_copy = data_loc_copy[sorted_order]
            zarr_mask = np.zeros_like(self.rootgroup[var_name], dtype=bool)
            zarr_mask[data_loc_copy] = True
            # seems to require me to ravel first, examples only show setting with integer, not sure what is going on here
            self.rootgroup[var_name].set_mask_selection(zarr_mask, xarr_data.ravel())
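
A self-contained sketch of the masked-write path above; set_mask_selection assigns
values in ascending index order, which is why the data is argsorted first.

import numpy as np
import zarr

z = zarr.zeros(10, dtype="f8")
locs = np.array([4, 1, 7])                # unsorted target indices
vals = np.array([40.0, 10.0, 70.0])
order = locs.argsort()
mask = np.zeros(z.shape, dtype=bool)
mask[locs] = True
z.set_mask_selection(mask, vals[order])   # z[1], z[4], z[7] = 10.0, 40.0, 70.0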
Example #9

    def get_doc_topic_matrix(
        self,
        skip_hash_check: bool = False,
        normalize: bool = False,
    ) -> zarr.Array:
        doc_topic = self.data["doc_topic"].get()
        if normalize:
            return zarr.array(doc_topic /
                              (doc_topic[:].sum(axis=1))[:, np.newaxis])
        return doc_topic
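
A self-contained sketch of the row-normalisation in the normalize branch above:

import numpy as np
import zarr

m = zarr.array(np.arange(1, 13, dtype="f8").reshape(3, 4))
row_normed = zarr.array(m[:] / m[:].sum(axis=1)[:, np.newaxis])   # rows sum to 1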
Example #10

    def get_topic_word_matrix(
        self,
        skip_hash_check: bool = False,
        normalize: bool = False,
    ) -> zarr.Array:
        topic_word = self.data["topic_word"].get()
        if normalize:
            return zarr.array(topic_word /
                              (topic_word[:].sum(axis=1))[:, np.newaxis])
        return topic_word
Example #11

    def save(self, file_name = 'sim'):
        """ serialization of object and saving it to file"""

        root = zarr.open_group('state/' + file_name + '.zarr', mode = 'w')
        values = root.create_dataset('values', shape = (self.L_with_boundary, self.L_with_boundary), chunks = (10, 10), dtype = 'i4')
        values[:] = self.values  # fill the dataset; rebinding to zarr.array() would leave it empty
        #data_acquisition = root.create_dataset('data_acquisition', shape = (len(self.data_acquisition),), chunks = (1000,), dtype = 'i4')
        #data_acquisition[:] = self.data_acquisition
        root.attrs['L'] = self.L
        root.attrs['save_every'] = self.save_every

        return root
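
A hedged sketch of restoring the state saved above; the zarr attributes round-trip
as JSON.

import zarr

root = zarr.open_group('state/sim.zarr', mode='r')
values = root['values'][:]
L = root.attrs['L']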
Example #12

    def array(self, data, expectedlen=None, **kwargs):

        # setup
        data = _util.ensure_array_like(data)
        kwargs = self._set_defaults(kwargs)

        # determine chunks
        kwargs.setdefault('chunks', default_chunks(data, expectedlen))

        # create
        z = zarr.array(data, **kwargs)

        return z
Example #13

def test_numpy_writeable():
    # Create data
    original = np.random.rand(1024, 1024)
    mutable = zarr.array(original)

    # Initialize app
    route = create_zarr_route(mutable)
    app = Starlette(routes=[route])

    # Open remote array and compare
    remote_store = HTTPStore(TestClient(app))
    arr = zarr.open_array(remote_store)
    arr[:50, :50] = 2

    np.testing.assert_allclose(arr[:], mutable[:])
Example #14

def test(directory_path):
    base = zarr.open(directory_path, mode='r+')
    downsize_dimensions = np.asarray(base.shape) / np.asarray(base.chunks)
    if (np.unique(downsize_dimensions).size != 1):
        print("not all dimensions reduce equally; should never happen?")
    downsize_factor = int(downsize_dimensions[0]**(1 / 2))

    downsize_factor -= 1  # already have first level of pyramid
    small_image = base[:]  # currently must fit into RAM; needs to scale
    levels = []

    while (downsize_factor >= 0):
        small_image = countless(small_image)
        newLevel = zarr.array(small_image, chunks=base.chunks)
        zarr.save(os.path.join(directory_path, str(downsize_factor)), newLevel)
        downsize_factor -= 1
Example #15

    def __init__(self, store, data=None, store_type='sqlite', **kwarg):
        self._store_type = store_type
        store = store.replace('-', '/')

        if not (os.access(os.path.dirname(store), mode=os.R_OK)):
            raise ValueError('Library does not exist')
        if store_type == 'sqlite':
            self.store = zarr.SQLiteStore(store)
        else:
            self.store = zarr.DirectoryStore(store)
        self.group = zarr.open_group(store=self.store, **kwarg)
        if isinstance(data, np.ndarray) and data.dtype.kind == 'V':
            self.store.cursor.execute('BEGIN TRANSACTION')
            for col in data.dtype.names:
                self.group[col] = zarr.array(data[col])
            self.store.cursor.execute('COMMIT')
Example #16

def spots_with_flow(config, spots):
    prediction = None
    if hasattr(config, 'tiff_input') and config.tiff_input is not None:
        img_input = np.array([skimage.io.imread(f) for f in config.tiff_input])
    elif config.zpath_input is not None:
        za_input = zarr.open(config.zpath_input, mode='a')
        za_flow = zarr.open(config.zpath_flow, mode='a')
        za_hash = zarr.open(config.zpath_flow_hashes, mode='a')

        # https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file#answer-3431838
        hash_md5 = hashlib.md5()
        with open(config.model_path, 'rb') as f:
            for chunk in iter(lambda: f.read(4096), b''):
                hash_md5.update(chunk)
        za_md5 = zarr.array(
            za_input[config.timepoint - 1:config.timepoint + 1]).digest('md5')
        hash_md5.update(za_md5)
        hash_md5.update(json.dumps(config.patch_size).encode('utf-8'))
        model_md5 = hash_md5.digest()
        if model_md5 == za_hash[config.timepoint - 1]:
            prediction = za_flow[config.timepoint - 1]
        else:
            img_input = np.array([
                normalize_zero_one(za_input[i].astype('float32'))
                for i in range(config.timepoint - 1, config.timepoint + 1)
            ])
    if prediction is None:
        try:
            prediction = _get_flow_prediction(img_input,
                                              config.timepoint,
                                              config.model_path,
                                              config.keep_axials,
                                              config.device,
                                              config.flow_norm_factor,
                                              config.patch_size)
        finally:
            torch.cuda.empty_cache()
        if config.output_prediction:
            za_flow[config.timepoint - 1] = prediction
            za_hash[config.timepoint - 1] = model_md5
        else:
            za_hash[config.timepoint - 1] = 0
    # Restore to voxel unit
    for d in range(prediction.shape[0]):
        prediction[d] *= config.flow_norm_factor[d]
    res_spots = _estimate_spots_with_flow(spots, prediction, config.scales)
    return res_spots
Example #17

def test_numpy_read_only():
    # Create data
    original = np.random.rand(1024, 1024)
    z = zarr.array(original, read_only=True)

    # Initialize app
    route = create_zarr_route(z)
    app = Starlette(routes=[route])

    # Open remote array and compare
    remote_store = HTTPStore(TestClient(app))
    arr = zarr.open_array(remote_store)
    np.testing.assert_allclose(arr[:], original)

    # Make sure can't write
    with pytest.raises(ValueError):
        arr[:50, :50] = 10
Example #18

def _test_pairwise_distance(metric):

    # Simulate some data, N.B., oriented such that we want to compute
    # distance between columns.
    data = np.random.randint(low=0, high=3, size=(100, 10), dtype=np.int8)

    # Compute expected result, using scipy as reference implementation.
    expect = spd.pdist(data.T, metric=metric)

    # Test numpy array.
    actual = pairwise_distance(data, metric=metric)
    assert isinstance(actual, np.ndarray)
    assert_allclose(expect, actual)
    assert actual.dtype.kind == "f"

    # Test cuda array.
    data_cuda = cuda.to_device(data)
    actual = pairwise_distance(data_cuda, metric=metric)
    assert isinstance(actual, type(data_cuda))
    assert_allclose(expect, actual.copy_to_host())
    assert actual.dtype.kind == "f"

    # Test dask array.
    data_dask = da.from_array(data, chunks=(10, 5))
    actual = pairwise_distance(data_dask, metric=metric)
    assert isinstance(actual, da.Array)
    ac = actual.compute(scheduler="single-threaded")
    assert_allclose(expect, ac)
    assert actual.dtype.kind == "f"

    # Test dask array with cuda.
    data_dask_cuda = data_dask.rechunk((10, -1)).map_blocks(cuda.to_device)
    actual = pairwise_distance(data_dask_cuda, metric=metric)
    assert isinstance(actual, da.Array)
    ac = actual.compute(scheduler="single-threaded")
    assert_allclose(expect, ac)
    assert actual.dtype.kind == "f"

    # Test zarr array.
    data_zarr = zarr.array(data, chunks=(10, 5))
    actual = pairwise_distance(data_zarr, metric=metric)
    assert isinstance(actual, da.Array)
    assert_allclose(expect, actual.compute())
    assert actual.dtype.kind == "f"
Example #19

def test_count_alleles():
    gt = np.array([[[0, 0], [0, 1], [2, 2]], [[-1, 0], [1, -1], [-1, -1]]],
                  dtype=np.int8)
    expect = np.array([[3, 1, 2], [1, 1, 0]], dtype="i4")

    # Test numpy array.
    actual = genotypes_count_alleles(gt, max_allele=2)
    assert isinstance(actual, np.ndarray)
    assert_array_equal(expect, actual)

    # Test cuda array.
    gt_cuda = cuda.to_device(gt)
    actual = genotypes_count_alleles(gt_cuda, max_allele=2)
    assert isinstance(actual, type(gt_cuda))
    assert_array_equal(expect, actual.copy_to_host())

    # Test dask array.
    gt_dask = da.from_array(gt, chunks=(1, 2, -1))
    actual = genotypes_count_alleles(gt_dask, max_allele=2)
    assert isinstance(actual, da.Array)
    assert_array_equal(expect, actual.compute())

    # Test zarr array.
    gt_zarr = zarr.array(gt, chunks=(1, 2, None))
    actual = genotypes_count_alleles(gt_zarr, max_allele=2)
    assert isinstance(actual, da.Array)
    assert_array_equal(expect, actual.compute())

    # Test dask cuda array.
    gt_dask_cuda = gt_dask.map_blocks(cuda.to_device)
    actual = genotypes_count_alleles(gt_dask_cuda, max_allele=2)
    assert isinstance(actual, da.Array)
    assert_array_equal(expect, actual.compute(scheduler="single-threaded"))

    # Test exceptions.
    with pytest.raises(TypeError):
        # noinspection PyTypeChecker
        genotypes_count_alleles(gt, max_allele="foo")
    with pytest.raises(TypeError):
        # noinspection PyTypeChecker
        genotypes_count_alleles(gt, max_allele=[1])
    with pytest.raises(ValueError):
        genotypes_count_alleles(gt, max_allele=128)
Example #20

def compress_layer(layer_array: np.ndarray):
    """
    Returns compressed version of the original numpy array
    based on memory efficient compression.

    Parameters
    ----------
    layer_array: Numpy array
        Original array version of the layer data

    Returns
    -------
    layer_shape: tuple
        Tuple containing shape of the layer_array
    compressed_layer: np.ndarray or zarr.Array
        Sparse version (COO list) of the layer data, if is_sparse == True
        zarr array with zstd compression, if is_sparse == False
    is_sparse: bool
        True if the Napari layer should be represented as a COO list.
    """
    layer_shape = tuple(layer_array.shape)

    # USE COORD LIST FOR SPARSE LABELLING
    tmp_coo = sparse.COO(layer_array)
    # join coords and data in single array
    coo_array = np.append(tmp_coo.coords, np.array([tmp_coo.data]), axis=0)
    coo_array = coo_array.astype(np.uint16)

    # USE ZSTD COMPRESSION FOR PREDICTIONS, higher clevel takes more time.
    zarr_chunks = tuple([1 for i in range(len(layer_array.shape) - 2)] +
                        [1024, 1024])
    compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE)
    data = layer_array.astype(np.uint16)
    zarr_array = zarr.array(data, chunks=zarr_chunks, compressor=compressor)

    if zarr_array.nbytes_stored >= coo_array.nbytes:
        compressed_layer = coo_array
        is_sparse = True
    else:
        compressed_layer = zarr_array
        is_sparse = False
    return layer_shape, compressed_layer, is_sparse
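
A hedged usage sketch with a mostly-empty label image, where the COO encoding
would be expected to win:

import numpy as np

labels = np.zeros((4, 2048, 2048), dtype=np.uint16)
labels[0, 100:110, 100:110] = 7
layer_shape, compressed, is_sparse = compress_layer(labels)   # keeps the smaller encoding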
Example #21

    def _create_array(self, data, **kwargs):

        # determine chunks
        chunks = default_chunks(data)
        kwargs.setdefault('chunks', chunks)

        # create array
        if 'path' in kwargs:
            kwargs['mode'] = 'w'
            kwargs['shape'] = data.shape
            # ensure dtype is specified
            dtype = kwargs.get('dtype', None)
            if not dtype:
                kwargs['dtype'] = data.dtype
            z = zarr.open(**kwargs)
            z[:] = data
        else:
            z = zarr.array(data, **kwargs)

        return z
Example #22

def test_to_haplotypes():

    gt = np.array([[[0, 0], [0, 1], [2, 2]], [[-1, 0], [1, -1], [-1, -1]]],
                  dtype=np.int8)
    expect = np.array([[0, 0, 0, 1, 2, 2], [-1, 0, 1, -1, -1, -1]],
                      dtype=np.int8)

    # Test numpy array.
    actual = genotypes_to_haplotypes(gt)
    assert isinstance(actual, np.ndarray)
    assert_array_equal(expect, actual)

    # Test numpy array, F order.
    actual = genotypes_to_haplotypes(np.asfortranarray(gt))
    assert isinstance(actual, np.ndarray)
    assert_array_equal(expect, actual)

    # Test dask array.
    gt_dask = da.from_array(gt, chunks=(1, 2, -1))
    actual = genotypes_to_haplotypes(gt_dask)
    assert isinstance(actual, da.Array)
    assert_array_equal(expect, actual.compute())

    # Test zarr array.
    gt_zarr = zarr.array(gt, chunks=(1, 2, 2))
    actual = genotypes_to_haplotypes(gt_zarr)
    assert isinstance(actual, da.Array)
    assert_array_equal(expect, actual.compute())

    # Test exceptions.
    with pytest.raises(TypeError):
        # Wrong type.
        genotypes_to_haplotypes("foo")
    with pytest.raises(TypeError):
        # Wrong dtype.
        genotypes_to_haplotypes(gt.astype("f4"))
    with pytest.raises(ValueError):
        # Wrong ndim.
        genotypes_to_haplotypes(gt[0])
Example #23

def serve(source, *, name=None, allowed_origins=None, **kwargs):
    """Starts an HTTP server, serving a part of a zarr hierarchy or numpy array as zarr.

    Parameters
    ----------
    source : zarr.Array, zarr.Group, or np.ndarray
        Source data to serve over HTTP. The underlying store of a zarr.Array
        or zarr.Group is used to forward requests. If a numpy array is provided,
        an in-memory zarr array is created, and the underlying store is wrapped.
    name : str
        Path prefix for underlying store keys (e.g. "data.zarr"). If provided, routes are
        prefixed with name.
    allowed_origins : list of str, optional
        List of allowed origins (as strings). Use wildcard "*" to allow all.
    **kwargs : keyword arguments
        All extra keyword arguments are forwarded to uvicorn.run
    """

    if isinstance(source, np.ndarray):
        # Need to cast as zarr and create store for in memory numpy array
        source = zarr.array(source)

    if not isinstance(source, (zarr.Array, zarr.Group)):
        raise TypeError(
            "Source is not one of numpy.ndarray, zarr.Array, or zarr.Group.")

    route = create_zarr_route(source)
    routes = [route] if name is None else [Mount("/" + name, routes=[route])]
    server = Starlette(routes=routes)
    if allowed_origins:
        server.add_middleware(
            CORSMiddleware,
            allow_origins=allowed_origins,
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )
    uvicorn.run(server, **kwargs)
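
A hedged usage sketch; extra keyword arguments such as port are forwarded straight
to uvicorn.run.

import numpy as np

serve(np.random.rand(512, 512), name="data.zarr",
      allowed_origins=["*"], port=8000)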
Example #24

def test_zarr(selenium):
    import numpy as np
    import zarr
    from numcodecs import Blosc

    # basic test
    z = zarr.zeros((1000, 1000), chunks=(100, 100), dtype="i4")
    assert z.shape == (1000, 1000)

    # test assignment
    z[0, :] = np.arange(1000)
    assert z[0, 1] == 1

    # test saving and loading
    a1 = np.arange(10)
    zarr.save("/tmp/example.zarr", a1)
    a2 = zarr.load("/tmp/example.zarr")
    np.testing.assert_equal(a1, a2)

    # test compressor
    compressor = Blosc(cname="zstd", clevel=3, shuffle=Blosc.BITSHUFFLE)
    data = np.arange(10000, dtype="i4").reshape(100, 100)
    z = zarr.array(data, chunks=(10, 10), compressor=compressor)
    assert z.compressor == compressor
Example #25

def zarr_im_rgb_np():
    return zarr.array(
        np.random.randint(0, 255, (2048, 2048, 3), dtype=np.uint8))
Example #26

def zarr_im_mch_np():
    return zarr.array(
        np.random.randint(0, 255, (3, 2048, 2048), dtype=np.uint16))
Example #27

"""
generate test data for zarr-js
"""

import zarr
from numpy import arange
from numcodecs.zlib import Zlib

# 1d.contiguous.compressed.i2
store = zarr.DirectoryStore('data/1d.contiguous.compressed.i2.zarr')
z = zarr.array([1, 2, 3, 4],
               dtype='i2',
               store=store,
               chunks=(4, ),
               compressor=Zlib())

# 1d.contiguous.uncompressed.i2
store = zarr.DirectoryStore('data/1d.contiguous.uncompressed.i2.zarr')
z = zarr.array([1, 2, 3, 4],
               dtype='i2',
               store=store,
               chunks=(4, ),
               compressor=None)

# 1d.contiguous.compressed.i4
store = zarr.DirectoryStore('data/1d.contiguous.compressed.i4.zarr')
z = zarr.array([1, 2, 3, 4],
               dtype='i4',
               store=store,
               chunks=(4, ),
               compressor=Zlib())
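
A hedged sketch verifying that one of the generated fixtures round-trips from
Python (zarr is already imported above):

z = zarr.open('data/1d.contiguous.compressed.i2.zarr', mode='r')
assert list(z[:]) == [1, 2, 3, 4]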
Example #28

def _test_gt_func(f, gt, expect, compare, **kwargs):

    # 3D tests.
    assert gt.ndim == 3

    # Test numpy array.
    actual = f(gt, **kwargs)
    assert isinstance(actual, np.ndarray)
    compare(expect, actual)

    # Test numpy array, Fortran order.
    actual = f(np.asfortranarray(gt), **kwargs)
    assert isinstance(actual, np.ndarray)
    compare(expect, actual)

    # Test dask array.
    gt_dask = da.from_array(gt, chunks=(1, 2, -1))
    actual = f(gt_dask, **kwargs)
    assert isinstance(actual, da.Array)
    assert_array_equal(expect, actual.compute())

    # Test zarr array.
    gt_zarr = zarr.array(data=gt, chunks=(1, 2, None))
    actual = f(gt_zarr, **kwargs)
    assert isinstance(actual, da.Array)
    assert_array_equal(expect, actual.compute())

    # Reshape to test as 2D.
    gt = gt.reshape((-1, gt.shape[2]))
    if expect.ndim == 3:
        expect = expect.reshape((gt.shape[0], -1))
    elif expect.ndim == 2:
        expect = expect.reshape(-1)

    # Test numpy array.
    actual = f(gt, **kwargs)
    assert isinstance(actual, np.ndarray)
    compare(expect, actual)

    # Test dask array.
    gt_dask = da.from_array(gt, chunks=(2, -1))
    actual = f(gt_dask, **kwargs)
    assert isinstance(actual, da.Array)
    assert_array_equal(expect, actual.compute())

    # Test zarr array.
    gt_zarr = zarr.array(data=gt)
    actual = f(gt_zarr, **kwargs)
    assert isinstance(actual, da.Array)
    assert_array_equal(expect, actual.compute())

    # Test exceptions.
    with pytest.raises(TypeError):
        # Wrong type.
        f("foo", **kwargs)
    with pytest.raises(TypeError):
        # Wrong dtype.
        f(gt.astype("f4"), **kwargs)
    with pytest.raises(ValueError):
        # Wrong ndim.
        f(gt[0], **kwargs)
Example #29

from contextlib import contextmanager
from time import perf_counter

import zarr
from numcodecs import Blosc

compressor = Blosc(cname='snappy')


@contextmanager
def timeit(title=''):
    start = perf_counter()
    yield
    delta = perf_counter() - start
    print(title, delta)


def data(i):
    return ['abc' * 30, 'def' * 30, 'ijk' * 30][i % 3]


N = 100_000
data = [data(i) for i in range(N)]
gr = zarr.group()
arr = zarr.array(data, dtype=str)
# gr['names'] = arr

st = zarr.open('text_store.zarr')
with timeit('store'):
    zarr.copy(arr, st, 'names')

with open('text_file.txt', 'w') as fh:
    fh.write('\n'.join(data))

print(st['names'][101] == data[101])
Example #30

    def setup_instance(self, data):
        z = zarr.array(data, chunks=(2, None))
        return AlleleCountsDaskArray(z)
Example #31

def test_array():
    a = np.arange(100)
    z = array(a, chunks=10)
    eq(a.shape, z.shape)
    eq(a.dtype, z.dtype)
    assert_array_equal(a, z[:])
Example #32

def _test_ac_func(f, ac, expect, compare):

    # 3D tests.
    assert ac.ndim == 3

    # Test numpy array.
    actual = f(ac)
    assert isinstance(actual, np.ndarray)
    compare(expect, actual)
    assert expect.dtype == actual.dtype

    # Test numpy array, Fortran order.
    actual = f(np.asfortranarray(ac))
    assert isinstance(actual, np.ndarray)
    compare(expect, actual)
    assert expect.dtype == actual.dtype

    # Test dask array.
    ac_dask = da.from_array(ac, chunks=(1, 2, -1))
    actual = f(ac_dask)
    assert isinstance(actual, da.Array)
    compare(expect, actual.compute())
    assert expect.dtype == actual.dtype

    # Test zarr array.
    ac_zarr = zarr.array(data=ac)
    actual = f(ac_zarr)
    assert isinstance(actual, da.Array)
    compare(expect, actual.compute())
    assert expect.dtype == actual.dtype

    # Reshape to test as 2D.
    ac = ac.reshape((-1, ac.shape[2]))
    if expect.ndim == 3:
        expect = expect.reshape(ac.shape)
    elif expect.ndim == 2:
        expect = expect.reshape(-1)

    # Test numpy array.
    actual = f(ac)
    assert isinstance(actual, np.ndarray)
    compare(expect, actual)
    assert expect.dtype == actual.dtype

    # Test dask array.
    ac_dask = da.from_array(ac, chunks=(2, -1))
    actual = f(ac_dask)
    assert isinstance(actual, da.Array)
    compare(expect, actual.compute())
    assert expect.dtype == actual.dtype

    # Test zarr array.
    ac_zarr = zarr.array(data=ac)
    actual = f(ac_zarr)
    assert isinstance(actual, da.Array)
    compare(expect, actual.compute())
    assert expect.dtype == actual.dtype

    # Test errors.
    with pytest.raises(TypeError):
        # Wrong type.
        f("foo")
    with pytest.raises(TypeError):
        # Wrong dtype.
        f(ac.astype("f4"))
    with pytest.raises(ValueError):
        # Wrong ndim.
        f(ac[0])