Beispiel #1
0
def load_zarr_archive(zarr_path, result_shape, result_chunks, overwrite=False):
    """Open the zarr archive at ``zarr_path``, creating it when necessary.

    Args:
        zarr_path: Location of the archive. Must provide ``.exists()``
            (e.g. a ``pathlib.Path``); it is converted with ``str()`` before
            being handed to zarr.
        result_shape: Shape of the array to create.
        result_chunks: Chunk shape of the array to create.
        overwrite: When truthy, any existing archive at ``zarr_path`` is
            deleted and a fresh one is created. When falsy, an existing
            archive is opened as-is and ``result_shape`` / ``result_chunks``
            are ignored.

    Returns:
        The opened or newly created zarr object.
    """
    if overwrite and zarr_path.exists():
        shutil.rmtree(zarr_path)
        print("zarr archive removed")

    if not overwrite and zarr_path.exists():
        z = zarr.open(str(zarr_path))
        print("zarr archive loaded")
        return z

    z = zarr.create(
        result_shape,
        chunks=result_chunks,
        dtype=np.float64,
        fill_value=-np.inf,  # -np.inf indicates incomplete computation
        store=str(zarr_path),
    )
    print("zarr_archive created")
    return z
Beispiel #2
0
def get_auto_chunks(shape, dtype):
    """Return the chunk shape zarr guesses for an array of ``shape``/``dtype``.

    This is a hack: a throwaway array is created only so that its ``chunks``
    attribute can be read back.
    """
    extra_kwargs = {}
    if dtype == object:
        # object arrays cannot be created without an object codec
        extra_kwargs['object_codec'] = zarr.codecs.Pickle()
    return zarr.create(shape, dtype=dtype, **extra_kwargs).chunks
Beispiel #3
0
    def get_temp_filepath(self):
        """Prepare a storage target for ``self.backend`` and keep it on ``self``.

        Nothing is returned; the result is stored in ``self.storage_obj``:

        * when ``self.dask`` is falsy it is the raw target (a directory path
          for 'POSIX' and 'FUSE', the GCS mapping for 'GCS');
        * otherwise it is a zarr array created on that target with
          ``self.shape``, ``self.chunksize`` and ``self.dtype``.

        Raises:
            NotImplementedError: if the backend is unknown, or if the config
                value needed by the 'GCS' / 'FUSE' backend is missing.
        """
        if self.backend == 'POSIX':
            # Fresh temporary directory; the store name is made unique with the
            # module-level counter plus the configured suffix.
            self.temp_dir    = tempfile.mkdtemp()
            self.dir_store   = os.path.join(self.temp_dir,
                                            'temp-%s%s' % (next(_counter),
                                            self.suffix))
            # Saving dask objects as Zarr requires more than just a filehandle
            if not self.dask:
                self.storage_obj = self.dir_store
            else:
                self.storage_obj = zarr.create(shape=self.shape, chunks=self.chunksize,
                                               store=self.dir_store, dtype=self.dtype, 
                                               overwrite=True)
        elif self.backend == 'GCS':
            if not self.gcs_zarr:
                    raise NotImplementedError("Missing config for GCP test")
            
            # HACK in order to give worker pods read/write to storage:
            # a first filesystem is opened with token='cache' only to obtain
            # its session credentials, which are then passed explicitly to the
            # filesystem object that is actually used.
            fs = gcsfs.GCSFileSystem(project=self.gcp_project_name, token='cache')
            token = fs.session.credentials
            self.gcp_project = gcsfs.GCSFileSystem(project=self.gcp_project_name, 
                                                   token=token)
            self.gcsfsmap    = gcsfs.mapping.GCSMap(self.gcs_zarr, 
                                                    gcs=self.gcp_project,
                                                    check=True, create=False)
            if not self.dask:
                # Recursively remove whatever is stored at the target via the
                # gsutil CLI; the command's exit status is not checked here.
                gsutil_arg = "gs://%s" % self.gcs_zarr
                call(["gsutil", "-q", "-m", "rm", "-r", gsutil_arg])
                self.storage_obj = self.gcsfsmap
            else: 
                self.storage_obj = zarr.create(shape=self.shape, chunks=self.chunksize,
                                               store=self.gcsfsmap, dtype=self.dtype, 
                                               overwrite=True)
            
        elif self.backend == 'FUSE':
            if not self.gcs_zarr_fuse:
                raise NotImplementedError("Missing config for FUSE test")

            # The bucket is mounted onto a fresh temporary directory; the store
            # path is plain string concatenation of mount point and
            # ``gcs_zarr_fuse``.
            # NOTE(review): presumably ``gcs_zarr_fuse`` starts with a path
            # separator -- confirm against the config.
            self.temp_dir    = tempfile.mkdtemp()
            self.dir_store = self.temp_dir + self.gcs_zarr_fuse
            call([GCSFUSE, self.gcs_bucket, self.temp_dir])

            # Remove previous test runs
            if os.path.exists(self.dir_store):
                shutil.rmtree(self.dir_store)
            os.makedirs(self.dir_store)

            # Return the path if this isn't Dask
            # TODO: This should be a function
            if not self.dask:
                self.storage_obj = self.dir_store
            else:
                self.storage_obj = zarr.create(shape=self.shape, chunks=self.chunksize,
                                               store=self.dir_store, dtype=self.dtype, 
                                               overwrite=True)
        else:
            raise NotImplementedError("Storage backend not implemented.")
Beispiel #4
0
def test_set_orthogonal_selection_1d():
    """Orthogonal writes on a 1-D array with boolean, integer and basic selections."""
    values = np.arange(1050, dtype=int)
    expected = np.empty(values.shape, dtype=int)
    target = zarr.create(shape=expected.shape, chunks=100, dtype=expected.dtype)
    length = expected.shape[0]

    # vary how sparse the random selections are
    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):

        # boolean mask
        mask = np.random.binomial(1, density, size=length).astype(bool)
        _test_set_orthogonal_selection(values, expected, target, mask)

        # integer indices: unordered, then increasing, then decreasing
        indices = np.random.choice(length, size=int(length * density), replace=True)
        _test_set_orthogonal_selection(values, expected, target, indices)
        indices.sort()
        _test_set_orthogonal_selection(values, expected, target, indices)
        _test_set_orthogonal_selection(values, expected, target, indices[::-1])

    # basic selections must work through the orthogonal API as well
    for basic in basic_selections_1d:
        _test_set_orthogonal_selection(values, expected, target, basic)
Beispiel #5
0
def test_set_orthogonal_selection_3d():
    """Orthogonal writes on a 3-D array with boolean and integer index arrays."""
    values = np.arange(100000, dtype=int).reshape(200, 50, 10)
    expected = np.empty_like(values)
    target = zarr.create(shape=expected.shape, chunks=(60, 20, 3),
                         dtype=expected.dtype)
    dims = expected.shape

    np.random.seed(42)
    # only the first axis varies in sparseness; the other two stay at one half
    for density in (0.5, 0.1, 0.01):
        fractions = (density, .5, .5)

        # boolean masks, drawn axis by axis
        masks = [
            np.random.binomial(1, frac, size=dim).astype(bool)
            for frac, dim in zip(fractions, dims)
        ]
        _test_set_orthogonal_selection_3d(values, expected, target, *masks)

        # integer index arrays, drawn axis by axis
        picks = [
            np.random.choice(dim, size=int(dim * frac), replace=True)
            for frac, dim in zip(fractions, dims)
        ]
        _test_set_orthogonal_selection_3d(values, expected, target, *picks)

        # sorted increasing
        for pick in picks:
            pick.sort()
        _test_set_orthogonal_selection_3d(values, expected, target, *picks)

        # sorted decreasing
        descending = [pick[::-1] for pick in picks]
        _test_set_orthogonal_selection_3d(values, expected, target, *descending)
Beispiel #6
0
def test_set_coordinate_selection_2d():
    """Coordinate (point-wise) writes on a 2-D array."""
    values = np.arange(10000, dtype=int).reshape(1000, 10)
    expected = np.empty_like(values)
    target = zarr.create(shape=expected.shape, chunks=(300, 3),
                         dtype=expected.dtype)
    n_rows, n_cols = expected.shape

    np.random.seed(42)
    # the factor 2 draws more points than the array has elements
    for density in (2, 0.5, 0.1, 0.01):
        count = int(expected.size * density)
        row_idx = np.random.choice(n_rows, size=count, replace=True)
        col_idx = np.random.choice(n_cols, size=count, replace=True)

        cases = (
            (42, 4),
            (-1, -1),
            # arrays on both axes
            (row_idx, col_idx),
            # array on one axis, int on the other
            (row_idx, 4),
            (42, col_idx),
        )
        for case in cases:
            _test_set_coordinate_selection(values, expected, target, case)

    # index arrays that are themselves two-dimensional
    grid_rows = np.array([[1, 2, 3],
                          [4, 5, 6]])
    grid_cols = np.array([[1, 3, 2],
                          [2, 0, 5]])
    _test_set_coordinate_selection(values, expected, target,
                                   (grid_rows, grid_cols))
 def create(cls, path: Path, array_info: ArrayInfo) -> "ZarrArray":
     """Create an empty zarr array at ``path`` described by ``array_info``.

     The array starts with a spatial extent of 1x1x1 and one chunk spanning
     all channels; it is written in "F" order and compressed with Blosc/zstd
     only when ``array_info.compression_mode`` is truthy.

     Returns a ``ZarrArray`` wrapping ``path``.
     """
     assert array_info.data_format == cls.data_format
     no_sharding = Vec3Int.full(1)
     assert array_info.chunks_per_shard == no_sharding, \
         "Zarr storage doesn't support sharding yet"

     initial_shape = (array_info.num_channels, 1, 1, 1)
     chunk_shape = (array_info.num_channels, ) + array_info.chunk_size.to_tuple()
     voxel_dtype = array_info.voxel_type
     if array_info.compression_mode:
         codec = numcodecs.Blosc(
             cname="zstd", clevel=3, shuffle=numcodecs.Blosc.SHUFFLE)
     else:
         codec = None

     zarr.create(
         shape=initial_shape,
         chunks=chunk_shape,
         dtype=voxel_dtype,
         compressor=codec,
         store=_fsstore_from_path(path),
         order="F",
     )
     return ZarrArray(path)
Beispiel #8
0
def prepare_zarr_storage(variations, out_path):
    """Lay out a zarr directory store for ``variations`` and build the store task.

    A root group is (re)created at ``out_path`` with one ``samples`` array and
    one array per non-``None`` field of ``variations``, placed in the variants
    or calls group according to ``ALLELE_ZARR_DEFINITION_MAPPINGS``. Field
    metadata, when present, is copied to the attributes of the matching array.

    Args:
        variations: Container exposing ``metadata``, ``samples``, ``items()``
            and item access by field name.
        out_path: Location of the directory store (converted with ``str()``).

    Returns:
        The result of ``da.store(..., compute=False)`` pairing each source
        array with its zarr target; nothing is written until it is computed.

    Raises:
        KeyError: if a field has no entry in ``ALLELE_ZARR_DEFINITION_MAPPINGS``.
    """
    store = zarr.DirectoryStore(str(out_path))
    root = zarr.group(store=store, overwrite=True)
    metadata = variations.metadata
    # One lock shared by the whole store operation. Creating it up front keeps
    # the final ``da.store`` call valid even when no field is processed below.
    lock = SerializableLock()

    samples_array = variations.samples
    samples_codec = numcodecs.VLenUTF8() if samples_array.dtype == object else None
    samples_dataset = zarr.create(shape=samples_array.shape, path='samples',
                                  store=store, dtype=samples_array.dtype,
                                  object_codec=samples_codec)
    sources = [samples_array]
    targets = [samples_dataset]

    variants = root.create_group(ZARR_VARIANTS_GROUP_NAME, overwrite=True)
    calls = root.create_group(ZARR_CALL_GROUP_NAME, overwrite=True)
    for field, _ in variations.items():
        definition = ALLELE_ZARR_DEFINITION_MAPPINGS[field]
        field_metadata = metadata.get(field, None)

        # The array is looked up by field name rather than taken from
        # ``items()``, as the original code did.
        array = variations[field]
        if array is None:
            continue
        array.compute_chunk_sizes()
        sources.append(array)

        group = calls if definition['group'] == ZARR_CALL_GROUP_NAME else variants
        path = os.path.sep + os.path.join(group.path, definition['field'])

        # object arrays need an explicit codec to be storable
        object_codec = numcodecs.VLenUTF8() if array.dtype == object else None
        dataset = zarr.create(shape=array.shape, path=path, store=store,
                              object_codec=object_codec, dtype=array.dtype)
        if field_metadata is not None:
            for key, value in field_metadata.items():
                dataset.attrs[key] = value

        targets.append(dataset)

    return da.store(sources, targets, compute=False, lock=lock)
Beispiel #9
0
def zarr_array(args):
    """Create a 50000 x 50000 int32 zarr array under ``<args.path>/zarr``.

    The array uses 1000 x 1000 chunks in a nested directory store and
    overwrites anything already present there.
    """
    store_dir = os.path.join(args.path, 'zarr')
    return zarr.create(
        shape=(50000, 50000),
        chunks=(1000, 1000),
        dtype='int32',
        store=zarr.storage.NestedDirectoryStore(store_dir),
        overwrite=True,
    )
Beispiel #10
0
def test_get_coordinate_selection_2d():
    """Coordinate (point-wise) reads on a 2-D array, including error cases."""
    reference = np.arange(10000, dtype=int).reshape(1000, 10)
    stored = zarr.create(shape=reference.shape, chunks=(300, 3),
                         dtype=reference.dtype)
    stored[:] = reference
    n_rows, n_cols = reference.shape

    np.random.seed(42)
    # the factor 2 draws more points than the array has elements
    for density in (2, 0.5, 0.1, 0.01):
        count = int(reference.size * density)
        row_idx = np.random.choice(n_rows, size=count, replace=True)
        col_idx = np.random.choice(n_cols, size=count, replace=True)
        cases = [
            # single value
            (42, 4),
            (-1, -1),
            # arrays on both axes
            (row_idx, col_idx),
            # array on one axis, int on the other
            (row_idx, 4),
            (42, col_idx),
            (42, 4),
        ]
        for case in cases:
            _test_get_coordinate_selection(reference, stored, case)

    fixed_cases = [
        # not monotonically increasing (first dim)
        ([3, 3, 4, 2, 5], [1, 3, 5, 7, 9]),
        # not monotonically increasing (second dim)
        ([1, 1, 2, 2, 5], [1, 3, 2, 1, 0]),
        # multi-dimensional selection
        (np.array([[1, 1, 2],
                   [2, 2, 5]]),
         np.array([[1, 3, 2],
                   [1, 0, 0]])),
    ]
    for case in fixed_cases:
        _test_get_coordinate_selection(reference, stored, case)

    # slices and Ellipsis are not valid coordinate selections
    rejected = [
        (slice(5, 15), [1, 2, 3]),
        ([1, 2, 3], slice(5, 15)),
        (Ellipsis, [1, 2, 3]),
        Ellipsis,
    ]
    for bad in rejected:
        with pytest.raises(IndexError):
            stored.get_coordinate_selection(bad)
Beispiel #11
0
def main():
    """Time filling an in-memory zarr array of ones through ``PngCodec``."""
    dims = (10, 10, 1920, 1080, 7)
    pixel_type = "uint8"

    numcodecs.register_codec(PngCodec, "png")
    with Timer("Compress"):
        arr = zarr.create(
            shape=dims,
            dtype=pixel_type,
            compressor=PngCodec(solo_channel=True),
            store=zarr.MemoryStore(),
        )
        arr[:] = np.ones(dims, dtype=pixel_type)
        # read everything back so decoding is part of the timed section
        print(arr[:].shape)
Beispiel #12
0
def test_set_mask_selection_2d():
    """Mask writes on a 2-D array for several mask densities."""
    values = np.arange(10000, dtype=int).reshape(1000, 10)
    expected = np.empty_like(values)
    target = zarr.create(shape=expected.shape, chunks=(300, 3),
                         dtype=expected.dtype)

    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):
        flat_mask = np.random.binomial(1, density, size=expected.size)
        mask = flat_mask.astype(bool).reshape(expected.shape)
        _test_set_mask_selection(values, expected, target, mask)
def time_zarr(dataset, batch_size=1, num_batches=1):
    """Time batched access to the zarr copy of ``dataset``.

    The array lives at ``dataset + "_zarr"``. If that path exists it is opened;
    otherwise a new array of shape ``(batch_size * num_batches, 785)`` with one
    batch of rows per chunk is created there. A newly created array is not
    populated by this function.

    Args:
        dataset: Base name of the dataset; ``"_zarr"`` is appended to form the
            store path.
        batch_size: Number of rows per batch (and per chunk on creation).
        num_batches: Number of batches handed to ``time_batches``.
    """
    zarr_path = dataset + "_zarr"
    if os.path.exists(zarr_path):
        ds_zarr = zarr.open(zarr_path)
    else:
        ds_zarr = zarr.create(
            shape=(batch_size * num_batches, 785),
            chunks=(batch_size, None),
            store=zarr_path,
        )

    # an existing path could hold something other than an array
    assert isinstance(ds_zarr, zarr.core.Array)
    time_batches(ds_zarr, batch_size, num_batches)
Beispiel #14
0
def test_orthogonal_indexing_edge_cases():
    """Orthogonal indexing mixing an int, a full slice and a complete index list."""
    reference = np.arange(6).reshape(1, 2, 3)
    stored = zarr.create(shape=reference.shape, chunks=(1, 2, 3),
                         dtype=reference.dtype)
    stored[:] = reference

    # the last axis is selected completely, once by position and once by mask
    for last_axis in ([0, 1, 2], [True, True, True]):
        expect = oindex(reference, (0, slice(None), last_axis))
        actual = stored.oindex[0, :, last_axis]
        assert_array_equal(expect, actual)
Beispiel #15
0
def test_get_basic_selection_0d():
    """Basic-selection reads on zero-dimensional plain and structured arrays."""
    # --- plain scalar ---
    scalar = np.array(42)
    stored = zarr.create(shape=scalar.shape, dtype=scalar.dtype, fill_value=None)
    stored[...] = scalar

    assert_array_equal(scalar, stored.get_basic_selection(Ellipsis))
    assert_array_equal(scalar, stored[...])
    assert 42 == stored.get_basic_selection(())
    assert 42 == stored[()]

    # reading into a caller-supplied buffer
    buffer = np.zeros_like(scalar)
    stored.get_basic_selection(Ellipsis, out=buffer)
    assert_array_equal(scalar, buffer)

    # --- structured scalar ---
    record = (b'aaa', 1,  4.2)
    struct = np.array(record, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')])
    stored = zarr.create(shape=struct.shape, dtype=struct.dtype, fill_value=None)
    stored[()] = record
    pair = ['foo', 'bar']

    assert_array_equal(struct, stored.get_basic_selection(Ellipsis))
    assert_array_equal(struct, stored[...])
    assert struct[()] == stored.get_basic_selection(())
    assert struct[()] == stored[()]
    assert b'aaa' == stored.get_basic_selection((), fields='foo')
    assert b'aaa' == stored['foo']
    assert struct[pair] == stored.get_basic_selection((), fields=pair)
    assert struct[pair] == stored['foo', 'bar']

    # reading into caller-supplied buffers, with and without a field subset
    buffer = np.zeros_like(struct)
    stored.get_basic_selection(Ellipsis, out=buffer)
    assert_array_equal(struct, buffer)
    subset_buffer = np.zeros_like(struct[pair])
    stored.get_basic_selection(Ellipsis, out=subset_buffer, fields=pair)
    assert_array_equal(struct[pair], subset_buffer)
Beispiel #16
0
def _zarr_create_with_attrs(  # type: ignore[no-untyped-def]
        shape, chunks, dtype, store, path, overwrite, attrs, **kwargs):
    """Create a zarr array and, when ``attrs`` is given, set its attributes.

    Creation and the attribute update are kept in one function so that both
    happen within the same task (thread). Extra keyword arguments are passed
    through to ``zarr.create``. Returns the created array.
    """
    created = zarr.create(
        shape=shape,
        chunks=chunks,
        dtype=dtype,
        store=store,
        path=path,
        overwrite=overwrite,
        **kwargs,
    )
    if attrs is None:
        return created
    created.attrs.update(attrs)
    return created
Beispiel #17
0
def test_set_basic_selection_0d():
    """Basic-selection writes on zero-dimensional plain and structured arrays."""
    # --- plain scalar ---
    value = np.array(42)
    zero = np.zeros_like(value)
    stored = zarr.zeros_like(value)
    assert_array_equal(zero, stored)

    stored.set_basic_selection(Ellipsis, value)
    assert_array_equal(value, stored)
    stored[...] = 0
    assert_array_equal(zero, stored)
    stored[...] = value
    assert_array_equal(value, stored)

    # --- structured scalar ---
    record = (b'aaa', 1,  4.2)
    value = np.array(record, dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')])
    zero = np.zeros_like(value)
    stored = zarr.create(shape=zero.shape, dtype=zero.dtype, fill_value=None)

    # whole-record writes, first through the method and then through indexing
    for payload in (value, zero):
        stored.set_basic_selection(Ellipsis, payload)
        assert_array_equal(payload, stored)
    for payload in (value, zero):
        stored[...] = payload
        assert_array_equal(payload, stored)

    # single-field writes leave the other fields untouched
    stored.set_basic_selection(Ellipsis, value['foo'], fields='foo')
    assert value['foo'] == stored['foo']
    assert zero['bar'] == stored['bar']
    assert zero['baz'] == stored['baz']
    stored['bar'] = value['bar']
    assert value['foo'] == stored['foo']
    assert value['bar'] == stored['bar']
    assert zero['baz'] == stored['baz']

    # multiple field assignment not supported
    pair = ['foo', 'bar']
    with pytest.raises(IndexError):
        stored.set_basic_selection(Ellipsis, value[pair], fields=pair)
    with pytest.raises(IndexError):
        stored[..., 'foo', 'bar'] = value[pair]
Beispiel #18
0
def test_get_basic_selection_1d():
    """Basic-selection reads on a 1-D array; integer lists are rejected."""
    reference = np.arange(1050, dtype=int)
    stored = zarr.create(shape=reference.shape, chunks=100, dtype=reference.dtype)
    stored[:] = reference

    for good in basic_selections_1d:
        _test_get_basic_selection(reference, stored, good)

    # both the method and plain indexing must refuse invalid selections
    for bad in basic_selections_1d_bad:
        with pytest.raises(IndexError):
            stored.get_basic_selection(bad)
        with pytest.raises(IndexError):
            stored[bad]

    # an integer list is not a basic selection
    int_list = [1, 0]
    with pytest.raises(IndexError):
        stored.get_basic_selection(int_list)
Beispiel #19
0
def test_set_mask_selection_1d():
    """Mask writes on a 1-D array, plus rejection of invalid masks."""
    values = np.arange(1050, dtype=int)
    expected = np.empty_like(values)
    target = zarr.create(shape=expected.shape, chunks=100, dtype=expected.dtype)

    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):
        mask = np.random.binomial(1, density, size=expected.shape[0]).astype(bool)
        _test_set_mask_selection(values, expected, target, mask)

    # both the method and ``vindex`` assignment must refuse invalid masks
    for bad in mask_selections_1d_bad:
        with pytest.raises(IndexError):
            target.set_mask_selection(bad, 42)
        with pytest.raises(IndexError):
            target.vindex[bad] = 42
Beispiel #20
0
def test_get_basic_selection_1d():
    """Basic-selection reads on a 1-D array; fancy indexing is rejected."""
    reference = np.arange(1050, dtype=int)
    stored = zarr.create(shape=reference.shape, chunks=100, dtype=reference.dtype)
    stored[:] = reference

    for good in basic_selections_1d:
        _test_get_basic_selection(reference, stored, good)

    fancy = [0, 1]  # fancy indexing
    rejected = basic_selections_1d_bad + [fancy]
    # both the method and plain indexing must refuse these
    for bad in rejected:
        with pytest.raises(IndexError):
            stored.get_basic_selection(bad)
        with pytest.raises(IndexError):
            stored[bad]
Beispiel #21
0
def test_get_orthogonal_selection_2d():
    """Orthogonal reads on a 2-D array with masks, index arrays and basic selections."""
    reference = np.arange(10000, dtype=int).reshape(1000, 10)
    stored = zarr.create(shape=reference.shape, chunks=(300, 3),
                         dtype=reference.dtype)
    stored[:] = reference
    n_rows, n_cols = reference.shape

    np.random.seed(42)
    # only the row axis varies in sparseness; columns stay at one half
    for density in (0.5, 0.1, 0.01):

        # boolean masks on both axes
        row_mask = np.random.binomial(1, density, size=n_rows).astype(bool)
        col_mask = np.random.binomial(1, 0.5, size=n_cols).astype(bool)
        _test_get_orthogonal_selection_2d(reference, stored, row_mask, col_mask)

        # one axis as mask, the other as the equivalent integer positions
        mixed = (
            (row_mask, np.nonzero(col_mask)[0]),
            (np.nonzero(row_mask)[0], col_mask),
        )
        for case in mixed:
            _test_get_orthogonal_selection(reference, stored, case)

        # integer index arrays: unordered, increasing, decreasing
        row_idx = np.random.choice(n_rows, size=int(n_rows * density), replace=True)
        col_idx = np.random.choice(n_cols, size=int(n_cols * .5), replace=True)
        _test_get_orthogonal_selection_2d(reference, stored, row_idx, col_idx)
        row_idx.sort()
        col_idx.sort()
        _test_get_orthogonal_selection_2d(reference, stored, row_idx, col_idx)
        _test_get_orthogonal_selection_2d(reference, stored,
                                          row_idx[::-1], col_idx[::-1])

    for good in basic_selections_2d:
        _test_get_orthogonal_selection(reference, stored, good)

    # both the method and ``oindex`` must refuse invalid selections
    for bad in basic_selections_2d_bad:
        with pytest.raises(IndexError):
            stored.get_orthogonal_selection(bad)
        with pytest.raises(IndexError):
            stored.oindex[bad]
Beispiel #22
0
def test_get_coordinate_selection_1d():
    """Coordinate reads on a 1-D array, including out-of-bounds errors."""
    reference = np.arange(1050, dtype=int)
    stored = zarr.create(shape=reference.shape, chunks=100, dtype=reference.dtype)
    stored[:] = reference
    length = reference.shape[0]

    np.random.seed(42)
    # the factor 2 draws more points than the array has elements
    for density in (2, 0.5, 0.1, 0.01):
        points = np.random.choice(length, size=int(reference.size * density),
                                  replace=True)
        # unordered, then increasing, then decreasing
        _test_get_coordinate_selection(reference, stored, points)
        points.sort()
        _test_get_coordinate_selection(reference, stored, points)
        _test_get_coordinate_selection(reference, stored, points[::-1])

    fixed_cases = [
        # single items
        42,
        -1,
        # negative indices wrap around
        [0, 3, 10, -23, -12, -1],
        # not monotonically increasing
        [3, 105, 23, 127],
        # multi-dimensional selection
        np.array([[2, 4], [6, 8]]),
    ]
    for case in fixed_cases:
        _test_get_coordinate_selection(reference, stored, case)

    # errors: shared bad selections plus indices just past either end
    past_end = [length + 1]
    before_start = [-(length + 1)]
    rejected = coordinate_selections_1d_bad + [past_end, before_start]
    for bad in rejected:
        with pytest.raises(IndexError):
            stored.get_coordinate_selection(bad)
        with pytest.raises(IndexError):
            stored.vindex[bad]
Beispiel #23
0
def test_get_mask_selection_2d():
    """Mask reads on a 2-D array, plus rejection of wrongly shaped masks."""
    reference = np.arange(10000, dtype=int).reshape(1000, 10)
    stored = zarr.create(shape=reference.shape, chunks=(300, 3),
                         dtype=reference.dtype)
    stored[:] = reference

    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):
        flat_mask = np.random.binomial(1, density, size=reference.size)
        mask = flat_mask.astype(bool).reshape(reference.shape)
        _test_get_mask_selection(reference, stored, mask)

    # masks whose shape does not match the array must be refused
    wrong_masks = [
        np.zeros((1000, 5), dtype=bool),  # too short
        np.zeros((2000, 10), dtype=bool),  # too long
        [True, False],  # wrong no. dimensions
    ]
    for bad in wrong_masks:
        with pytest.raises(IndexError):
            stored.vindex[bad]
Beispiel #24
0
def test_get_orthogonal_selection_1d_bool():
    """Orthogonal reads on a 1-D array using boolean masks."""
    reference = np.arange(1050, dtype=int)
    stored = zarr.create(shape=reference.shape, chunks=100, dtype=reference.dtype)
    stored[:] = reference

    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):
        mask = np.random.binomial(1, density, size=reference.shape[0]).astype(bool)
        _test_get_orthogonal_selection(reference, stored, mask)

    # masks whose shape does not match the array must be refused
    wrong_masks = [
        np.zeros(50, dtype=bool),  # too short
        np.zeros(2000, dtype=bool),  # too long
        [[True, False], [False, True]],  # too many dimensions
    ]
    for bad in wrong_masks:
        with pytest.raises(IndexError):
            stored.oindex[bad]
Beispiel #25
0
def test_get_basic_selection_2d():
    """Basic-selection reads on a 2-D array; integer-array indexing is rejected."""
    reference = np.arange(10000, dtype=int).reshape(1000, 10)
    stored = zarr.create(shape=reference.shape, chunks=(300, 3),
                         dtype=reference.dtype)
    stored[:] = reference

    for good in basic_selections_2d:
        _test_get_basic_selection(reference, stored, good)

    # integer arrays are not basic selections
    integer_array_cases = [
        [0, 1],
        ([0, 1], [0, 1]),
        (slice(None), [0, 1]),
    ]
    rejected = basic_selections_2d_bad + integer_array_cases
    # both the method and plain indexing must refuse these
    for bad in rejected:
        with pytest.raises(IndexError):
            stored.get_basic_selection(bad)
        with pytest.raises(IndexError):
            stored[bad]
Beispiel #26
0
def time_zarr(dataset, batch_size=1):
    """Time batched reads from a zarr copy of a hub dataset.

    The zarr store lives at ``<second "/"-separated part of dataset>_zarr``.
    If that path does not exist yet, it is created and filled batch by batch:
    each row holds the flattened image followed by the flattened label. The
    timed section then reads every batch back, split into features (all but
    the last column) and the last column, printing the time per batch.

    Args:
        dataset: Hub dataset identifier; must contain at least one ``"/"``.
        batch_size: Number of rows per batch and per zarr chunk. Rows beyond
            the last full batch are neither copied nor read.
    """
    ds = hub.Dataset(dataset)
    zarr_path = dataset.split("/")[1] + "_zarr"
    num_batches = ds.shape[0] // batch_size

    if os.path.exists(zarr_path):
        ds_zarr = zarr.open(zarr_path)
    else:
        store = zarr.DirectoryStore(zarr_path)
        image_shape = ds["image"].shape
        # flattened image plus one extra column
        row_width = image_shape[1] * image_shape[2] * image_shape[3] + 1
        ds_zarr = zarr.create((image_shape[0], row_width),
                              store=store,
                              chunks=(batch_size, None))
        for batch in range(num_batches):
            rows = slice(batch * batch_size, (batch + 1) * batch_size)
            ds_numpy = np.concatenate(
                (
                    ds["image", rows].compute().reshape(batch_size, -1),
                    ds["label", rows].compute().reshape(batch_size, -1),
                ),
                axis=1,
            )
            ds_zarr[rows] = ds_numpy

    # an existing path could hold something other than an array
    assert isinstance(ds_zarr, zarr.core.Array)

    with Timer("Time"):
        t0 = time()
        for counter, batch in enumerate(range(num_batches), start=1):
            rows = slice(batch * batch_size, (batch + 1) * batch_size)
            # The reads themselves are what is being timed; the values are
            # discarded.
            ds_zarr[rows, :-1]
            ds_zarr[rows, -1]
            t1 = time()
            print("Batch", counter, f"dt: {t1 - t0}")
            t0 = t1
Beispiel #27
0
def test_get_orthogonal_selection_1d_int():
    """Orthogonal reads on a 1-D array using integer indices."""
    reference = np.arange(1050, dtype=int)
    stored = zarr.create(shape=reference.shape, chunks=100, dtype=reference.dtype)
    stored[:] = reference
    length = reference.shape[0]

    np.random.seed(42)
    # the factor 2 draws more indices than the array has elements
    for density in (2, 0.5, 0.1, 0.01):
        picks = np.random.choice(length, size=int(length * density), replace=True)
        # unordered, then increasing, then decreasing
        _test_get_orthogonal_selection(reference, stored, picks)
        picks.sort()
        _test_get_orthogonal_selection(reference, stored, picks)
        _test_get_orthogonal_selection(reference, stored, picks[::-1])

    wraparound = [0, 3, 10, -23, -12, -1]
    unsorted = [3, 105, 23, 127]
    for good in basic_selections_1d + [wraparound, unsorted]:
        _test_get_orthogonal_selection(reference, stored, good)

    rejected = basic_selections_1d_bad + [
        [length + 1],  # out of bounds
        [-(length + 1)],  # out of bounds
        [[2, 4], [6, 8]],  # too many dimensions
    ]
    # both the method and ``oindex`` must refuse these
    for bad in rejected:
        with pytest.raises(IndexError):
            stored.get_orthogonal_selection(bad)
        with pytest.raises(IndexError):
            stored.oindex[bad]
Beispiel #28
0
def test_set_coordinate_selection_1d():
    """Coordinate writes on a 1-D array, plus rejection of invalid selections."""
    values = np.arange(1050, dtype=int)
    expected = np.empty(values.shape, dtype=values.dtype)
    target = zarr.create(shape=expected.shape, chunks=100, dtype=expected.dtype)

    np.random.seed(42)
    # the factor 2 draws more points than the array has elements
    for density in (2, 0.5, 0.1, 0.01):
        count = int(expected.size * density)
        points = np.random.choice(expected.shape[0], size=count, replace=True)
        _test_set_coordinate_selection(values, expected, target, points)

    # index array that is itself two-dimensional
    grid = np.array([[2, 4], [6, 8]])
    _test_set_coordinate_selection(values, expected, target, grid)

    # both the method and ``vindex`` assignment must refuse invalid selections
    for bad in coordinate_selections_1d_bad:
        with pytest.raises(IndexError):
            target.set_coordinate_selection(bad, 42)
        with pytest.raises(IndexError):
            target.vindex[bad] = 42
Beispiel #29
0
def test_get_basic_selection_2d():
    """Basic-selection reads on a 2-D array, with fancy-indexing fallback."""
    reference = np.arange(10000, dtype=int).reshape(1000, 10)
    stored = zarr.create(shape=reference.shape, chunks=(300, 3),
                         dtype=reference.dtype)
    stored[:] = reference

    for good in basic_selections_2d:
        _test_get_basic_selection(reference, stored, good)

    # integer arrays are not basic selections
    integer_array_cases = [
        [0, 1],
        (slice(None), [0, 1]),
    ]
    for bad in basic_selections_2d_bad + integer_array_cases:
        with pytest.raises(IndexError):
            stored.get_basic_selection(bad)
        with pytest.raises(IndexError):
            stored[bad]

    # a pair of integer lists falls back to fancy indexing under ``[]``
    point_pairs = ([0, 1], [0, 1])
    np.testing.assert_array_equal(stored[point_pairs], [0, 11])
Beispiel #30
0
def test_get_mask_selection_1d():
    """Mask reads on a 1-D array, plus rejection of wrongly shaped masks."""
    reference = np.arange(1050, dtype=int)
    stored = zarr.create(shape=reference.shape, chunks=100, dtype=reference.dtype)
    stored[:] = reference

    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):
        mask = np.random.binomial(1, density, size=reference.shape[0]).astype(bool)
        _test_get_mask_selection(reference, stored, mask)

    wrong_masks = [
        np.zeros(50, dtype=bool),  # too short
        np.zeros(2000, dtype=bool),  # too long
        [[True, False], [False, True]],  # too many dimensions
    ]
    # both the method and ``vindex`` must refuse these
    for bad in mask_selections_1d_bad + wrong_masks:
        with pytest.raises(IndexError):
            stored.get_mask_selection(bad)
        with pytest.raises(IndexError):
            stored.vindex[bad]
Beispiel #31
0
def test_get_selection_out():
    """Reads into a caller-supplied ``out`` buffer for each selection API."""
    # --- basic selections, writing into a zarr array ---
    reference = np.arange(1050)
    stored = zarr.create(shape=1050, chunks=100, dtype=reference.dtype)
    stored[:] = reference
    for window in (slice(50, 150), slice(0, 1050), slice(1, 2)):
        expect = reference[window]
        out = zarr.create(shape=expect.shape, chunks=10, dtype=expect.dtype,
                          fill_value=0)
        stored.get_basic_selection(window, out=out)
        assert_array_equal(expect, out[:])

    # a plain list is not an acceptable output buffer
    with pytest.raises(TypeError):
        stored.get_basic_selection(Ellipsis, out=[])

    # --- orthogonal selections, writing into a NumPy array ---
    reference = np.arange(10000, dtype=int).reshape(1000, 10)
    stored = zarr.create(shape=reference.shape, chunks=(300, 3),
                         dtype=reference.dtype)
    stored[:] = reference
    n_rows, n_cols = reference.shape
    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):
        row_mask = np.random.binomial(1, density, size=n_rows).astype(bool)
        col_mask = np.random.binomial(1, .5, size=n_cols).astype(bool)
        cases = [
            # arrays on both axes
            (row_mask, col_mask),
            # array / slice
            (row_mask, slice(1, 5)),
            (slice(250, 350), col_mask),
            # array / int
            (row_mask, 4),
            (42, col_mask),
            # int array / bool array
            (row_mask, np.nonzero(col_mask)[0]),
            (np.nonzero(row_mask)[0], col_mask),
        ]
        for case in cases:
            expect = oindex(reference, case)
            out = np.zeros(expect.shape, dtype=expect.dtype)
            stored.get_orthogonal_selection(case, out=out)
            assert_array_equal(expect, out[:])

    # --- coordinate selections, writing into a NumPy array ---
    reference = np.arange(10000, dtype=int).reshape(1000, 10)
    stored = zarr.create(shape=reference.shape, chunks=(300, 3),
                         dtype=reference.dtype)
    stored[:] = reference
    n_rows, n_cols = reference.shape
    np.random.seed(42)
    for density in (0.5, 0.1, 0.01):
        count = int(reference.size * density)
        row_idx = np.random.choice(n_rows, size=count, replace=True)
        col_idx = np.random.choice(n_cols, size=count, replace=True)
        cases = [
            # arrays on both axes
            (row_idx, col_idx),
            # array / int
            (row_idx, 4),
            (42, col_idx),
        ]
        for case in cases:
            expect = reference[case]
            out = np.zeros(expect.shape, dtype=expect.dtype)
            stored.get_coordinate_selection(case, out=out)
            assert_array_equal(expect, out[:])
Beispiel #32
0
def test_get_selections_with_fields():
    """Field-restricted reads on a structured array through every selection API.

    For each field specification the ``fields=`` keyword form is checked
    first. The indexing-syntax alternative is checked only when the
    specification is a single field name or a list of exactly two names.
    """
    records = [('aaa', 1, 4.2),
               ('bbb', 2, 8.4),
               ('ccc', 3, 12.6)]
    reference = np.array(records,
                         dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')])
    stored = zarr.create(shape=reference.shape, chunks=2, dtype=reference.dtype,
                         fill_value=None)
    stored[:] = reference

    field_specs = [
        'foo',
        ['foo'],
        ['foo', 'bar'],
        ['foo', 'baz'],
        ['bar', 'baz'],
        ['foo', 'bar', 'baz'],
        ['bar', 'foo'],
        ['baz', 'bar', 'foo'],
    ]

    for fields in field_specs:
        single = isinstance(fields, str)
        pair = (not single) and len(fields) == 2

        # total selection
        expect = reference[fields]
        actual = stored.get_basic_selection(Ellipsis, fields=fields)
        assert_array_equal(expect, actual)
        # indexing syntax, without and then with a leading Ellipsis
        if single:
            assert_array_equal(expect, stored[fields])
            assert_array_equal(expect, stored[..., fields])
        elif pair:
            assert_array_equal(expect, stored[fields[0], fields[1]])
            assert_array_equal(expect, stored[..., fields[0], fields[1]])

        # basic selection with slice
        expect = reference[fields][0:2]
        actual = stored.get_basic_selection(slice(0, 2), fields=fields)
        assert_array_equal(expect, actual)
        if single:
            assert_array_equal(expect, stored[0:2, fields])
        elif pair:
            assert_array_equal(expect, stored[0:2, fields[0], fields[1]])

        # basic selection with single item
        expect = reference[fields][1]
        actual = stored.get_basic_selection(1, fields=fields)
        assert_array_equal(expect, actual)
        if single:
            assert_array_equal(expect, stored[1, fields])
        elif pair:
            assert_array_equal(expect, stored[1, fields[0], fields[1]])

        # orthogonal selection
        positions = [0, 2]
        expect = reference[fields][positions]
        actual = stored.get_orthogonal_selection(positions, fields=fields)
        assert_array_equal(expect, actual)
        if single:
            assert_array_equal(expect, stored.oindex[positions, fields])
        elif pair:
            assert_array_equal(
                expect, stored.oindex[positions, fields[0], fields[1]])

        # coordinate selection
        positions = [0, 2]
        expect = reference[fields][positions]
        actual = stored.get_coordinate_selection(positions, fields=fields)
        assert_array_equal(expect, actual)
        if single:
            assert_array_equal(expect, stored.vindex[positions, fields])
        elif pair:
            assert_array_equal(
                expect, stored.vindex[positions, fields[0], fields[1]])

        # mask selection
        mask = [True, False, True]
        expect = reference[fields][mask]
        actual = stored.get_mask_selection(mask, fields=fields)
        assert_array_equal(expect, actual)
        if single:
            assert_array_equal(expect, stored.vindex[mask, fields])
        elif pair:
            assert_array_equal(expect, stored.vindex[mask, fields[0], fields[1]])

    # missing/bad fields
    for bad_fields in (['notafield'], slice(None)):
        with pytest.raises(IndexError):
            stored.get_basic_selection(Ellipsis, fields=bad_fields)