Ejemplo n.º 1
0
def test_no_intermediate():
    """Check that a ``Rechunked`` without an intermediate does not mention one.

    The object is built directly from two in-memory zarr arrays with
    ``intermediate=None``; its text repr must not contain "Intermediate"
    and its HTML repr must render without raising.
    """
    source = zarr.ones((4, 4), chunks=(2, 2))
    target = zarr.ones((4, 4), chunks=(4, 1))
    # The two leading positional arguments are deliberately left as None.
    rechunked = api.Rechunked(
        None, None, source=source, intermediate=None, target=target
    )

    text = repr(rechunked)
    assert "Intermediate" not in text

    # Only exercised for errors; the returned markup is not inspected.
    rechunked._repr_html_()
Ejemplo n.º 2
0
def rechunked_fn(tmp_path, request):
    """Return a ``partial`` of ``api.rechunk`` bound to a freshly created source.

    The kind of source written below ``tmp_path`` depends on ``request.param``:

    * ``"Group"`` -- a zarr group containing arrays ``"a"`` and ``"b"``,
      rechunked with a per-array ``target_chunks`` mapping.
    * any other value -- a single 8000 x 8000 ``"f4"`` array of ones.

    In both cases the source (and, for the group, each member array) gets a
    ``foo = "bar"`` attribute.

    Parameters
    ----------
    tmp_path
        Directory-like object supporting ``/``; ``source.zarr``,
        ``target.zarr`` and ``temp.zarr`` paths are derived from it.
    request
        Object exposing a ``param`` attribute that selects the source kind.

    Returns
    -------
    functools.partial
        ``api.rechunk`` with the source, target chunks, memory limit, target
        store and ``temp_store`` already bound; extra arguments can still be
        supplied by the caller.
    """
    # The three store locations are identical for both source kinds.
    store_source = str(tmp_path / "source.zarr")
    target_store = str(tmp_path / "target.zarr")
    temp_store = str(tmp_path / "temp.zarr")

    if request.param == "Group":
        source = zarr.group(store_source)
        source.attrs["foo"] = "bar"
        # 800 byte chunks
        a = source.ones("a", shape=(5, 10, 20), chunks=(1, 10, 20), dtype="f4")
        a.attrs["foo"] = "bar"
        b = source.ones("b", shape=(8000, ), chunks=(100, ), dtype="f4")
        b.attrs["foo"] = "bar"

        max_mem = 16000  # should force a two-step plan for b
        target_chunks = {"a": (5, 10, 4), "b": (4000, )}
    else:
        source = zarr.ones((8000, 8000),
                           chunks=(200, 8000),
                           dtype="f4",
                           store=store_source)
        # add some attributes
        source.attrs["foo"] = "bar"

        max_mem = 25600000
        target_chunks = (8000, 200)

    # Built once for both branches; the result is returned directly so no
    # local name shadows this function.
    return partial(
        api.rechunk,
        source,
        target_chunks,
        max_mem,
        target_store,
        temp_store=temp_store,
    )
Ejemplo n.º 3
0
def test_ones_like():
    """Check that ``ones_like`` reproduces the template array's metadata.

    The clone must agree with the template on shape, chunks, dtype, cname,
    clevel, shuffle and fill_value.
    """
    template = ones(100, 10)
    clone = ones_like(template)

    compared = (
        "shape",
        "chunks",
        "dtype",
        "cname",
        "clevel",
        "shuffle",
        "fill_value",
    )
    for attr in compared:
        eq(getattr(template, attr), getattr(clone, attr))
Ejemplo n.º 4
0
def test_ones_like():
    """Check that ``ones_like`` reproduces the template array's metadata.

    The clone must agree with the template on shape, chunks, dtype, cname,
    clevel, shuffle and fill_value.
    """
    template = ones(100, 10)
    clone = ones_like(template)

    compared = (
        "shape",
        "chunks",
        "dtype",
        "cname",
        "clevel",
        "shuffle",
        "fill_value",
    )
    for attr in compared:
        eq(getattr(template, attr), getattr(clone, attr))
Ejemplo n.º 5
0
def sample_zarr_array(tmp_path):
    """Create and return an 8000 x 8000 ``"f4"`` zarr array of ones.

    The array is stored at ``tmp_path / "source.zarr"`` with chunks of
    ``(200, 8000)`` and carries a ``foo = "bar"`` attribute.

    Parameters
    ----------
    tmp_path
        Directory-like object supporting ``/`` under which the store is
        created.

    Returns
    -------
    The array object returned by ``zarr.ones``.
    """
    path = str(tmp_path / "source.zarr")
    array = zarr.ones((8000, 8000), chunks=(200, 8000), dtype="f4", store=path)
    # add some attributes
    array.attrs["foo"] = "bar"
    # No dimension attribute is written: the previous ``dims`` local was
    # always None, so its branch could never run.
    return array
Ejemplo n.º 6
0
def rechunk_delayed(tmp_path):
    """Set up a zarr-to-zarr rechunk and return it with its target path.

    An 8000 x 8000 ``'f4'`` array of ones chunked as ``(200, 8000)`` is
    written to ``tmp_path / 'source.zarr'`` and handed to
    ``api.rechunk_zarr2zarr_w_dask`` with target chunks ``(8000, 200)`` and a
    memory limit of 25600000.

    Returns
    -------
    tuple
        ``(result_of_rechunk_call, target_store_path)``.
    """
    source_path = str(tmp_path / 'source.zarr')
    target_store = str(tmp_path / 'target.zarr')
    temp_store = str(tmp_path / 'temp.zarr')

    a_source = zarr.ones(
        (8000, 8000), chunks=(200, 8000), dtype='f4', store=source_path
    )

    result = api.rechunk_zarr2zarr_w_dask(
        a_source, (8000, 200), 25600000, target_store, temp_store=temp_store
    )
    return result, target_store
Ejemplo n.º 7
0
def test_pywren_function_executor(tmp_path):
    """Rechunk an array of ones through a caller-managed Pywren executor.

    The test is skipped when ``pywren_ibm_cloud`` cannot be imported. It
    checks that ``api.rechunk`` returns an ``api.Rechunked``, that the target
    store already has the requested chunks, and that executing the plan
    yields a ``zarr.Array`` whose target contents are all equal to 1.
    """
    pytest.importorskip("pywren_ibm_cloud")
    from rechunker.executors.pywren import (
        PywrenExecutor,
        pywren_local_function_executor,
    )

    max_mem = 25600000
    target_chunks = (400, 8000)
    source_path = str(tmp_path / "source.zarr")
    target_store = str(tmp_path / "target.zarr")
    temp_store = str(tmp_path / "temp.zarr")

    # Create a Pywren function executor that we manage ourselves
    # and pass in to rechunker's PywrenExecutor
    with pywren_local_function_executor() as function_executor:
        executor = PywrenExecutor(function_executor)

        # Source array: 8000 x 8000 ones, chunked along the first axis.
        source_array = zarr.ones(
            (8000, 8000), chunks=(200, 8000), dtype="f4", store=source_path
        )

        rechunked = api.rechunk(
            source_array,
            target_chunks,
            max_mem,
            target_store,
            temp_store=temp_store,
            executor=executor,
        )
        assert isinstance(rechunked, api.Rechunked)

        # The target is opened and its chunks checked before the plan runs.
        target_array = zarr.open(target_store)
        expected_chunks = tuple(target_chunks)
        assert target_array.chunks == expected_chunks

        result = rechunked.execute()
        assert isinstance(result, zarr.Array)

        a_tar = dsa.from_zarr(target_array)
        assert dsa.equal(a_tar, 1).all().compute()
Ejemplo n.º 8
0
def test_equality_operator():
    """Check which comparison ``pick_equality_operator`` selects per type.

    numpy arrays, an ``ndarray`` subclass and an xarray ``DataArray`` are
    expected to map to ``np.array_equal``; dask and zarr arrays are expected
    to map to ``operator.is_``.
    """
    import operator

    import dask.array as da
    import numpy as np
    import xarray as xr
    import zarr

    class MyNPArray(np.ndarray):
        pass

    plain = np.ones((1, 1))
    assert pick_equality_operator(plain) == np.array_equal

    subclassed = MyNPArray([1, 1])
    assert pick_equality_operator(subclassed) == np.array_equal

    lazy = da.ones((1, 1))
    assert pick_equality_operator(lazy) == operator.is_

    stored = zarr.ones((1, 1))
    assert pick_equality_operator(stored) == operator.is_

    labelled = xr.DataArray(np.ones((1, 1)))
    assert pick_equality_operator(labelled) == np.array_equal
Ejemplo n.º 9
0
def test_no_intermediate_fused(tmp_path):
    """Check that a rechunk plan without a temp store stays below 20 tasks.

    An 8000 x 8000 ``"f4"`` array chunked as ``(200, 8000)`` is rechunked to
    ``(400, 8000)`` with no ``temp_store`` argument; the number of entries in
    ``rechunked.plan.dask`` that ``dask.core.istask`` accepts must be < 20.
    """
    source_path = str(tmp_path / "source.zarr")
    target_store = str(tmp_path / "target.zarr")

    source_array = zarr.ones(
        (8000, 8000), chunks=(200, 8000), dtype="f4", store=source_path
    )

    rechunked = api.rechunk(source_array, (400, 8000), 25600000, target_store)

    graph_values = rechunked.plan.dask.values()
    num_tasks = sum(1 for value in graph_values if dask.core.istask(value))
    assert num_tasks < 20  # less than if no fuse
Ejemplo n.º 10
0
def test_equality_operator():
    """Check which comparison ``pick_equality_operator`` selects per type.

    numpy arrays, an ``ndarray`` subclass and an xarray ``DataArray`` are
    expected to map to ``_quiet_array_equal``; dask and zarr arrays are
    expected to map to ``operator.is_``. The operator chosen for an empty
    numpy array must also report an empty float array and an empty ``'<U32'``
    array as not equal.
    """
    import operator

    import dask.array as da
    import numpy as np
    import xarray as xr
    import zarr

    class MyNPArray(np.ndarray):
        pass

    plain = np.ones((1, 1))
    assert pick_equality_operator(plain) == _quiet_array_equal

    subclassed = MyNPArray([1, 1])
    assert pick_equality_operator(subclassed) == _quiet_array_equal

    lazy = da.ones((1, 1))
    assert pick_equality_operator(lazy) == operator.is_

    stored = zarr.ones((1, 1))
    assert pick_equality_operator(stored) == operator.is_

    labelled = xr.DataArray(np.ones((1, 1)))
    assert pick_equality_operator(labelled) == _quiet_array_equal

    compare = pick_equality_operator(np.asarray([]))
    empty_default = np.asarray([])
    empty_unicode = np.asarray([], '<U32')
    # make sure this doesn't warn
    assert not compare(empty_default, empty_unicode)
Ejemplo n.º 11
0
def test_rechunk_array(tmp_path, shape, source_chunks, dtype, dims,
                       target_chunks, max_mem, executor):
    """Rechunk an array of ones and verify the plan, metadata and contents.

    A source array of ones is written to ``tmp_path / "source.zarr"`` with a
    ``foo = "bar"`` attribute (plus ``_DIMENSION_KEY`` when ``dims`` is
    truthy) and passed to ``api.rechunk``. The test then checks that:

    * the call returns an ``api.Rechunked``;
    * the opened target has the requested chunks, where a dict of
      ``target_chunks`` is resolved in ``dims`` order;
    * source and target attributes are equal;
    * executing the plan returns a ``zarr.Array`` and every element read from
      the target equals 1.
    """
    source_path = str(tmp_path / "source.zarr")
    target_store = str(tmp_path / "target.zarr")
    temp_store = str(tmp_path / "temp.zarr")

    # Source array with its attributes.
    source_array = zarr.ones(
        shape, chunks=source_chunks, dtype=dtype, store=source_path
    )
    source_array.attrs["foo"] = "bar"
    if dims:
        source_array.attrs[_DIMENSION_KEY] = dims

    rechunked = api.rechunk(
        source_array,
        target_chunks,
        max_mem,
        target_store,
        temp_store=temp_store,
        executor=executor,
    )
    assert isinstance(rechunked, api.Rechunked)

    target_array = zarr.open(target_store)

    # A dict maps dimension name -> chunk size and is ordered by ``dims``.
    if isinstance(target_chunks, dict):
        expected_chunks = tuple(target_chunks[d] for d in dims)
    else:
        expected_chunks = tuple(target_chunks)
    assert target_array.chunks == expected_chunks
    assert dict(source_array.attrs) == dict(target_array.attrs)

    result = rechunked.execute()
    assert isinstance(result, zarr.Array)

    a_tar = dsa.from_zarr(target_array)
    assert dsa.equal(a_tar, 1).all().compute()
Ejemplo n.º 12
0
def test_ones():
    """Check that ``ones(100, 10)`` has shape (100,), chunks (10,) and holds ones."""
    z = ones(100, 10)

    expected_shape = (100, )
    expected_chunks = (10, )
    eq(expected_shape, z.shape)
    eq(expected_chunks, z.chunks)

    # Read the whole array back and compare it element-wise.
    contents = z[:]
    assert_array_equal(np.ones(100), contents)
Ejemplo n.º 13
0
def test_ones():
    """Check that ``ones(100, 10)`` has shape (100,), chunks (10,) and holds ones."""
    z = ones(100, 10)

    expected_shape = (100,)
    expected_chunks = (10,)
    eq(expected_shape, z.shape)
    eq(expected_chunks, z.chunks)

    # Read the whole array back and compare it element-wise.
    contents = z[:]
    assert_array_equal(np.ones(100), contents)