Ejemplo n.º 1
0
def test_rechunk_2d():
    """Rechunk a random 10x30 matrix and verify both layout and contents."""
    source = np.random.uniform(0, 1, 300).reshape((10, 30))

    # Start from an irregular row chunking and six equal column chunks.
    row_chunks = (1, 2, 3, 4)
    col_chunks = (5, ) * 6
    darr = da.from_array(source, chunks=(row_chunks, col_chunks))

    target = ((5, 5), (15, ) * 2)
    rechunked = rechunk(darr, chunks=target)

    # The chunk layout must match the request exactly ...
    assert rechunked.chunks == target
    # ... and the data must come back unchanged.
    materialized = rechunked.compute()
    assert np.all(materialized == source)
Ejemplo n.º 2
0
def test_rechunk_1d():
    """Rechunk a random 300-element vector from 3 chunks of 100 to 6 of 50."""
    source = np.random.uniform(0, 1, 300)

    initial = ((100, ) * 3, )
    darr = da.from_array(source, chunks=initial)

    target = ((50, ) * 6,)
    rechunked = rechunk(darr, chunks=target)

    # Layout must equal the request and the values must be preserved.
    assert rechunked.chunks == target
    materialized = rechunked.compute()
    assert np.all(materialized == source)
Ejemplo n.º 3
0
def test_rechunk_4d():
    """Rechunk a random 4d array from two chunks per axis to one per axis."""
    ndim = 4
    initial = ((5, 5), ) * ndim
    source = np.random.uniform(0, 1, 10000).reshape((10, ) * ndim)
    darr = da.from_array(source, chunks=initial)

    # Merge every axis into a single chunk of length 10.
    target = ((10, ), ) * ndim
    rechunked = rechunk(darr, chunks=target)

    assert rechunked.chunks == target
    materialized = rechunked.compute()
    assert np.all(materialized == source)
Ejemplo n.º 4
0
def test_rechunk_blockshape():
    """Check that rechunk accepts a blockshape (one chunk size per axis)."""
    shape = (10, 10)
    blockshape = (4, 3)
    # The explicit per-axis chunk tuples the blockshape should expand to.
    expected_chunks = normalize_chunks(blockshape, shape)

    initial_chunks = ((4, 4, 2), (3, 3, 3, 1))
    source = np.random.uniform(0, 1, 100).reshape((10, 10))
    darr = da.from_array(source, chunks=initial_chunks)

    rechunked = rechunk(darr, chunks=blockshape)

    assert rechunked.chunks == expected_chunks
    materialized = rechunked.compute()
    assert np.all(materialized == source)
Ejemplo n.º 5
0
def test_rechunk_blockshape():
    """Verify that a plain blockshape is a valid ``chunks`` argument to rechunk."""
    array_shape = (10, 10)
    requested = (4, 3)
    # Expand the blockshape into explicit block dimensions for comparison.
    wanted_blockdims = normalize_chunks(requested, array_shape)

    starting_chunks = ((4, 4, 2), (3, 3, 3, 1))
    values = np.random.uniform(0, 1, 100).reshape((10, 10))
    lazy = da.from_array(values, chunks=starting_chunks)

    result = rechunk(lazy, chunks=requested)

    assert result.chunks == wanted_blockdims
    computed = result.compute()
    assert np.all(computed == values)
Ejemplo n.º 6
0
def histogramdd(data, bins=None, *args, **kwargs):
    """Facade function to create multi-dimensional histogram using dask.

    Each "column" must be one-dimensional.

    Parameters
    ----------
    data
        One of:

        * a list or tuple of columns, which are stacked along axis 1;
        * an object exposing a ``dask`` attribute, which is rechunked so
          that axis 1 forms a single chunk;
        * anything else with a two-element ``shape``, which is wrapped via
          ``dask.array.from_array`` and split along axis 0 according to
          ``options["chunk_split"]``.
    bins
        Forwarded unchanged to the per-block histogram function.
    *args, **kwargs
        Forwarded to the per-block histogram function, except for the
        keywords ``compute`` (default ``True``) and ``dask_method``
        (default ``"threaded"``), which are consumed here and handed to
        the dask runner. ``adaptive``, if given, must be truthy.

    Returns
    -------
    Whatever ``_run_dask`` returns for the given ``compute`` setting.

    Raises
    ------
    ValueError
        If the data is not two-dimensional, or if ``adaptive`` is falsy.
    """
    import dask.array
    from dask.array.rechunk import rechunk

    if isinstance(data, (list, tuple)):
        data = dask.array.stack(data, axis=1)

    # Validate dimensionality *before* anything reads data.shape[1];
    # otherwise 1-d input fails with an IndexError instead of this message.
    if len(data.shape) != 2:
        raise ValueError(
            f"Only (n, dim) data allowed for histogramdd, {data.shape} encountered."
        )

    if not hasattr(data, "dask"):
        n_rows, n_cols = data.shape
        # Clamp to 1: with fewer rows than chunk_split the quotient truncates
        # to 0, which is not a usable chunk length for a non-empty axis.
        rows_per_chunk = max(1, int(n_rows / options["chunk_split"]))
        data = dask.array.from_array(data, chunks=(rows_per_chunk, n_cols))
    else:
        # Keep every row intact: all columns go into a single chunk.
        data = rechunk(data, {1: data.shape[1]})

    if not kwargs.get("adaptive", True):
        raise ValueError(
            "Only adaptive histograms supported for dask (currently).")
    kwargs["adaptive"] = True

    # Strip the runner-only options now so that block_hist, which closes
    # over kwargs, never forwards them to the per-block histogram function.
    compute = kwargs.pop("compute", True)
    dask_method = kwargs.pop("dask_method", "threaded")

    def block_hist(array):
        return original_hdd(array, bins, *args, **kwargs)

    return _run_dask(name="dask_adaptive_dd",
                     data=data,
                     compute=compute,
                     method=dask_method,
                     func=block_hist,
                     expand_arg=True)