Exemple #1
0
def test_depth_greater_than_dim():
    """A depth larger than the axis it applies to must raise ValueError."""
    source = np.arange(144).reshape(12, 12)
    chunked = da.from_array(source, chunks=(3, 5))

    # Axis 0 holds only 12 elements, so a depth of 13 cannot be satisfied.
    too_deep = {0: 13, 1: 4}
    with pytest.raises(ValueError, match="The overlapping depth"):
        overlap(chunked, depth=too_deep, boundary=1)
Exemple #2
0
def test_overlap():
    """Check chunks, values and key determinism of ``overlap`` on an 8x8 grid.

    The 8x8 input is split into 4x4 blocks and overlapped with depth 2 along
    axis 0 (constant boundary of 100) and depth 1 along axis 1 (first
    ``"reflect"``, then ``"none"``).  The expected arrays are derived from
    ``x`` by selecting the rows/columns every block ends up holding and then
    padding axis 0 with the constant boundary value.
    """
    x = np.arange(64).reshape((8, 8))
    d = da.from_array(x, chunks=(4, 4))

    # Rows of ``x`` held by the two row-blocks once each has borrowed two
    # rows from its neighbour (the constant boundary rows are padded on below).
    shared_rows = [0, 1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7]
    constant_pad = ((2, 2), (0, 0))

    # --- axis 1 uses a "reflect" boundary -------------------------------
    g = overlap(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: "reflect"})
    assert g.chunks == ((8, 8), (6, 6))
    reflect_cols = [0, 0, 1, 2, 3, 4, 3, 4, 5, 6, 7, 7]
    expected = np.pad(
        x[shared_rows][:, reflect_cols], constant_pad, constant_values=100
    )
    assert_eq(g, expected)
    # Building the same overlap twice must produce identical graph keys.
    again = overlap(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: "reflect"})
    assert same_keys(g, again)

    # --- axis 1 has no boundary ("none") --------------------------------
    g = overlap(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: "none"})
    inner_cols = [0, 1, 2, 3, 4, 3, 4, 5, 6, 7]
    expected = np.pad(
        x[shared_rows][:, inner_cols], constant_pad, constant_values=100
    )
    assert_eq(g, expected)
    assert g.chunks == ((8, 8), (5, 5))
Exemple #3
0
def test_nearest_overlap():
    """Overlapping with 'nearest' and trimming again must give back the input."""
    a = np.arange(144).reshape(12, 12).astype(float)
    darr = da.from_array(a, chunks=(6, 6))

    depth = {0: 5, 1: 5}
    boundary = {axis: "nearest" for axis in (0, 1)}

    padded = overlap(darr, depth=depth, boundary=boundary)
    trimmed = trim_internal(padded, {0: 5, 1: 5}, boundary="nearest")
    assert_array_almost_equal(trimmed, a)
Exemple #4
0
def test_nearest_overlap():
    """Overlapping with 'nearest' and trimming again must give back the input."""
    a = np.arange(144).reshape(12, 12).astype(float)
    darr = da.from_array(a, chunks=(6, 6))

    nearest_on_both_axes = {axis: 'nearest' for axis in (0, 1)}
    padded = overlap(darr, depth={0: 5, 1: 5}, boundary=nearest_on_both_axes)

    # trim_internal is called without an explicit boundary here.
    trimmed = trim_internal(padded, {0: 5, 1: 5})
    assert_array_almost_equal(trimmed, a)
Exemple #5
0
def test_overlap():
    """Check chunks, values and key determinism of ``overlap`` on an 8x8 grid.

    Axis 0 uses depth 2 with a constant boundary of 100; axis 1 uses depth 1
    with a ``'reflect'`` boundary first and a ``'none'`` boundary second.
    """
    x = np.arange(64).reshape((8, 8))
    d = da.from_array(x, chunks=(4, 4))

    def expected_for(cols):
        # Rows held by the two row-blocks after sharing two rows with each
        # other, framed top and bottom by two rows of the constant 100.
        rows = [0, 1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7]
        frame = [[100] * len(cols) for _ in range(2)]
        body = x[np.ix_(rows, cols)].tolist()
        return np.array(frame + body + frame)

    g = overlap(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: 'reflect'})
    assert g.chunks == ((8, 8), (6, 6))
    # 'reflect' repeats the outermost column on each outer edge.
    expected = expected_for([0, 0, 1, 2, 3, 4, 3, 4, 5, 6, 7, 7])
    assert_eq(g, expected)
    rebuilt = overlap(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: 'reflect'})
    assert same_keys(g, rebuilt)

    g = overlap(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: 'none'})
    # 'none' adds nothing on the outer edges of axis 1.
    expected = expected_for([0, 1, 2, 3, 4, 3, 4, 5, 6, 7])
    assert_eq(g, expected)
    assert g.chunks == ((8, 8), (5, 5))
Exemple #6
0
def test_depth_greater_than_smallest_chunk_combines_chunks(chunks):
    """Every output chunk must be at least twice the depth of its axis."""
    source = np.arange(144).reshape(12, 12)
    darr = da.from_array(source, chunks=chunks)

    depth = {0: 4, 1: 2}
    output = overlap(darr, depth=depth, boundary=1)

    for axis, axis_depth in depth.items():
        assert all(size >= axis_depth * 2 for size in output.chunks[axis])
Exemple #7
0
def test_different_depths_and_boundary_combinations(depth):
    """overlap followed by trim_internal must round-trip for each boundary."""
    expected = np.arange(100).reshape(10, 10)
    darr = da.from_array(expected, chunks=(5, 2))

    # Build every overlapped array up front, then trim and compare each one.
    boundaries = ("reflect", "nearest", "periodic", 42)
    overlapped = [
        (kind, overlap(darr, depth=depth, boundary=kind)) for kind in boundaries
    ]

    for kind, padded in overlapped:
        result = trim_internal(padded, depth, boundary=kind)
        assert_array_equal(result, expected)
Exemple #8
0
def test_depth_greater_than_boundary_length():
    """Round-trip overlap/trim when the depth exceeds the 5x5 chunk size."""
    expected = np.arange(100).reshape(10, 10)
    darr = da.from_array(expected, chunks=(5, 5))

    depth = {0: 8, 1: 7}

    # Build every overlapped array first, then trim and compare each one.
    overlapped = [
        overlap(darr, depth=depth, boundary=kind)
        for kind in ("reflect", "nearest", "periodic", 42)
    ]

    for padded in overlapped:
        assert_array_equal(trim_internal(padded, depth), expected)
Exemple #9
0
def test_depth_greater_than_boundary_length():
    """Round-trip overlap/trim when the depth exceeds the 5x5 chunk size."""
    expected = np.arange(100).reshape(10, 10)
    darr = da.from_array(expected, chunks=(5, 5))

    depth = {0: 8, 1: 7}

    # One overlapped array per boundary mode, all created before any trimming.
    boundary_modes = ('reflect', 'nearest', 'periodic', 42)
    padded_arrays = [
        overlap(darr, depth=depth, boundary=mode) for mode in boundary_modes
    ]

    for padded in padded_arrays:
        result = trim_internal(padded, depth)
        assert_array_equal(result, expected)
def test_depth_equals_boundary_length():
    """Round-trip overlap/trim when the depth equals the 5x5 chunk size."""
    expected = np.arange(100).reshape(10, 10)
    darr = da.from_array(expected, chunks=(5, 5))

    depth = {0: 5, 1: 5}

    # One overlapped array per boundary mode, all created before any trimming.
    boundary_modes = ('reflect', 'nearest', 'periodic', 42)
    padded_arrays = [
        overlap(darr, depth=depth, boundary=mode) for mode in boundary_modes
    ]

    for padded in padded_arrays:
        result = trim_internal(padded, depth)
        assert_array_equal(result, expected)
Exemple #11
0
def map_overlap(func,
                *args,
                depth=None,
                boundary=None,
                trim=True,
                align_arrays=True,
                **kwargs):
    """Map a function over blocks of arrays with some overlap

    We share neighboring zones between blocks of the array, map a
    function, and then trim away the neighboring strips.

    Parameters
    ----------
    func: function
        The function to apply to each extended block
    args : dask arrays
    depth: int, tuple, dict or list
        The number of elements that each block should share with its neighbors
        If a tuple or dict then this can be different per axis.
        If a list then each element of that list must be an int, tuple or dict
        defining depth for the corresponding array in `args`.
        Asymmetric depths may be specified using a dict value of (-/+) tuples.
        Note that asymmetric depths are currently only supported when
        ``boundary`` is 'none'.
        The default value is 0.
    boundary: str, tuple, dict or list
        How to handle the boundaries.
        Values include 'reflect', 'periodic', 'nearest', 'none',
        or any constant value like 0 or np.nan.
        If a list then each element must be a str, tuple or dict defining the
        boundary for the corresponding array in `args`.
        The default value is 'reflect'.
    trim: bool
        Whether or not to trim ``depth`` elements from each block after
        calling the map function.
        Set this to False if your mapping function already does this for you
    align_arrays: bool
        Whether or not to align chunks along equally sized dimensions when
        multiple arrays are provided.  This allows for larger chunks in some
        arrays to be broken into smaller ones that match chunk sizes in other
        arrays such that they are compatible for block function mapping. If
        this is false, then an error will be thrown if arrays do not already
        have the same number of blocks in each dimensions.
    **kwargs:
        Other keyword arguments valid in ``map_blocks``

    Returns
    -------
    The result of ``map_blocks`` applied to the overlapped arrays, passed
    through ``trim_internal`` when ``trim`` is true.

    Raises
    ------
    TypeError
        If ``func`` is not callable or any positional argument is not an
        ``Array``.
    NotImplementedError
        If an asymmetric (tuple) depth is combined with a boundary other
        than 'none' on the same axis.

    Examples
    --------
    >>> import numpy as np
    >>> import dask.array as da

    >>> x = np.array([1, 1, 2, 3, 3, 3, 2, 1, 1])
    >>> x = da.from_array(x, chunks=5)
    >>> def derivative(x):
    ...     return x - np.roll(x, 1)

    >>> y = x.map_overlap(derivative, depth=1, boundary=0)
    >>> y.compute()
    array([ 1,  0,  1,  1,  0,  0, -1, -1,  0])

    >>> x = np.arange(16).reshape((4, 4))
    >>> d = da.from_array(x, chunks=(2, 2))
    >>> d.map_overlap(lambda x: x + x.size, depth=1).compute()
    array([[16, 17, 18, 19],
           [20, 21, 22, 23],
           [24, 25, 26, 27],
           [28, 29, 30, 31]])

    >>> func = lambda x: x + x.size
    >>> depth = {0: 1, 1: 1}
    >>> boundary = {0: 'reflect', 1: 'none'}
    >>> d.map_overlap(func, depth, boundary).compute()  # doctest: +NORMALIZE_WHITESPACE
    array([[12,  13,  14,  15],
           [16,  17,  18,  19],
           [20,  21,  22,  23],
           [24,  25,  26,  27]])
    """
    # Look for invocation using deprecated single-array signature
    # map_overlap(x, func, depth, boundary=None, trim=True, **kwargs)
    # ``args`` is checked for emptiness first so that ``map_overlap(x)`` falls
    # through to the TypeError below instead of failing with an IndexError.
    if isinstance(func, Array) and args and callable(args[0]):
        warnings.warn(
            "Detected use of signature map_overlap(x, func) rather than "
            "map_overlap(func, *args) for multi-array support. Arguments "
            "will be swapped in this case but such an exception will not "
            "be made in a future release.",
            FutureWarning,
        )
        # Positional layout of the deprecated call once the array is removed;
        # any of depth/boundary/trim given positionally is picked out of args.
        sig = ["func", "depth", "boundary", "trim"]
        depth = get(sig.index("depth"), args, depth)
        boundary = get(sig.index("boundary"), args, boundary)
        trim = get(sig.index("trim"), args, trim)
        func, args = args[0], [func]

    if not callable(func):
        raise TypeError("First argument must be callable function, not {}\n"
                        "Usage:   da.map_overlap(function, x)\n"
                        "   or:   da.map_overlap(function, x, y, z)".format(
                            type(func).__name__))
    if not all(isinstance(x, Array) for x in args):
        raise TypeError("All variadic arguments must be arrays, not {}\n"
                        "Usage:   da.map_overlap(function, x)\n"
                        "   or:   da.map_overlap(function, x, y, z)".format(
                            [type(x).__name__ for x in args]))

    # Coerce depth and boundary arguments to lists of individual
    # specifications for each array argument
    def coerce(xs, arg, fn):
        if not isinstance(arg, list):
            arg = [arg] * len(xs)
        return [fn(x.ndim, a) for x, a in zip(xs, arg)]

    depth = coerce(args, depth, coerce_depth)
    boundary = coerce(args, boundary, coerce_boundary)

    # Align chunks in each array to a common size
    if align_arrays:
        # Reverse unification order to allow block broadcasting
        inds = [list(reversed(range(x.ndim))) for x in args]
        _, args = da.core.unify_chunks(*list(concat(zip(args, inds))),
                                       warn=False)

    # Asymmetric (tuple) depths are only allowed on axes without a boundary.
    for i, x in enumerate(args):
        for j in range(x.ndim):
            if isinstance(depth[i][j], tuple) and boundary[i][j] != "none":
                raise NotImplementedError(
                    "Asymmetric overlap is currently only implemented "
                    "for boundary='none', however boundary for dimension "
                    "{} in array argument {} is {}".format(
                        j, i, boundary[i][j]))

    def assert_int_chunksize(xs):
        # Internal sanity check: every chunk size must be a plain ``int``.
        assert all(type(c) is int for x in xs for cc in x.chunks for c in cc)

    assert_int_chunksize(args)
    args = [
        overlap(x, depth=d, boundary=b)
        for x, d, b in zip(args, depth, boundary)
    ]
    assert_int_chunksize(args)
    x = da.map_blocks(func, *args, **kwargs)
    assert_int_chunksize([x])
    if trim:
        # Find index of array argument with maximum rank and break ties by choosing first provided
        i = sorted(enumerate(args), key=lambda v: (v[1].ndim, -v[0]))[-1][0]
        # Trim using depth/boundary setting for array of highest rank
        return trim_internal(x, depth[i], boundary[i])
    else:
        return x
Exemple #12
0
def dask_curvilinear(glyph, xr_ds, schema, canvas, summary, cuda):
    """Build a dask task graph that aggregates a grid with 2D coordinates.

    The x/y coordinate arrays are overlapped by one element (NaN at the
    outer boundary) so that each chunk can compute its interval breaks.
    One task is created per data chunk; a final task combines and finalizes
    the per-chunk aggregates.

    Returns
    -------
    (dsk, result_name)
        The task graph as a dict and the key of its final result.

    Raises
    ------
    ValueError
        If either coordinate is not a dask array with the same dimension
        order and chunks as the data variable.
    """
    shape, bounds, st, axis = shape_bounds_st_and_axis(xr_ds, canvas, glyph)

    # Compile functions
    create, info, append, combine, finalize = compile_components(
        summary, schema, glyph, cuda=cuda)
    x_mapper = canvas.x_axis.mapper
    y_mapper = canvas.y_axis.mapper
    extend = glyph._build_extend(x_mapper, y_mapper, info, append)

    x_coord_name, y_coord_name, z_name = glyph.x, glyph.y, glyph.name

    data_dim_names = list(xr_ds[z_name].dims)
    x_coord_dim_names = list(xr_ds[x_coord_name].dims)
    y_coord_dim_names = list(xr_ds[y_coord_name].dims)
    zs = xr_ds[z_name].data
    x_centers = xr_ds[glyph.x].data
    y_centers = xr_ds[glyph.y].data

    var_name = list(xr_ds.data_vars.keys())[0]

    # Validate coordinates: x first, then y, both against the data variable.
    err_msg = (
        "DataArray {name} is backed by a Dask array, \n"
        "but coordinate {coord} is not backed by a Dask array with identical \n"
        "dimension order and chunks")
    for coord_centers, coord in ((x_centers, glyph.x), (y_centers, glyph.y)):
        mismatched = (
            not isinstance(coord_centers, dask.array.Array)
            or xr_ds[glyph.name].dims != xr_ds[coord].dims
            or xr_ds[glyph.name].chunks != xr_ds[coord].chunks
        )
        if mismatched:
            raise ValueError(err_msg.format(name=glyph.name, coord=coord))

    # Make sure coordinates are floats so that overlap with nan will behave properly
    if x_centers.dtype.kind != 'f':
        x_centers = x_centers.astype(np.float64)
    if y_centers.dtype.kind != 'f':
        y_centers = y_centers.astype(np.float64)

    x_overlapped_centers = overlap(x_centers, depth=1, boundary=np.nan)
    y_overlapped_centers = overlap(y_centers, depth=1, boundary=np.nan)

    def chunk(np_zs, np_x_centers, np_y_centers):
        """Aggregate one data chunk given its overlapped x/y centers."""
        # Handle boundaries that have nothing to overlap with: an all-NaN
        # outer row/column is replaced by a linear extrapolation of the two
        # lines next to it.  Rows are handled through the array itself and
        # columns through its transposed view, which writes into the same
        # array.
        for centers in (np_x_centers, np_y_centers):
            for view in (centers, centers.T):
                if np.isnan(view[0, :]).all():
                    view[0, :] = view[1, :] - (view[2, :] - view[1, :])
                if np.isnan(view[-1, :]).all():
                    view[-1, :] = view[-2, :] + (view[-2, :] - view[-3, :])

        # Compute interval breaks and trim off the overlapped border
        x_breaks_chunk = glyph.infer_interval_breaks(np_x_centers)[1:-1, 1:-1]
        y_breaks_chunk = glyph.infer_interval_breaks(np_y_centers)[1:-1, 1:-1]

        # Reconstruct dataset for chunk from numpy array and chunk indices
        trimmed_x = np_x_centers[1:-1, 1:-1]
        trimmed_y = np_y_centers[1:-1, 1:-1]
        chunk_ds = xr.DataArray(
            np_zs,
            coords={
                x_coord_name: (x_coord_dim_names, trimmed_x),
                y_coord_name: (y_coord_dim_names, trimmed_y),
            },
            dims=data_dim_names,
            name=var_name,
        ).to_dataset()

        # Initialize aggregation buffers, then aggregate this chunk into them
        aggs = create(shape)
        extend(aggs, chunk_ds, st, bounds,
               x_breaks=x_breaks_chunk, y_breaks=y_breaks_chunk)
        return aggs

    result_name = tokenize(xr_ds.__dask_tokenize__(), canvas, glyph, summary)

    def flatten_keys(arr):
        # __dask_keys__() is nested by row here; flatten it to one list.
        return [key for row in arr.__dask_keys__() for key in row]

    z_keys = flatten_keys(zs)
    x_overlap_keys = flatten_keys(x_overlapped_centers)
    y_overlap_keys = flatten_keys(y_overlapped_centers)

    result_keys = [(result_name, i) for i in range(len(z_keys))]

    # One aggregation task per chunk, pairing data with its overlapped coords
    dsk = {
        res_key: (chunk, z_key, x_key, y_key)
        for res_key, z_key, x_key, y_key in zip(
            result_keys, z_keys, x_overlap_keys, y_overlap_keys)
    }

    # Final task: combine all per-chunk aggregates and finalize them
    finalize_kwargs = dict(cuda=cuda,
                           coords=axis,
                           dims=[glyph.y_label, glyph.x_label])
    dsk[result_name] = (apply, finalize, [(combine, result_keys)],
                        finalize_kwargs)

    # Add x/y coord tasks to task graph
    dsk.update(x_overlapped_centers.dask)
    dsk.update(y_overlapped_centers.dask)

    return dsk, result_name
Exemple #13
0
def test_one_chunk_along_axis():
    """A zero-depth overlap must leave the array shape unchanged."""
    source = np.arange(2 * 9).reshape(2, 9)
    # Axis 0 is a single chunk; axis 1 is split unevenly.
    chunked = da.from_array(source, chunks=((2, ), (2, 2, 2, 3)))
    overlapped = overlap(chunked, depth=0, boundary=0)
    assert source.shape == overlapped.shape
Exemple #14
0
def test_one_chunk_along_axis():
    """A zero-depth overlap must leave the array shape unchanged."""
    source = np.arange(2 * 9).reshape(2, 9)
    single_row_chunk = (2,)
    column_chunks = (2, 2, 2, 3)
    chunked = da.from_array(source, chunks=(single_row_chunk, column_chunks))
    overlapped = overlap(chunked, depth=0, boundary=0)
    assert source.shape == overlapped.shape