def test_depth_greater_than_dim():
    """``overlap`` raises ``ValueError`` for a depth larger than the axis.

    The array is 12 elements long on axis 0 while a depth of 13 is requested
    there, and the error message is expected to mention the overlapping depth.
    """
    data = np.arange(144).reshape(12, 12)
    chunked = da.from_array(data, chunks=(3, 5))
    too_deep = {0: 13, 1: 4}

    with pytest.raises(ValueError, match="The overlapping depth"):
        overlap(chunked, depth=too_deep, boundary=1)
def test_overlap():
    """Check values, chunks and key determinism of ``overlap``.

    An 8x8 array in 4x4 chunks is overlapped with depth 2 on axis 0 and
    depth 1 on axis 1.  Axis 0 always uses the constant boundary 100; axis 1
    is exercised first with ``"reflect"`` and then with ``"none"``.
    """
    x = np.arange(64).reshape((8, 8))
    d = da.from_array(x, chunks=(4, 4))

    # Rows of ``x`` that make up the overlapped result along axis 0: each
    # 4-row chunk is extended by the two adjacent rows of its neighbour.
    shared_rows = [0, 1, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7]

    def expected_for(shared_cols):
        # Select the shared rows/columns from ``x`` and frame them with the
        # two constant rows (value 100) placed on each end of axis 0.
        core = x[np.ix_(shared_rows, shared_cols)]
        border = np.full((2, len(shared_cols)), 100, dtype=x.dtype)
        return np.concatenate([border, core, border], axis=0)

    # Axis 1 reflected: one mirrored column at each outer edge.
    g = overlap(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: "reflect"})
    assert g.chunks == ((8, 8), (6, 6))
    expected = expected_for([0, 0, 1, 2, 3, 4, 3, 4, 5, 6, 7, 7])
    assert_eq(g, expected)
    # Building the same overlap twice must yield identical graph keys.
    again = overlap(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: "reflect"})
    assert same_keys(g, again)

    # Axis 1 with no boundary: only the interior columns are shared.
    g = overlap(d, depth={0: 2, 1: 1}, boundary={0: 100, 1: "none"})
    expected = expected_for([0, 1, 2, 3, 4, 3, 4, 5, 6, 7])
    assert_eq(g, expected)
    assert g.chunks == ((8, 8), (5, 5))
def test_nearest_overlap():
    """Overlapping with ``"nearest"`` and trimming again restores the input."""
    original = np.arange(144).reshape(12, 12).astype(float)
    chunked = da.from_array(original, chunks=(6, 6))

    overlapped = overlap(
        chunked,
        depth={0: 5, 1: 5},
        boundary={0: "nearest", 1: "nearest"},
    )
    trimmed = trim_internal(overlapped, {0: 5, 1: 5}, boundary="nearest")

    assert_array_almost_equal(trimmed, original)
def test_nearest_overlap():
    """Round-trip a 12x12 float array through ``overlap`` and ``trim_internal``.

    Depth 5 with a ``'nearest'`` boundary is applied on both axes; trimming
    the same depth (using ``trim_internal``'s default boundary handling) must
    give back the original values.
    """
    source = np.arange(144).reshape(12, 12).astype(float)
    blocks = da.from_array(source, chunks=(6, 6))

    widened = overlap(
        blocks,
        depth={0: 5, 1: 5},
        boundary={0: 'nearest', 1: 'nearest'},
    )
    restored = trim_internal(widened, {0: 5, 1: 5})

    assert_array_almost_equal(restored, source)
def test_overlap():
    """Compare ``overlap`` output against hand-written expected arrays.

    Uses an 8x8 array in 4x4 chunks, depth ``{0: 2, 1: 1}``, a constant
    boundary of 100 on axis 0 and either ``'reflect'`` or ``'none'`` on
    axis 1.
    """
    source = np.arange(64).reshape((8, 8))
    chunked = da.from_array(source, chunks=(4, 4))

    # ---- axis 1 reflected -------------------------------------------------
    reflected = overlap(
        chunked, depth={0: 2, 1: 1}, boundary={0: 100, 1: 'reflect'}
    )
    assert reflected.chunks == ((8, 8), (6, 6))

    pad12 = [100] * 12  # constant boundary row, 12 columns wide
    expected_reflect = np.array([
        pad12,
        pad12,
        [0, 0, 1, 2, 3, 4, 3, 4, 5, 6, 7, 7],
        [8, 8, 9, 10, 11, 12, 11, 12, 13, 14, 15, 15],
        [16, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 23],
        [24, 24, 25, 26, 27, 28, 27, 28, 29, 30, 31, 31],
        [32, 32, 33, 34, 35, 36, 35, 36, 37, 38, 39, 39],
        [40, 40, 41, 42, 43, 44, 43, 44, 45, 46, 47, 47],
        [16, 16, 17, 18, 19, 20, 19, 20, 21, 22, 23, 23],
        [24, 24, 25, 26, 27, 28, 27, 28, 29, 30, 31, 31],
        [32, 32, 33, 34, 35, 36, 35, 36, 37, 38, 39, 39],
        [40, 40, 41, 42, 43, 44, 43, 44, 45, 46, 47, 47],
        [48, 48, 49, 50, 51, 52, 51, 52, 53, 54, 55, 55],
        [56, 56, 57, 58, 59, 60, 59, 60, 61, 62, 63, 63],
        pad12,
        pad12,
    ])
    assert_eq(reflected, expected_reflect)

    # The same call must generate the same task keys.
    rebuilt = overlap(
        chunked, depth={0: 2, 1: 1}, boundary={0: 100, 1: 'reflect'}
    )
    assert same_keys(reflected, rebuilt)

    # ---- axis 1 without boundary -------------------------------------------
    unbounded = overlap(
        chunked, depth={0: 2, 1: 1}, boundary={0: 100, 1: 'none'}
    )

    pad10 = [100] * 10  # constant boundary row, 10 columns wide
    expected_none = np.array([
        pad10,
        pad10,
        [0, 1, 2, 3, 4, 3, 4, 5, 6, 7],
        [8, 9, 10, 11, 12, 11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20, 19, 20, 21, 22, 23],
        [24, 25, 26, 27, 28, 27, 28, 29, 30, 31],
        [32, 33, 34, 35, 36, 35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44, 43, 44, 45, 46, 47],
        [16, 17, 18, 19, 20, 19, 20, 21, 22, 23],
        [24, 25, 26, 27, 28, 27, 28, 29, 30, 31],
        [32, 33, 34, 35, 36, 35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44, 43, 44, 45, 46, 47],
        [48, 49, 50, 51, 52, 51, 52, 53, 54, 55],
        [56, 57, 58, 59, 60, 59, 60, 61, 62, 63],
        pad10,
        pad10,
    ])
    assert_eq(unbounded, expected_none)
    assert unbounded.chunks == ((8, 8), (5, 5))
def test_depth_greater_than_smallest_chunk_combines_chunks(chunks):
    """Every output chunk must be at least twice the depth of its axis.

    ``chunks`` is the chunking applied to a 12x12 array (presumably supplied
    by a parametrize decorator that is not visible here).
    """
    source = np.arange(144).reshape(12, 12)
    chunked = da.from_array(source, chunks=chunks)
    depth = {0: 4, 1: 2}

    result = overlap(chunked, depth=depth, boundary=1)

    for axis, axis_depth in depth.items():
        minimum = axis_depth * 2
        assert all(size >= minimum for size in result.chunks[axis])
def test_different_depths_and_boundary_combinations(depth):
    """``overlap`` followed by ``trim_internal`` is a no-op for each boundary.

    For the given ``depth`` the round trip is checked with the ``"reflect"``,
    ``"nearest"`` and ``"periodic"`` boundaries and the constant boundary 42,
    passing the matching boundary to ``trim_internal`` each time.
    """
    expected = np.arange(100).reshape(10, 10)
    chunked = da.from_array(expected, chunks=(5, 2))

    for boundary in ("reflect", "nearest", "periodic", 42):
        overlapped = overlap(chunked, depth=depth, boundary=boundary)
        trimmed = trim_internal(overlapped, depth, boundary=boundary)
        assert_array_equal(trimmed, expected)
def test_depth_greater_than_boundary_length():
    """Round-trip with depths (8, 7) larger than the 5x5 chunk size.

    Each boundary kind is overlapped and then trimmed with the same depth;
    the trimmed result must equal the original 10x10 array.
    """
    expected = np.arange(100).reshape(10, 10)
    chunked = da.from_array(expected, chunks=(5, 5))
    depth = {0: 8, 1: 7}

    for boundary in ("reflect", "nearest", "periodic", 42):
        overlapped = overlap(chunked, depth=depth, boundary=boundary)
        assert_array_equal(trim_internal(overlapped, depth), expected)
def test_depth_greater_than_boundary_length():
    """Overlap then trim with a depth exceeding the chunk length.

    The 10x10 input uses 5x5 chunks while the depth is 8 on axis 0 and 7 on
    axis 1.  All overlapped arrays are built first, then each is trimmed and
    compared with the input.
    """
    expected = np.arange(100).reshape(10, 10)
    blocks = da.from_array(expected, chunks=(5, 5))
    depth = {0: 8, 1: 7}

    overlapped_arrays = [
        overlap(blocks, depth=depth, boundary=kind)
        for kind in ('reflect', 'nearest', 'periodic', 42)
    ]

    for overlapped in overlapped_arrays:
        trimmed = trim_internal(overlapped, depth)
        assert_array_equal(trimmed, expected)
def test_depth_equals_boundary_length():
    """Round-trip with a depth equal to the chunk length on both axes.

    The 10x10 input is split into 5x5 chunks and the depth is 5 per axis;
    overlapping and trimming must reproduce the input for every boundary.
    """
    expected = np.arange(100).reshape(10, 10)
    chunked = da.from_array(expected, chunks=(5, 5))
    depth = {0: 5, 1: 5}

    boundaries = ('reflect', 'nearest', 'periodic', 42)
    overlapped_by_boundary = {
        kind: overlap(chunked, depth=depth, boundary=kind)
        for kind in boundaries
    }

    for kind in boundaries:
        trimmed = trim_internal(overlapped_by_boundary[kind], depth)
        assert_array_equal(trimmed, expected)
def map_overlap(func, *args, depth=None, boundary=None, trim=True,
                align_arrays=True, **kwargs):
    """ Map a function over blocks of arrays with some overlap

    We share neighboring zones between blocks of the array, map a
    function, and then trim away the neighboring strips.

    Parameters
    ----------
    func: function
        The function to apply to each extended block
    args : dask arrays
    depth: int, tuple, dict or list
        The number of elements that each block should share with its neighbors
        If a tuple or dict then this can be different per axis.
        If a list then each element of that list must be an int, tuple or dict
        defining depth for the corresponding array in `args`.
        Asymmetric depths may be specified using a dict value of (-/+) tuples.
        Note that asymmetric depths are currently only supported when
        ``boundary`` is 'none'.
        The default value is 0.
    boundary: str, tuple, dict or list
        How to handle the boundaries.
        Values include 'reflect', 'periodic', 'nearest', 'none',
        or any constant value like 0 or np.nan.
        If a list then each element must be a str, tuple or dict defining the
        boundary for the corresponding array in `args`.
        The default value is 'reflect'.
    trim: bool
        Whether or not to trim ``depth`` elements from each block after
        calling the map function.
        Set this to False if your mapping function already does this for you
    align_arrays: bool
        Whether or not to align chunks along equally sized dimensions when
        multiple arrays are provided.  This allows for larger chunks in some
        arrays to be broken into smaller ones that match chunk sizes in other
        arrays such that they are compatible for block function mapping. If
        this is false, then an error will be thrown if arrays do not already
        have the same number of blocks in each dimensions.
    **kwargs:
        Other keyword arguments valid in ``map_blocks``

    Returns
    -------
    The result of ``map_blocks`` applied to the overlapped arrays, passed
    through ``trim_internal`` when ``trim`` is true.

    Raises
    ------
    TypeError
        If ``func`` is not callable, or if any positional argument after it is
        not an ``Array``.
    NotImplementedError
        If an asymmetric (tuple) depth is combined with a boundary other than
        'none' on the same axis.

    Examples
    --------
    >>> import numpy as np
    >>> import dask.array as da

    >>> x = np.array([1, 1, 2, 3, 3, 3, 2, 1, 1])
    >>> x = da.from_array(x, chunks=5)
    >>> def derivative(x):
    ...     return x - np.roll(x, 1)

    >>> y = x.map_overlap(derivative, depth=1, boundary=0)
    >>> y.compute()
    array([ 1,  0,  1,  1,  0,  0, -1, -1,  0])

    >>> x = np.arange(16).reshape((4, 4))
    >>> d = da.from_array(x, chunks=(2, 2))
    >>> d.map_overlap(lambda x: x + x.size, depth=1).compute()
    array([[16, 17, 18, 19],
           [20, 21, 22, 23],
           [24, 25, 26, 27],
           [28, 29, 30, 31]])

    >>> func = lambda x: x + x.size
    >>> depth = {0: 1, 1: 1}
    >>> boundary = {0: 'reflect', 1: 'none'}
    >>> d.map_overlap(func, depth, boundary).compute()  # doctest: +NORMALIZE_WHITESPACE
    array([[12, 13, 14, 15],
           [16, 17, 18, 19],
           [20, 21, 22, 23],
           [24, 25, 26, 27]])
    """
    # Look for invocation using deprecated single-array signature
    # map_overlap(x, func, depth, boundary=None, trim=True, **kwargs)
    # ``args`` can be empty (e.g. ``map_overlap(x)``); check that first so the
    # call falls through to the TypeError below instead of raising IndexError.
    if isinstance(func, Array) and args and callable(args[0]):
        warnings.warn(
            "Detected use of signature map_overlap(x, func) rather than "
            "map_overlap(func, *args) for multi-array support. Arguments "
            "will be swapped in this case but such an exception will not "
            "be made in a future release.",
            FutureWarning,
        )
        # Positional slots of the old signature after the array itself;
        # explicit keyword values are used when a slot was not supplied.
        sig = ["func", "depth", "boundary", "trim"]
        depth = get(sig.index("depth"), args, depth)
        boundary = get(sig.index("boundary"), args, boundary)
        trim = get(sig.index("trim"), args, trim)
        func, args = args[0], [func]

    if not callable(func):
        raise TypeError("First argument must be callable function, not {}\n"
                        "Usage: da.map_overlap(function, x)\n"
                        " or: da.map_overlap(function, x, y, z)".format(
                            type(func).__name__))
    if not all(isinstance(x, Array) for x in args):
        raise TypeError("All variadic arguments must be arrays, not {}\n"
                        "Usage: da.map_overlap(function, x)\n"
                        " or: da.map_overlap(function, x, y, z)".format(
                            [type(x).__name__ for x in args]))

    # Coerce depth and boundary arguments to lists of individual
    # specifications for each array argument
    def coerce(xs, arg, fn):
        if not isinstance(arg, list):
            arg = [arg] * len(xs)
        return [fn(x.ndim, a) for x, a in zip(xs, arg)]

    depth = coerce(args, depth, coerce_depth)
    boundary = coerce(args, boundary, coerce_boundary)

    # Align chunks in each array to a common size
    if align_arrays:
        # Reverse unification order to allow block broadcasting
        inds = [list(reversed(range(x.ndim))) for x in args]
        _, args = da.core.unify_chunks(*list(concat(zip(args, inds))),
                                       warn=False)

    # Asymmetric (tuple) depths are only accepted where the boundary is 'none'.
    for i, x in enumerate(args):
        for j in range(x.ndim):
            if isinstance(depth[i][j], tuple) and boundary[i][j] != "none":
                raise NotImplementedError(
                    "Asymmetric overlap is currently only implemented "
                    "for boundary='none', however boundary for dimension "
                    "{} in array argument {} is {}".format(
                        j, i, boundary[i][j]))

    def assert_int_chunksize(xs):
        # Internal sanity check: every chunk size must be a plain ``int``.
        assert all(type(c) is int for x in xs for cc in x.chunks for c in cc)

    assert_int_chunksize(args)
    args = [
        overlap(x, depth=d, boundary=b)
        for x, d, b in zip(args, depth, boundary)
    ]
    assert_int_chunksize(args)
    x = da.map_blocks(func, *args, **kwargs)
    assert_int_chunksize([x])

    if trim:
        # Find index of array argument with maximum rank and break ties by
        # choosing first provided
        i = sorted(enumerate(args), key=lambda v: (v[1].ndim, -v[0]))[-1][0]
        # Trim using depth/boundary setting for array of highest rank
        return trim_internal(x, depth[i], boundary[i])
    else:
        return x
def dask_curvilinear(glyph, xr_ds, schema, canvas, summary, cuda):
    """Build a Dask task graph that aggregates a curvilinear grid per chunk.

    The x/y coordinate arrays are overlapped by one element (NaN boundary) so
    that each chunk can infer its own interval breaks; one aggregation task is
    created per data chunk and a final task combines and finalizes them.

    Parameters
    ----------
    glyph
        Provides the coordinate/data names (``x``, ``y``, ``name``), the
        axis labels, ``_build_extend`` and ``infer_interval_breaks``.
    xr_ds
        Dataset-like object indexed by variable name (presumably an xarray
        Dataset); the data variable and both coordinates must be backed by
        Dask arrays with identical dimension order and chunks.
    schema, summary
        Forwarded to ``compile_components``.
    canvas
        Supplies the axis mappers and is passed to
        ``shape_bounds_st_and_axis``.
    cuda
        Forwarded to ``compile_components`` and to the finalize step.

    Returns
    -------
    tuple
        ``(dsk, result_name)``: the task-graph dict and the key of the final
        result within it.

    Raises
    ------
    ValueError
        If a coordinate is not a Dask array, or its dims/chunks differ from
        those of the data variable.
    """
    shape, bounds, st, axis = shape_bounds_st_and_axis(xr_ds, canvas, glyph)

    # Compile functions
    create, info, append, combine, finalize = compile_components(
        summary, schema, glyph, cuda=cuda
    )
    x_mapper = canvas.x_axis.mapper
    y_mapper = canvas.y_axis.mapper
    extend = glyph._build_extend(x_mapper, y_mapper, info, append)

    x_coord_name, y_coord_name, z_name = glyph.x, glyph.y, glyph.name

    data_dim_names = list(xr_ds[z_name].dims)
    x_coord_dim_names = list(xr_ds[x_coord_name].dims)
    y_coord_dim_names = list(xr_ds[y_coord_name].dims)

    zs = xr_ds[z_name].data
    x_centers = xr_ds[x_coord_name].data
    y_centers = xr_ds[y_coord_name].data

    var_name = list(xr_ds.data_vars.keys())[0]

    # Validate coordinates
    err_msg = (
        "DataArray {name} is backed by a Dask array, \n"
        "but coordinate {coord} is not backed by a Dask array with identical \n"
        "dimension order and chunks")
    for coord_name, centers in (
        (x_coord_name, x_centers),
        (y_coord_name, y_centers),
    ):
        if (not isinstance(centers, dask.array.Array) or
                xr_ds[z_name].dims != xr_ds[coord_name].dims or
                xr_ds[z_name].chunks != xr_ds[coord_name].chunks):
            raise ValueError(err_msg.format(name=z_name, coord=coord_name))

    def as_float(arr):
        # Make sure coordinates are floats so that overlap with nan will
        # behave properly
        if arr.dtype.kind != 'f':
            return arr.astype(np.float64)
        return arr

    x_centers = as_float(x_centers)
    y_centers = as_float(y_centers)

    x_overlapped_centers = overlap(x_centers, depth=1, boundary=np.nan)
    y_overlapped_centers = overlap(y_centers, depth=1, boundary=np.nan)

    def chunk(np_zs, np_x_centers, np_y_centers):
        # Handle boundaries that have nothing to overlap with: an all-NaN
        # edge is filled in place by linear extrapolation from the two
        # adjacent rows/columns.  The order (first row, last row, first
        # column, last column) matters because later steps read earlier ones.
        for centers in (np_x_centers, np_y_centers):
            if np.isnan(centers[0, :]).all():
                step = centers[2, :] - centers[1, :]
                centers[0, :] = centers[1, :] - step
            if np.isnan(centers[-1, :]).all():
                step = centers[-2, :] - centers[-3, :]
                centers[-1, :] = centers[-2, :] + step
            if np.isnan(centers[:, 0]).all():
                step = centers[:, 2] - centers[:, 1]
                centers[:, 0] = centers[:, 1] - step
            if np.isnan(centers[:, -1]).all():
                step = centers[:, -2] - centers[:, -3]
                centers[:, -1] = centers[:, -2] + step

        # compute interval breaks, then trim off the overlap border
        x_breaks_chunk = glyph.infer_interval_breaks(np_x_centers)
        y_breaks_chunk = glyph.infer_interval_breaks(np_y_centers)
        x_breaks_chunk = x_breaks_chunk[1:-1, 1:-1]
        y_breaks_chunk = y_breaks_chunk[1:-1, 1:-1]

        # Reconstruct dataset for chunk from numpy array and chunk indices
        inner = (slice(1, -1), slice(1, -1))
        chunk_coords = {
            x_coord_name: (x_coord_dim_names, np_x_centers[inner]),
            y_coord_name: (y_coord_dim_names, np_y_centers[inner]),
        }
        chunk_ds = xr.DataArray(
            np_zs, coords=chunk_coords, dims=data_dim_names, name=var_name
        ).to_dataset()

        # Initialize aggregation buffers and perform aggregation
        aggs = create(shape)
        extend(aggs, chunk_ds, st, bounds,
               x_breaks=x_breaks_chunk, y_breaks=y_breaks_chunk)
        return aggs

    def flat_keys(arr):
        # Flatten the nested (row-of-keys) structure into a single list.
        flattened = []
        for row in arr.__dask_keys__():
            flattened.extend(row)
        return flattened

    result_name = tokenize(xr_ds.__dask_tokenize__(), canvas, glyph, summary)

    z_keys = flat_keys(zs)
    x_overlap_keys = flat_keys(x_overlapped_centers)
    y_overlap_keys = flat_keys(y_overlapped_centers)
    result_keys = [(result_name, i) for i in range(len(z_keys))]

    # One aggregation task per data chunk.
    dsk = {
        res_k: (chunk, z_k, x_k, y_k)
        for res_k, z_k, x_k, y_k in zip(
            result_keys, z_keys, x_overlap_keys, y_overlap_keys
        )
    }

    # Final task: combine all per-chunk aggregates, then finalize.
    finalize_kwargs = {
        "cuda": cuda,
        "coords": axis,
        "dims": [glyph.y_label, glyph.x_label],
    }
    dsk[result_name] = (
        apply, finalize, [(combine, result_keys)], finalize_kwargs
    )

    # Add x/y coord tasks to task graph
    dsk.update(x_overlapped_centers.dask)
    dsk.update(y_overlapped_centers.dask)

    return dsk, result_name
def test_one_chunk_along_axis():
    """A zero-depth overlap keeps the shape of an array with a single chunk
    along axis 0 and uneven chunks along axis 1."""
    source = np.arange(18).reshape(2, 9)
    chunked = da.from_array(source, chunks=((2,), (2, 2, 2, 3)))

    overlapped = overlap(chunked, depth=0, boundary=0)

    assert overlapped.shape == source.shape
def test_one_chunk_along_axis():
    """``overlap`` with ``depth=0`` must not change the array's shape.

    The 2x9 input has one chunk on axis 0 and chunks (2, 2, 2, 3) on axis 1.
    """
    rows, cols = 2, 9
    data = np.arange(rows * cols).reshape(rows, cols)
    blocks = da.from_array(data, chunks=((2,), (2, 2, 2, 3)))

    result = overlap(blocks, depth=0, boundary=0)

    assert data.shape == result.shape