def test_compute_no_opt():
    # Bag graphs are `fuse`d by default; passing ``optimize_graph=False`` to
    # ``compute`` must skip that step.  A scheduler callback records every
    # key that actually executes, so we can tell whether fusion happened.
    from dask.callbacks import Callback

    bag = db.from_sequence(range(100), npartitions=4)
    inc = partial(add, 1)
    double = partial(mul, 2)
    mapped = bag.map(inc).map(double)

    # With the kwarg, no optimization: both map layers run separately.
    executed = []
    with Callback(pretask=lambda key, *args: executed.append(key)):
        mapped.compute(scheduler="single-threaded", optimize_graph=False)
    assert sum(1 for k in executed if "mul" in k[0]) == 4
    assert sum(1 for k in executed if "add" in k[0]) == 4

    # Without the kwarg, the layers fuse.  Fused task names merge the
    # original names and the original key survives as an alias — hence the
    # counts below are 8/4/4 rather than 4 and 0.
    executed = []
    with Callback(pretask=lambda key, *args: executed.append(key)):
        mapped.compute(scheduler="single-threaded")
    assert sum(1 for k in executed if "mul" in k[0]) == 8
    assert sum(1 for k in executed if "add" in k[0]) == 4
    assert sum(1 for k in executed if "add-mul" in k[0]) == 4  # fused name
def overlap_internal(x, axes):
    """Share boundaries between neighboring blocks.

    Parameters
    ----------
    x: da.Array
        A dask array
    axes: dict
        The size of the shared boundary per axis.  ``{0: 2, 2: 5}`` means
        share two cells along axis 0 and 5 cells along axis 2.  A value may
        also be a ``(left, right)`` tuple for asymmetric overlap.
    """
    # Number of blocks along each axis.
    dims = list(map(len, x.chunks))
    expand_key2 = partial(expand_key, dims=dims, axes=axes)

    # Make keys for each of the surrounding sub-arrays.
    # NOTE(review): `expand_key` is defined elsewhere — presumably it maps a
    # block key to the grid of neighboring keys/slices; confirm there.
    interior_keys = pipe(
        x.__dask_keys__(), flatten, map(expand_key2), map(flatten), concat, list
    )

    # Both layers are tokenized from the same inputs so they are deterministic.
    name = "overlap-" + tokenize(x, axes)
    getitem_name = "getitem-" + tokenize(x, axes)
    interior_slices = {}
    overlap_blocks = {}
    for k in interior_keys:
        frac_slice = fractional_slice((x.name,) + k, axes)
        # Only materialize a getitem task when the slice is not the whole
        # block; otherwise alias straight to the original key.
        if (x.name,) + k != frac_slice:
            interior_slices[(getitem_name,) + k] = frac_slice
        else:
            interior_slices[(getitem_name,) + k] = (x.name,) + k
        # Each output block concatenates its (sliced) neighborhood back
        # into a single ndarray.
        overlap_blocks[(name,) + k] = (
            concatenate3,
            (concrete, expand_key2((None,) + k, name=getitem_name)),
        )

    # Compute the output chunk sizes: interior blocks grow by the overlap on
    # both sides; the first/last block only grow on their interior side.
    chunks = []
    for i, bds in enumerate(x.chunks):
        depth = axes.get(i, 0)
        if isinstance(depth, tuple):
            left_depth = depth[0]
            right_depth = depth[1]
        else:
            left_depth = depth
            right_depth = depth

        if len(bds) == 1:
            # Single block along this axis: no neighbors, nothing to share.
            chunks.append(bds)
        else:
            left = [bds[0] + right_depth]
            right = [bds[-1] + left_depth]
            mid = []
            for bd in bds[1:-1]:
                mid.append(bd + left_depth + right_depth)
            chunks.append(left + mid + right)

    dsk = merge(interior_slices, overlap_blocks)
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])

    return Array(graph, name, chunks, meta=x)
def __getattr__(self, key): if key in self._delegates: if isinstance(getattr(self._accessor, key), property): return self._property_map(key) else: return partial(self._function_map, key) else: raise AttributeError(key)
def test_tokenize_partial_func_args_kwargs_consistent():
    # A ``partial`` normalizes to (func, positional args, sorted kwargs),
    # each serialized as a pickle (protocol 0) payload.
    wrapped = partial(f3, f2, c=f1)
    expected = (
        b"cdask.tests.test_base\nf3\np0\n.",
        (b"cdask.tests.test_base\nf2\np0\n.",),
        (("c", b"cdask.tests.test_base\nf1\np0\n."),),
    )
    assert normalize_token(wrapped) == expected
def test_normalize_function():
    # ``normalize_function`` must produce a truthy token, and tokens must
    # agree exactly when (and only when) the wrapped function and its bound
    # arguments agree.
    nf = normalize_function

    assert nf(f2)
    assert nf(lambda a: a)

    # functools.partial: same func + same kwargs -> same token.
    assert nf(partial(f2, b=2)) == nf(partial(f2, b=2))
    assert nf(partial(f2, b=2)) != nf(partial(f2, b=3))
    assert nf(partial(f1, b=2)) != nf(partial(f2, b=2))

    # toolz.compose: order-sensitive composition.
    assert nf(compose(f2, f3)) == nf(compose(f2, f3))
    assert nf(compose(f2, f3)) != nf(compose(f2, f1))

    # toolz.curry: both the function and bound kwargs matter.
    assert nf(curry(f2)) == nf(curry(f2))
    assert nf(curry(f2)) != nf(curry(f1))
    assert nf(curry(f2, b=1)) == nf(curry(f2, b=1))
    assert nf(curry(f2, b=1)) != nf(curry(f2, b=2))
def test_tokenize_partial_func_args_kwargs_consistent():
    # A ``partial`` normalizes to (func, positional args, sorted kwargs);
    # each function serializes to its pickle payload (protocol >= 4 framing
    # in the expected bytes below).
    wrapped = partial(f3, f2, c=f1)
    expected = (
        b"\x80\x04\x95\x1f\x00\x00\x00\x00\x00\x00\x00\x8c\x14dask.tests.test_base\x94\x8c\x02f3\x94\x93\x94.",
        (
            b"\x80\x04\x95\x1f\x00\x00\x00\x00\x00\x00\x00\x8c\x14dask.tests.test_base\x94\x8c\x02f2\x94\x93\x94.",
        ),
        (
            (
                "c",
                b"\x80\x04\x95\x1f\x00\x00\x00\x00\x00\x00\x00\x8c\x14dask.tests.test_base\x94\x8c\x02f1\x94\x93\x94.",
            ),
        ),
    )
    assert normalize_token(wrapped) == expected
def trim_internal(x, axes, boundary=None):
    """Trim sides from each block

    This couples well with the overlap operation, which may leave excess
    data on each block

    See also
    --------
    dask.array.chunk.trim
    dask.array.map_blocks
    """
    boundary = coerce_boundary(x.ndim, boundary)

    olist = []
    for axis, bd in enumerate(x.chunks):
        bdy = boundary.get(axis, "none")
        overlap = axes.get(axis, 0)
        # Normalize the per-axis depth to an explicit (left, right) pair.
        if isinstance(overlap, tuple):
            left, right = overlap
        else:
            left = right = overlap

        ilist = []
        last = len(bd) - 1
        for j, d in enumerate(bd):
            if bdy != "none":
                # With a real boundary every block was padded on both sides.
                d -= left + right
            else:
                # Edge blocks were only padded on their interior side.
                if j != 0:
                    d -= left
                if j != last:
                    d -= right
            ilist.append(d)
        olist.append(tuple(ilist))
    chunks = tuple(olist)

    return map_blocks(
        partial(_trim, axes=axes, boundary=boundary),
        x,
        chunks=chunks,
        dtype=x.dtype,
        meta=x._meta,
    )