def make_blockwise_graph(
    func,
    output,
    out_indices,
    *arrind_pairs,
    numblocks=None,
    concatenate=None,
    new_axes=None,
    output_blocks=None,
    dims=None,
    deserializing=False,
    func_future_args=None,
    return_key_deps=False,
    io_deps=None,
    **kwargs,
):
    """Build the task graph for a blockwise (tensor) operation.

    Applies a function, ``func``, across blocks from many different input
    collections.  We arrange the pattern with which those blocks interact with
    sets of matching indices.  E.g.::

        make_blockwise_graph(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        $$ z_i = func(x_i, y_i) $$

    More complex patterns may emerge, including multiple indices::

        make_blockwise_graph(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs results in many
    inputs being sent to one function (see examples).

    Parameters
    ----------
    func
        Function placed in every generated task.
    output
        Name used as the first element of every output key.
    out_indices
        Index labels of the output; one block coordinate is generated per
        label.
    *arrind_pairs
        Alternating ``arg, indices`` values, consumed two at a time.  A pair
        whose ``indices`` is ``None`` is treated as a literal and passed to
        ``func`` unchanged.
    numblocks
        Required.  Forwarded to ``_make_dims`` and ``_get_coord_mapping``;
        the examples below use ``{name: tuple_of_block_counts}``.
    concatenate
        ``True`` selects ``dask.array.core.concatenate_axes``; any other
        truthy value is used directly as the head of the concatenation task.
        When falsy, contracted inputs are passed as nested lists of keys.
    new_axes
        Mapping forwarded to ``_make_dims``; ``None`` is treated as ``{}``.
    output_blocks
        Iterable of output coordinate tuples to build.  When falsy (``None``
        or empty) every output block is built.
    dims
        Precomputed ``{index: number_of_blocks}`` mapping.  When falsy it is
        computed with ``_make_dims``.
    deserializing
        When true, helpers are imported from ``distributed``, keys are passed
        through ``stringify_collection_keys`` and each task is emitted as a
        ``{"function": ..., "args": ...}`` dict instead of a tuple.
    func_future_args
        Extra arguments appended to every task's arguments and added to its
        dependencies.  Only used when ``deserializing`` is true; ``None`` is
        treated as empty.
    return_key_deps
        When true, also return the per-key dependency sets.
    io_deps
        Mapping from placeholder argument names to either a tuple
        ``(dotted_path, *args)`` describing an object to import and
        instantiate, or an object with a ``.get(idx, default)`` method.  The
        matching task arguments are replaced by the looked-up values and are
        not recorded as dependencies.
    **kwargs
        Keyword arguments for ``func``.  If any are given, tasks are built
        around ``apply``.

    Returns
    -------
    dict or (dict, dict)
        The graph ``{output_key: task}``.  If ``return_key_deps`` is true, a
        second dict mapping each output key to a set of the keys it depends
        on is returned as well.

    Raises
    ------
    ValueError
        If ``numblocks`` is not provided.

    Examples
    --------

    Simple embarrassing map operation

    >>> inc = lambda x: x + 1
    >>> make_blockwise_graph(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> make_blockwise_graph(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> make_blockwise_graph(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                          'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 1), ('y', 1, 1)])}

    Pass ``concatenate=True`` to concatenate arrays ahead of time

    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
    ...     numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,))),
     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}

    Supports Broadcasting rules

    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}

    Support keyword arguments with apply

    >>> def f(a, b=0): return a + b
    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
    {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
     ('z', 1): (apply, f, [('x', 1)], {'b': 10})}

    Include literals by indexing with ``None``

    >>> make_blockwise_graph(add, 'z', 'i', 'x', 'i', 100, None,  numblocks={'x': (2,)})  # doctest: +SKIP
    {('z', 0): (add, ('x', 0), 100),
     ('z', 1): (add, ('x', 1), 100)}

    See Also
    --------
    dask.array.blockwise
    dask.blockwise.blockwise
    """
    if numblocks is None:
        raise ValueError("Missing required numblocks argument.")
    new_axes = new_axes or {}
    io_deps = io_deps or {}
    # The default of ``None`` must not reach ``deps.update`` / ``list`` in
    # the deserializing branch below, where it would raise ``TypeError``.
    if func_future_args is None:
        func_future_args = ()
    argpairs = list(toolz.partition(2, arrind_pairs))

    if return_key_deps:
        key_deps = {}

    if deserializing:
        from distributed.protocol.serialize import import_allowed_module
        from distributed.worker import dumps_function, warn_dumps
    else:
        from importlib import import_module as import_allowed_module

    # Check if there are tuple arguments in `io_deps`.
    # If so, we must use this tuple to construct the actual
    # IO-argument mapping.
    io_arg_mappings = {}
    for arg, val in io_deps.items():
        if isinstance(val, tuple):
            _args = val
            # First element is the dotted path of the mapping class/factory.
            module_name, attr_name = _args[0].rsplit(".", 1)
            io_dep_map = getattr(import_allowed_module(module_name), attr_name)
            if deserializing:
                _args = io_dep_map.__dask_distributed_unpack__(*_args)
            io_arg_mappings[arg] = io_dep_map(*_args[1:])

    if concatenate is True:
        from dask.array.core import concatenate_axes as concatenate

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = dims or _make_dims(argpairs, numblocks, new_axes)

    # Generate the abstract "plan" before constructing
    # the actual graph
    (coord_maps, concat_axes, dummies) = _get_coord_mapping(
        dims,
        output,
        out_indices,
        numblocks,
        argpairs,
        concatenate,
    )

    # Unpack delayed objects in kwargs
    dsk2 = {}
    if kwargs:
        task, dsk2 = unpack_collections(kwargs)
        if dsk2:
            kwargs2 = task
        else:
            kwargs2 = kwargs

    # Apply Culling.
    # Only need to construct the specified set of output blocks.
    # Note that an empty ``output_blocks`` is falsy and therefore also
    # selects the full product.
    output_blocks = output_blocks or itertools.product(
        *[range(dims[i]) for i in out_indices]
    )

    dsk = {}
    # Create argument lists
    for out_coords in output_blocks:
        deps = set()
        coords = out_coords + dummies
        args = []
        for cmap, axes, (arg, ind) in zip(coord_maps, concat_axes, argpairs):
            if ind is None:
                # Literal argument: passed through without block indexing.
                if deserializing:
                    args.append(stringify_collection_keys(arg))
                else:
                    args.append(arg)
            else:
                arg_coords = tuple(coords[c] for c in cmap)
                if axes:
                    # Contracted indices: the argument is a nested list of keys.
                    tups = lol_product((arg,), arg_coords)
                    if arg not in io_deps:
                        deps.update(flatten(tups))

                    if concatenate:
                        tups = (concatenate, tups, axes)
                else:
                    tups = (arg,) + arg_coords
                    if arg not in io_deps:
                        deps.add(tups)
                # Replace "place-holder" IO keys with "real" args
                if arg in io_deps:
                    # We don't want to stringify keys for args
                    # we are replacing here.
                    # NOTE(review): assumes ``tups`` is the plain
                    # ``(arg, *coords)`` key here, i.e. IO arguments are
                    # never contracted -- confirm.
                    idx = tups[1:]
                    if arg in io_arg_mappings:
                        args.append(io_arg_mappings[arg][idx])
                    else:
                        # The required inputs for the IO function
                        # are specified explicitly in `io_deps`
                        # (Or the index is the only required arg)
                        args.append(io_deps[arg].get(idx, idx))
                elif deserializing:
                    args.append(stringify_collection_keys(tups))
                else:
                    args.append(tups)
        out_key = (output,) + out_coords

        if deserializing:
            deps.update(func_future_args)
            args.extend(func_future_args)
            if kwargs:
                # NOTE(review): ``func`` is not placed in this task dict;
                # presumably the caller supplies it via ``func_future_args``
                # -- confirm.
                val = {
                    "function": dumps_function(apply),
                    "args": warn_dumps(args),
                    "kwargs": warn_dumps(kwargs2),
                }
            else:
                val = {"function": func, "args": warn_dumps(args)}
        else:
            if kwargs:
                val = (apply, func, args, kwargs2)
            else:
                args.insert(0, func)
                val = tuple(args)
        dsk[out_key] = val
        if return_key_deps:
            key_deps[out_key] = deps

    if dsk2:
        # Include the graph of any collections found inside ``kwargs``.
        dsk.update(ensure_dict(dsk2))

    if return_key_deps:
        return dsk, key_deps
    else:
        return dsk
def make_blockwise_graph(func, output, out_indices, *arrind_pairs, **kwargs):
    """Build the task graph for a blockwise (tensor) operation.

    Applies a function, ``func``, across blocks from many different input
    collections.  We arrange the pattern with which those blocks interact with
    sets of matching indices.  E.g.::

        make_blockwise_graph(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        $$ z_i = func(x_i, y_i) $$

    More complex patterns may emerge, including multiple indices::

        make_blockwise_graph(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs results in many
    inputs being sent to one function (see examples).

    Parameters
    ----------
    func
        Function placed in every generated task.
    output
        Name used as the first element of every output key.
    out_indices
        Index labels of the output; one block coordinate is generated per
        label.
    *arrind_pairs
        Alternating ``arg, indices`` values, consumed two at a time.  A pair
        whose ``indices`` is ``None`` is treated as a literal and passed to
        ``func`` unchanged.
    **kwargs
        The following control keywords are removed from ``kwargs``; whatever
        remains is passed to ``func`` as keyword arguments (via ``apply``):

        ``numblocks`` (required)
            Forwarded to ``_make_dims`` and ``_get_coord_mapping``; the
            examples below use ``{name: tuple_of_block_counts}``.
        ``concatenate``
            ``True`` selects ``dask.array.core.concatenate_axes``; any other
            truthy value is used directly as the head of the concatenation
            task.  When falsy, contracted inputs are nested lists of keys.
        ``new_axes``
            Mapping forwarded to ``_make_dims``; missing or ``None`` is
            treated as ``{}``.
        ``output_blocks``
            Iterable of output coordinate tuples to build.  When falsy
            (``None`` or empty) every output block is built.
        ``dims``
            Precomputed ``{index: number_of_blocks}`` mapping.  When falsy it
            is computed with ``_make_dims``.
        ``deserializing``
            When true, helpers are imported from ``distributed``, the task
            arguments are passed through ``stringify_collection_keys`` and
            each task is emitted as a ``{"function": ..., "args": ...}`` dict
            instead of a tuple.
        ``func_future_args``
            Extra arguments appended to every task's arguments and added to
            its dependencies.  Only used when ``deserializing`` is true;
            missing or ``None`` is treated as empty.
        ``return_key_deps``
            When true, also return the per-key dependency sets.

    Returns
    -------
    dict or (dict, dict)
        The graph ``{output_key: task}``.  If ``return_key_deps`` is true, a
        second dict mapping each output key to a set of the keys it depends
        on is returned as well.

    Raises
    ------
    KeyError
        If ``numblocks`` is not provided.

    Examples
    --------

    Simple embarrassing map operation

    >>> inc = lambda x: x + 1
    >>> make_blockwise_graph(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> make_blockwise_graph(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> make_blockwise_graph(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                          'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)],
                            [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)],
                            [('y', 0, 1), ('y', 1, 1)])}

    Pass ``concatenate=True`` to concatenate arrays ahead of time

    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
    ...     numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,))),
     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}

    Supports Broadcasting rules

    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                      'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}

    Support keyword arguments with apply

    >>> def f(a, b=0): return a + b
    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
    {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
     ('z', 1): (apply, f, [('x', 1)], {'b': 10})}

    Include literals by indexing with ``None``

    >>> make_blockwise_graph(add, 'z', 'i', 'x', 'i', 100, None,  numblocks={'x': (2,)})  # doctest: +SKIP
    {('z', 0): (add, ('x', 0), 100),
     ('z', 1): (add, ('x', 1), 100)}

    See Also
    --------
    dask.array.blockwise
    dask.blockwise.blockwise
    """
    # Strip the control keywords; anything left in ``kwargs`` afterwards is
    # treated as a keyword argument for ``func``.
    numblocks = kwargs.pop("numblocks")
    concatenate = kwargs.pop("concatenate", None)
    # An explicit ``new_axes=None`` is normalised to an empty mapping.
    new_axes = kwargs.pop("new_axes", None) or {}
    output_blocks = kwargs.pop("output_blocks", None)
    dims = kwargs.pop("dims", None)
    argpairs = list(toolz.partition(2, arrind_pairs))
    deserializing = kwargs.pop("deserializing", False)
    func_future_args = kwargs.pop("func_future_args", None)
    # ``None`` must not reach ``deps.update`` / ``list`` in the deserializing
    # branch below, where it would raise ``TypeError``.
    if func_future_args is None:
        func_future_args = ()
    return_key_deps = kwargs.pop("return_key_deps", False)

    if return_key_deps:
        key_deps = {}

    if deserializing:
        from distributed.worker import warn_dumps, dumps_function

    if concatenate is True:
        from dask.array.core import concatenate_axes as concatenate

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = dims or _make_dims(argpairs, numblocks, new_axes)

    # Generate the abstract "plan" before constructing
    # the actual graph
    (coord_maps, concat_axes, dummies) = _get_coord_mapping(
        dims,
        output,
        out_indices,
        numblocks,
        argpairs,
        concatenate,
    )

    # Unpack delayed objects in kwargs
    dsk2 = {}
    if kwargs:
        task, dsk2 = unpack_collections(kwargs)
        if dsk2:
            kwargs2 = task
        else:
            kwargs2 = kwargs

    # Apply Culling.
    # Only need to construct the specified set of output blocks.
    # Note that an empty ``output_blocks`` is falsy and therefore also
    # selects the full product.
    output_blocks = output_blocks or itertools.product(
        *[range(dims[i]) for i in out_indices]
    )

    dsk = {}
    # Create argument lists
    for out_coords in output_blocks:
        deps = set()
        coords = out_coords + dummies
        args = []
        for cmap, axes, (arg, ind) in zip(coord_maps, concat_axes, argpairs):
            if ind is None:
                # Literal argument: passed through without block indexing.
                args.append(arg)
            else:
                arg_coords = tuple(coords[c] for c in cmap)
                if axes:
                    # Contracted indices: the argument is a nested list of keys.
                    tups = lol_product((arg,), arg_coords)
                    deps.update(flatten(tups))

                    if concatenate:
                        tups = (concatenate, tups, axes)
                else:
                    tups = (arg,) + arg_coords
                    deps.add(tups)
                args.append(tups)
        out_key = (output,) + out_coords

        if deserializing:
            deps.update(func_future_args)
            args = stringify_collection_keys(args) + list(func_future_args)
            if kwargs:
                # NOTE(review): ``func`` is not placed in this task dict;
                # presumably the caller supplies it via ``func_future_args``
                # -- confirm.
                val = {
                    "function": dumps_function(apply),
                    "args": warn_dumps(args),
                    "kwargs": warn_dumps(kwargs2),
                }
            else:
                val = {"function": func, "args": warn_dumps(args)}
        else:
            if kwargs:
                val = (apply, func, args, kwargs2)
            else:
                args.insert(0, func)
                val = tuple(args)
        dsk[out_key] = val
        if return_key_deps:
            key_deps[out_key] = deps

    if dsk2:
        # Include the graph of any collections found inside ``kwargs``.
        dsk.update(ensure_dict(dsk2))

    if return_key_deps:
        return dsk, key_deps
    else:
        return dsk