def parse_args(args):
    """Parse a flat ``[key, value, key, value, ...]`` sequence into a dict,
    converting all-digit string values to ints."""
    options = dict(partition(2, args))
    for k, v in options.items():
        if v.isdigit():
            options[k] = int(v)
    return options
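# Illustrative only: a quick check of ``parse_args`` (the flag names are
# made up; ``partition`` is assumed to be ``toolz.partition``):
#
#     >>> parse_args(["--depth", "3", "--name", "alice"])  # doctest: +SKIP
#     {'--depth': 3, '--name': 'alice'}
#
# Only all-digit strings are converted, so a value like "3.5" stays a string.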
def assign(df, *pairs):
    pairs = dict(partition(2, pairs))
    # Only deep copy when updating an existing column
    # (to avoid modifying the original)
    deep = bool(set(pairs) & set(df.columns))
    df = df.copy(deep=deep)
    for name, val in pairs.items():
        df[name] = val
    return df
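# Illustrative only: ``assign`` with one colliding and one new column
# (hypothetical data; assumes pandas):
#
#     >>> import pandas as pd
#     >>> df = pd.DataFrame({"a": [1, 2]})
#     >>> df2 = assign(df, "a", [10, 20], "b", [3, 4])  # doctest: +SKIP
#     >>> df["a"].tolist()  # original untouched: "a" collided, so the copy was deep
#     [1, 2]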
def reshapelist(shape, seq):
    """Reshape iterator to nested shape

    >>> reshapelist((2, 3), range(6))
    [[0, 1, 2], [3, 4, 5]]
    """
    if len(shape) == 1:
        return list(seq)
    else:
        n = int(len(seq) / shape[0])
        return [
            reshapelist(shape[1:], part) for part in toolz.partition(n, seq)
        ]
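# Illustrative only: ``reshapelist`` agrees with the nested-list view of a
# numpy reshape (assumes numpy is available):
#
#     >>> import numpy as np
#     >>> reshapelist((2, 2, 3), range(12)) == np.arange(12).reshape(2, 2, 3).tolist()  # doctest: +SKIP
#     True
#
# Note that ``seq`` must support ``len`` for multi-dimensional shapes, so a
# pure iterator would need to be materialized first.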
def blockwise(
    func,
    out_ind,
    *args,
    name=None,
    token=None,
    dtype=None,
    adjust_chunks=None,
    new_axes=None,
    align_arrays=True,
    concatenate=None,
    meta=None,
    **kwargs
):
    """Tensor operation: Generalized inner and outer products

    A broad class of blocked algorithms and patterns can be specified with a
    concise multi-index notation.  The ``blockwise`` function applies an
    in-memory function across multiple blocks of multiple inputs in a variety
    of ways.  Many dask.array operations are special cases of blockwise
    including elementwise, broadcasting, reductions, tensordot, and transpose.

    Parameters
    ----------
    func : callable
        Function to apply to individual tuples of blocks
    out_ind : iterable
        Block pattern of the output, something like 'ijk' or (1, 2, 3)
    *args : sequence of Array, index pairs
        Sequence like (x, 'ij', y, 'jk', z, 'i')
    **kwargs : dict
        Extra keyword arguments to pass to function
    dtype : np.dtype
        Datatype of resulting array.
    concatenate : bool, keyword only
        If true concatenate arrays along dummy indices, else provide lists
    adjust_chunks : dict
        Dictionary mapping index to function to be applied to chunk sizes
    new_axes : dict, keyword only
        New indexes and their dimension lengths

    Examples
    --------
    2D embarrassingly parallel operation from two arrays, x, and y.

    >>> z = blockwise(operator.add, 'ij', x, 'ij', y, 'ij', dtype='f8')  # z = x + y  # doctest: +SKIP

    Outer product multiplying x by y, two 1-d vectors

    >>> z = blockwise(operator.mul, 'ij', x, 'i', y, 'j', dtype='f8')  # doctest: +SKIP

    z = x.T

    >>> z = blockwise(np.transpose, 'ji', x, 'ij', dtype=x.dtype)  # doctest: +SKIP

    The transpose case above is illustrative because it does the same
    transposition both on each in-memory block by calling ``np.transpose``
    and on the order of the blocks themselves, by switching the order of the
    index ``ij -> ji``.

    We can compose these same patterns with more variables and more complex
    in-memory functions

    z = X + Y.T

    >>> z = blockwise(lambda x, y: x + y.T, 'ij', x, 'ij', y, 'ji', dtype='f8')  # doctest: +SKIP

    Any index, like ``i``, missing from the output index is interpreted as a
    contraction (note that this differs from Einstein convention; repeated
    indices do not imply contraction.)  In the case of a contraction the
    passed function should expect an iterable of blocks on any array that
    holds that index.  To receive arrays concatenated along contracted
    dimensions instead pass ``concatenate=True``.

    Inner product multiplying x by y, two 1-d vectors

    >>> def sequence_dot(x_blocks, y_blocks):
    ...     result = 0
    ...     for x, y in zip(x_blocks, y_blocks):
    ...         result += x.dot(y)
    ...     return result

    >>> z = blockwise(sequence_dot, '', x, 'i', y, 'i', dtype='f8')  # doctest: +SKIP

    Add new single-chunk dimensions with the ``new_axes=`` keyword, including
    the length of the new dimension.  New dimensions will always be in a
    single chunk.

    >>> def f(x):
    ...     return x[:, None] * np.ones((1, 5))

    >>> z = blockwise(f, 'az', x, 'a', new_axes={'z': 5}, dtype=x.dtype)  # doctest: +SKIP

    New dimensions can also be multi-chunk by specifying a tuple of chunk
    sizes.  This has limited utility as is (because the chunks are all the
    same), but the resulting graph can be modified to achieve more useful
    results (see ``da.map_blocks``).

    >>> z = blockwise(f, 'az', x, 'a', new_axes={'z': (5, 5)}, dtype=x.dtype)  # doctest: +SKIP

    If the applied function changes the size of each chunk you can specify
    this with an ``adjust_chunks={...}`` dictionary holding a function for
    each index that modifies the dimension size in that index.

    >>> def double(x):
    ...     return np.concatenate([x, x])

    >>> y = blockwise(double, 'ij', x, 'ij',
    ...               adjust_chunks={'i': lambda n: 2 * n}, dtype=x.dtype)  # doctest: +SKIP

    Include literals by indexing with None

    >>> y = blockwise(add, 'ij', x, 'ij', 1234, None, dtype=x.dtype)  # doctest: +SKIP
    """
    out = name
    new_axes = new_axes or {}

    # Input Validation
    if len(set(out_ind)) != len(out_ind):
        raise ValueError(
            "Repeated elements not allowed in output index",
            [k for k, v in toolz.frequencies(out_ind).items() if v > 1],
        )
    new = (
        set(out_ind)
        - {a for arg in args[1::2] if arg is not None for a in arg}
        - set(new_axes or ())
    )
    if new:
        raise ValueError("Unknown dimension", new)

    from .core import Array, unify_chunks, normalize_arg

    if align_arrays:
        chunkss, arrays = unify_chunks(*args)
    else:
        arginds = [(a, i) for (a, i) in toolz.partition(2, args) if i is not None]
        chunkss = {}
        # For each dimension, use the input chunking that has the most blocks;
        # this will ensure that broadcasting works as expected, and in
        # particular the number of blocks should be correct if the inputs are
        # consistent.
        for arg, ind in arginds:
            for c, i in zip(arg.chunks, ind):
                if i not in chunkss or len(c) > len(chunkss[i]):
                    chunkss[i] = c
        arrays = args[::2]

    for k, v in new_axes.items():
        if not isinstance(v, tuple):
            v = (v,)
        chunkss[k] = v

    arginds = list(zip(arrays, args[1::2]))
    for arg, ind in arginds:
        if hasattr(arg, "ndim") and hasattr(ind, "__len__") and arg.ndim != len(ind):
            raise ValueError(
                "Index string %s does not match array dimension %d" % (ind, arg.ndim)
            )

    numblocks = {a.name: a.numblocks for a, ind in arginds if ind is not None}

    dependencies = []
    arrays = []

    # Normalize arguments
    argindsstr = []
    for a, ind in arginds:
        if ind is None:
            a = normalize_arg(a)
            a, collections = unpack_collections(a)
            dependencies.extend(collections)
        else:
            arrays.append(a)
            a = a.name
        argindsstr.extend((a, ind))

    # Normalize keyword arguments
    kwargs2 = {}
    for k, v in kwargs.items():
        v = normalize_arg(v)
        v, collections = unpack_collections(v)
        dependencies.extend(collections)
        kwargs2[k] = v

    # Finish up the name
    if not out:
        out = "%s-%s" % (
            token or utils.funcname(func).strip("_"),
            base.tokenize(func, out_ind, argindsstr, dtype, **kwargs),
        )

    graph = core_blockwise(
        func,
        out,
        out_ind,
        *argindsstr,
        numblocks=numblocks,
        dependencies=dependencies,
        new_axes=new_axes,
        concatenate=concatenate,
        **kwargs2
    )
    graph = HighLevelGraph.from_collections(
        out, graph, dependencies=arrays + dependencies
    )

    chunks = [chunkss[i] for i in out_ind]
    if adjust_chunks:
        for i, ind in enumerate(out_ind):
            if ind in adjust_chunks:
                if callable(adjust_chunks[ind]):
                    chunks[i] = tuple(map(adjust_chunks[ind], chunks[i]))
                elif isinstance(adjust_chunks[ind], numbers.Integral):
                    chunks[i] = tuple(adjust_chunks[ind] for _ in chunks[i])
                elif isinstance(adjust_chunks[ind], (tuple, list)):
                    if len(adjust_chunks[ind]) != len(chunks[i]):
                        raise ValueError(
                            "Dimension {0} has {1} blocks, "
                            "adjust_chunks specified with "
                            "{2} blocks".format(
                                i, len(chunks[i]), len(adjust_chunks[ind])
                            )
                        )
                    chunks[i] = tuple(adjust_chunks[ind])
                else:
                    raise NotImplementedError(
                        "adjust_chunks values must be callable, int, or tuple"
                    )
    chunks = tuple(chunks)

    if meta is None:
        from .utils import compute_meta

        meta = compute_meta(func, dtype, *args[::2], **kwargs)
    if meta is not None:
        return Array(graph, out, chunks, meta=meta)
    else:
        return Array(graph, out, chunks, dtype=dtype)
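# Illustrative only: the public entry point above, end to end.  The shapes,
# chunk sizes, and values are arbitrary (requires numpy and dask[array]):
#
#     >>> import numpy as np
#     >>> import dask.array as da
#     >>> x = da.ones((4, 4), chunks=(2, 2))
#     >>> y = da.arange(4, chunks=2)
#     >>> z = da.blockwise(np.add, 'ij', x, 'ij', y, 'j', dtype='f8')  # z_ij = x_ij + y_j
#     >>> z.compute()[0]  # doctest: +SKIP
#     array([1., 2., 3., 4.])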
def blockwise(
    func,
    output,
    output_indices,
    *arrind_pairs,
    numblocks=None,
    concatenate=None,
    new_axes=None,
    dependencies=(),
    **kwargs
):
    """Create a Blockwise symbolic mutable mapping

    This is like the ``make_blockwise_graph`` function, but rather than
    construct a dict, it returns a symbolic Blockwise object.

    See Also
    --------
    make_blockwise_graph
    Blockwise
    """
    new_axes = new_axes or {}

    arrind_pairs = list(arrind_pairs)

    # Transform indices to canonical elements
    # We use terms like _0, and _1 rather than provided index elements
    unique_indices = {
        i for ii in arrind_pairs[1::2] if ii is not None for i in ii
    } | set(output_indices)
    sub = {
        k: blockwise_token(i, ".") for i, k in enumerate(sorted(unique_indices))
    }
    output_indices = index_subs(tuple(output_indices), sub)
    a_pairs_list = []
    for a in arrind_pairs[1::2]:
        if a is not None:
            val = tuple(a)
        else:
            val = a
        a_pairs_list.append(index_subs(val, sub))
    arrind_pairs[1::2] = a_pairs_list
    new_axes = {index_subs((k,), sub)[0]: v for k, v in new_axes.items()}

    # Unpack dask values in non-array arguments
    argpairs = toolz.partition(2, arrind_pairs)

    # Separate argpairs into two separate tuples
    inputs = []
    inputs_indices = []
    for name, index in argpairs:
        inputs.append(name)
        inputs_indices.append(index)

    # Unpack delayed objects in kwargs
    new_keys = {n for c in dependencies for n in c.__dask_layers__()}
    if kwargs:
        # Replace keys in kwargs with _0 tokens
        new_tokens = tuple(
            blockwise_token(i)
            for i in range(len(inputs), len(inputs) + len(new_keys))
        )
        sub = dict(zip(new_keys, new_tokens))
        inputs.extend(new_keys)
        inputs_indices.extend((None,) * len(new_keys))
        kwargs = subs(kwargs, sub)

    indices = [(k, v) for k, v in zip(inputs, inputs_indices)]
    keys = map(blockwise_token, range(len(inputs)))

    # Construct local graph
    if not kwargs:
        subgraph = {output: (func,) + tuple(keys)}
    else:
        _keys = list(keys)
        if new_keys:
            _keys = _keys[: -len(new_keys)]
        kwargs2 = (dict, list(map(list, kwargs.items())))
        subgraph = {output: (apply, func, _keys, kwargs2)}

    # Construct final output
    subgraph = Blockwise(
        output,
        output_indices,
        subgraph,
        indices,
        numblocks=numblocks,
        concatenate=concatenate,
        new_axes=new_axes,
    )
    return subgraph
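# Illustrative only: the point of returning a symbolic ``Blockwise`` rather
# than a dict is that the graph stays small until materialized.  Layer names
# and counts vary across dask versions, hence the skips (requires dask[array]):
#
#     >>> import dask.array as da
#     >>> x = da.ones((1000, 1000), chunks=(10, 10))  # 10,000 blocks
#     >>> hlg = (x + 1).__dask_graph__()
#     >>> len(hlg.layers)  # doctest: +SKIP  # a handful of symbolic layers...
#     2
#     >>> len(dict(hlg))   # doctest: +SKIP  # ...expanding to one task per block
#     20000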
def make_blockwise_graph(func, output, out_indices, *arrind_pairs, **kwargs):
    """Tensor operation

    Applies a function, ``func``, across blocks from many different input
    collections.  We arrange the pattern with which those blocks interact
    with sets of matching indices.  E.g.::

        make_blockwise_graph(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        $$ z_i = func(x_i, y_i) $$

    More complex patterns may emerge, including multiple indices::

        make_blockwise_graph(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs result in many
    inputs being sent to one function (see examples).

    Examples
    --------
    Simple embarrassingly parallel map operation

    >>> inc = lambda x: x + 1
    >>> make_blockwise_graph(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> make_blockwise_graph(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                                        'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> make_blockwise_graph(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                                           'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 1), ('y', 1, 1)])}

    Pass ``concatenate=True`` to concatenate arrays ahead of time

    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
    ...                      numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,))),
     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}

    Supports broadcasting rules

    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}

    Supports keyword arguments with ``apply``

    >>> def f(a, b=0): return a + b
    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
    {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
     ('z', 1): (apply, f, [('x', 1)], {'b': 10})}

    Include literals by indexing with ``None``

    >>> make_blockwise_graph(add, 'z', 'i', 'x', 'i', 100, None, numblocks={'x': (2,)})  # doctest: +SKIP
    {('z', 0): (add, ('x', 0), 100),
     ('z', 1): (add, ('x', 1), 100)}

    See Also
    --------
    dask.array.blockwise
    dask.blockwise.blockwise
    """
    numblocks = kwargs.pop("numblocks")
    concatenate = kwargs.pop("concatenate", None)
    new_axes = kwargs.pop("new_axes", {})

    key_deps = kwargs.pop("key_deps", None)
    non_blockwise_keys = kwargs.pop("non_blockwise_keys", None)

    argpairs = list(toolz.partition(2, arrind_pairs))

    if concatenate is True:
        from dask.array.core import concatenate_axes as concatenate

    block_names = set()
    all_indices = set()
    for name, ind in argpairs:
        if ind is not None:
            block_names.add(name)
            for x in ind:
                all_indices.add(x)
    assert set(numblocks) == block_names

    dummy_indices = all_indices - set(out_indices)

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = broadcast_dimensions(argpairs, numblocks)
    for k, v in new_axes.items():
        dims[k] = len(v) if isinstance(v, tuple) else 1

    # For each position in the output space, we'll construct a
    # "coordinate set" that consists of
    # - the output indices
    # - the dummy indices
    # - the dummy indices, with indices replaced by zeros (for broadcasting);
    #   we are careful to only emit a single dummy zero when concatenate=True
    #   to not concatenate the same array with itself several times.
    # - a 0 to assist with broadcasting.

    index_pos, zero_pos = {}, {}
    for i, ind in enumerate(out_indices):
        index_pos[ind] = i
        zero_pos[ind] = -1

    _dummies_list = []
    for i, ind in enumerate(dummy_indices):
        index_pos[ind] = 2 * i + len(out_indices)
        zero_pos[ind] = 2 * i + 1 + len(out_indices)
        reps = 1 if concatenate else dims[ind]
        _dummies_list.append([list(range(dims[ind])), [0] * reps])

    # ([0, 1, 2], [0, 0, 0], ...)  For a dummy index of dimension 3
    dummies = tuple(itertools.chain.from_iterable(_dummies_list))
    dummies += (0,)

    # For each coordinate position in each input, gives the position in
    # the coordinate set.
    coord_maps = []

    # Axes along which to concatenate, for each input
    concat_axes = []
    for arg, ind in argpairs:
        if ind is not None:
            coord_maps.append(
                [
                    zero_pos[i] if nb == 1 else index_pos[i]
                    for i, nb in zip(ind, numblocks[arg])
                ]
            )
            concat_axes.append([n for n, i in enumerate(ind) if i in dummy_indices])
        else:
            coord_maps.append(None)
            concat_axes.append(None)

    # Unpack delayed objects in kwargs
    dsk2 = {}
    if kwargs:
        task, dsk2 = unpack_collections(kwargs)
        if dsk2:
            kwargs2 = task
        else:
            kwargs2 = kwargs
        if non_blockwise_keys is not None:
            non_blockwise_keys |= find_all_possible_keys([kwargs2])

    # Find all non-blockwise keys in the input arguments
    if non_blockwise_keys is not None:
        for arg, ind in argpairs:
            if ind is None:
                non_blockwise_keys |= find_all_possible_keys([arg])

    dsk = {}
    # Create argument lists
    for out_coords in itertools.product(*[range(dims[i]) for i in out_indices]):
        deps = set()
        coords = out_coords + dummies
        args = []
        for cmap, axes, (arg, ind) in zip(coord_maps, concat_axes, argpairs):
            if ind is None:
                args.append(arg)
            else:
                arg_coords = tuple(coords[c] for c in cmap)
                if axes:
                    tups = lol_product((arg,), arg_coords)
                    deps.update(flatten(tups))
                    if concatenate:
                        tups = (concatenate, tups, axes)
                else:
                    tups = (arg,) + arg_coords
                    deps.add(tups)
                args.append(tups)
        out_key = (output,) + out_coords

        if kwargs:
            val = (apply, func, args, kwargs2)
        else:
            args.insert(0, func)
            val = tuple(args)
        dsk[out_key] = val
        if key_deps is not None:
            key_deps[out_key] = deps

    if dsk2:
        dsk.update(ensure_dict(dsk2))
    return dsk
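# A self-contained sketch of the loop at the heart of the function above,
# stripped down to the purely elementwise case (no dummy indices, no
# broadcasting, no kwargs).  ``tiny_blockwise_graph`` is an ad hoc name for
# illustration, not dask API:

def tiny_blockwise_graph(func, output, out_indices, arg_names, numblocks):
    """One task per output block; every input shares ``out_indices``."""
    import itertools

    # Dimension sizes in blocks, e.g. {'i': 2, 'j': 2}
    dims = dict(zip(out_indices, numblocks[arg_names[0]]))
    dsk = {}
    for out_coords in itertools.product(*[range(dims[i]) for i in out_indices]):
        # Each argument contributes its block at the same coordinates
        dsk[(output,) + out_coords] = (func,) + tuple(
            (name,) + out_coords for name in arg_names
        )
    return dsk

# ``tiny_blockwise_graph(add, 'z', 'ij', ['x', 'y'], {'x': (2, 2), 'y': (2, 2)})``
# reproduces the "simple operation on two datasets" example in the docstring.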
def make_blockwise_graph(
    func,
    output,
    out_indices,
    *arrind_pairs,
    numblocks=None,
    concatenate=None,
    new_axes=None,
    output_blocks=None,
    dims=None,
    deserializing=False,
    func_future_args=None,
    return_key_deps=False,
    io_deps=None,
    **kwargs,
):
    """Tensor operation

    Applies a function, ``func``, across blocks from many different input
    collections.  We arrange the pattern with which those blocks interact
    with sets of matching indices.  E.g.::

        make_blockwise_graph(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        $$ z_i = func(x_i, y_i) $$

    More complex patterns may emerge, including multiple indices::

        make_blockwise_graph(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs result in many
    inputs being sent to one function (see examples).

    Examples
    --------
    Simple embarrassingly parallel map operation

    >>> inc = lambda x: x + 1
    >>> make_blockwise_graph(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> make_blockwise_graph(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                                        'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> make_blockwise_graph(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                                           'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 1), ('y', 1, 1)])}

    Pass ``concatenate=True`` to concatenate arrays ahead of time

    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
    ...                      numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,))),
     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}

    Supports broadcasting rules

    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}

    Supports keyword arguments with ``apply``

    >>> def f(a, b=0): return a + b
    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
    {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
     ('z', 1): (apply, f, [('x', 1)], {'b': 10})}

    Include literals by indexing with ``None``

    >>> make_blockwise_graph(add, 'z', 'i', 'x', 'i', 100, None, numblocks={'x': (2,)})  # doctest: +SKIP
    {('z', 0): (add, ('x', 0), 100),
     ('z', 1): (add, ('x', 1), 100)}

    See Also
    --------
    dask.array.blockwise
    dask.blockwise.blockwise
    """
    if numblocks is None:
        raise ValueError("Missing required numblocks argument.")
    new_axes = new_axes or {}
    io_deps = io_deps or {}
    argpairs = list(toolz.partition(2, arrind_pairs))

    if return_key_deps:
        key_deps = {}

    if deserializing:
        from distributed.protocol.serialize import import_allowed_module
        from distributed.worker import dumps_function, warn_dumps
    else:
        from importlib import import_module as import_allowed_module

    # Check if there are tuple arguments in `io_deps`.
    # If so, we must use this tuple to construct the actual
    # IO-argument mapping.
    io_arg_mappings = {}
    for arg, val in io_deps.items():
        if isinstance(val, tuple):
            _args = io_deps[arg]
            module_name, attr_name = _args[0].rsplit(".", 1)
            io_dep_map = getattr(import_allowed_module(module_name), attr_name)
            if deserializing:
                _args = io_dep_map.__dask_distributed_unpack__(*_args)
            io_arg_mappings[arg] = io_dep_map(*_args[1:])

    if concatenate is True:
        from dask.array.core import concatenate_axes as concatenate

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = dims or _make_dims(argpairs, numblocks, new_axes)

    # Generate the abstract "plan" before constructing
    # the actual graph
    (coord_maps, concat_axes, dummies) = _get_coord_mapping(
        dims,
        output,
        out_indices,
        numblocks,
        argpairs,
        concatenate,
    )

    # Unpack delayed objects in kwargs
    dsk2 = {}
    if kwargs:
        task, dsk2 = unpack_collections(kwargs)
        if dsk2:
            kwargs2 = task
        else:
            kwargs2 = kwargs

    # Apply Culling.
    # Only need to construct the specified set of output blocks
    output_blocks = output_blocks or itertools.product(
        *[range(dims[i]) for i in out_indices]
    )

    dsk = {}
    # Create argument lists
    for out_coords in output_blocks:
        deps = set()
        coords = out_coords + dummies
        args = []
        for cmap, axes, (arg, ind) in zip(coord_maps, concat_axes, argpairs):
            if ind is None:
                if deserializing:
                    args.append(stringify_collection_keys(arg))
                else:
                    args.append(arg)
            else:
                arg_coords = tuple(coords[c] for c in cmap)
                if axes:
                    tups = lol_product((arg,), arg_coords)
                    if arg not in io_deps:
                        deps.update(flatten(tups))
                    if concatenate:
                        tups = (concatenate, tups, axes)
                else:
                    tups = (arg,) + arg_coords
                    if arg not in io_deps:
                        deps.add(tups)
                # Replace "place-holder" IO keys with "real" args
                if arg in io_deps:
                    # We don't want to stringify keys for args
                    # we are replacing here
                    idx = tups[1:]
                    if arg in io_arg_mappings:
                        args.append(io_arg_mappings[arg][idx])
                    else:
                        # The required inputs for the IO function
                        # are specified explicitly in `io_deps`
                        # (Or the index is the only required arg)
                        args.append(io_deps[arg].get(idx, idx))
                elif deserializing:
                    args.append(stringify_collection_keys(tups))
                else:
                    args.append(tups)
        out_key = (output,) + out_coords

        if deserializing:
            deps.update(func_future_args)
            args += list(func_future_args)
            if kwargs:
                val = {
                    "function": dumps_function(apply),
                    "args": warn_dumps(args),
                    "kwargs": warn_dumps(kwargs2),
                }
            else:
                val = {"function": func, "args": warn_dumps(args)}
        else:
            if kwargs:
                val = (apply, func, args, kwargs2)
            else:
                args.insert(0, func)
                val = tuple(args)
        dsk[out_key] = val
        if return_key_deps:
            key_deps[out_key] = deps

    if dsk2:
        dsk.update(ensure_dict(dsk2))

    if return_key_deps:
        return dsk, key_deps
    else:
        return dsk
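# Culling in miniature: the ``output_blocks`` short-circuit above means only
# the requested coordinates are visited at all.  Illustrative sketch (the
# block counts and names are arbitrary):
#
#     >>> import itertools
#     >>> wanted = [(0, 1)]  # cull down to a single output block
#     >>> blocks = wanted or itertools.product(range(2), range(2))
#     >>> {('z',) + c: ('inc', ('x',) + c) for c in blocks}
#     {('z', 0, 1): ('inc', ('x', 0, 1))}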
def make_blockwise_graph(func, output, out_indices, *arrind_pairs, **kwargs):
    """Tensor operation

    Applies a function, ``func``, across blocks from many different input
    collections.  We arrange the pattern with which those blocks interact
    with sets of matching indices.  E.g.::

        make_blockwise_graph(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        $$ z_i = func(x_i, y_i) $$

    More complex patterns may emerge, including multiple indices::

        make_blockwise_graph(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs result in many
    inputs being sent to one function (see examples).

    Examples
    --------
    Simple embarrassingly parallel map operation

    >>> inc = lambda x: x + 1
    >>> make_blockwise_graph(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2),
    ...                                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> #                                        z_ij ~ x_ij y_ji
    >>> #               ..         ..         .. notice swap
    >>> make_blockwise_graph(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji', numblocks={'x': (2, 2),
    ...                                                                        'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (addT, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (addT, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (addT, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (addT, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> make_blockwise_graph(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2),
    ...                                                                           'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 1), ('y', 1, 1)])}

    Pass ``concatenate=True`` to concatenate arrays ahead of time

    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
    ...                      numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,))),
     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}

    Supports broadcasting rules

    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2),
    ...                                                                       'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}

    Supports keyword arguments with ``apply``

    >>> def f(a, b=0): return a + b
    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
    {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
     ('z', 1): (apply, f, [('x', 1)], {'b': 10})}

    Include literals by indexing with ``None``

    >>> make_blockwise_graph(add, 'z', 'i', 'x', 'i', 100, None, numblocks={'x': (2,)})  # doctest: +SKIP
    {('z', 0): (add, ('x', 0), 100),
     ('z', 1): (add, ('x', 1), 100)}

    See Also
    --------
    dask.array.blockwise
    dask.blockwise.blockwise
    """
    numblocks = kwargs.pop("numblocks")
    concatenate = kwargs.pop("concatenate", None)
    new_axes = kwargs.pop("new_axes", {})
    output_blocks = kwargs.pop("output_blocks", None)
    dims = kwargs.pop("dims", None)
    argpairs = list(toolz.partition(2, arrind_pairs))

    deserializing = kwargs.pop("deserializing", False)
    func_future_args = kwargs.pop("func_future_args", None)
    return_key_deps = kwargs.pop("return_key_deps", False)
    if return_key_deps:
        key_deps = {}

    if deserializing:
        from distributed.worker import warn_dumps, dumps_function

    if concatenate is True:
        from dask.array.core import concatenate_axes as concatenate

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = dims or _make_dims(argpairs, numblocks, new_axes)

    # Generate the abstract "plan" before constructing
    # the actual graph
    (coord_maps, concat_axes, dummies) = _get_coord_mapping(
        dims,
        output,
        out_indices,
        numblocks,
        argpairs,
        concatenate,
    )

    # Unpack delayed objects in kwargs
    dsk2 = {}
    if kwargs:
        task, dsk2 = unpack_collections(kwargs)
        if dsk2:
            kwargs2 = task
        else:
            kwargs2 = kwargs

    # Apply Culling.
    # Only need to construct the specified set of output blocks
    output_blocks = output_blocks or itertools.product(
        *[range(dims[i]) for i in out_indices]
    )

    dsk = {}
    # Create argument lists
    for out_coords in output_blocks:
        deps = set()
        coords = out_coords + dummies
        args = []
        for cmap, axes, (arg, ind) in zip(coord_maps, concat_axes, argpairs):
            if ind is None:
                args.append(arg)
            else:
                arg_coords = tuple(coords[c] for c in cmap)
                if axes:
                    tups = lol_product((arg,), arg_coords)
                    deps.update(flatten(tups))
                    if concatenate:
                        tups = (concatenate, tups, axes)
                else:
                    tups = (arg,) + arg_coords
                    deps.add(tups)
                args.append(tups)
        out_key = (output,) + out_coords

        if deserializing:
            deps.update(func_future_args)
            args = stringify_collection_keys(args) + list(func_future_args)
            if kwargs:
                val = {
                    "function": dumps_function(apply),
                    "args": warn_dumps(args),
                    "kwargs": warn_dumps(kwargs2),
                }
            else:
                val = {"function": func, "args": warn_dumps(args)}
        else:
            if kwargs:
                val = (apply, func, args, kwargs2)
            else:
                args.insert(0, func)
                val = tuple(args)
        dsk[out_key] = val
        if return_key_deps:
            key_deps[out_key] = deps

    if dsk2:
        dsk.update(ensure_dict(dsk2))

    if return_key_deps:
        return dsk, key_deps
    else:
        return dsk
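# What a task sees under contraction, in miniature: with the default
# ``concatenate=None`` the contracted argument arrives as a list of blocks,
# while ``concatenate=True`` splices the blocks together before the call.
# Illustrative numpy sketch (``x0``/``x1`` stand in for two chunks):
#
#     >>> import numpy as np
#     >>> x0, x1 = np.ones(3), np.ones(3)
#     >>> float(sum(b.sum() for b in [x0, x1]))   # list-of-blocks convention
#     6.0
#     >>> float(np.concatenate([x0, x1]).sum())   # concatenate=True convention
#     6.0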