async def progress_stream(address, interval):
    """Open a TCP connection to scheduler, receive progress messages

    The messages coming back are dicts containing counts of key groups::

        {'inc': {'all': 5, 'memory': 2, 'erred': 0, 'released': 1},
         'dec': {'all': 1, 'memory': 0, 'erred': 0, 'released': 0}}

    Parameters
    ----------
    address: address of scheduler
    interval: time between batches, in seconds

    Examples
    --------
    >>> stream = await progress_stream('127.0.0.1:8786', 0.100)  # doctest: +SKIP
    >>> print(await read(stream))  # doctest: +SKIP
    """
    address = coerce_to_address(address)
    comm = await connect(address)
    await comm.write(
        {
            "op": "feed",
            "setup": dumps_function(AllProgress),
            "function": dumps_function(counts),
            "interval": interval,
            "teardown": dumps_function(_remove_all_progress_plugin),
        }
    )
    return comm

def test_dumps_function():
    a = dumps_function(inc)
    assert cloudpickle.loads(a)(10) == 11

    b = dumps_function(inc)
    assert a is b

    c = dumps_function(dec)
    assert a != c

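The `a is b` assertion above only holds if `dumps_function` memoizes the serialized bytes per function object. A minimal sketch of such a cached serializer, assuming cloudpickle is acceptable for every callable (the real implementation may add size warnings and cache eviction; the name `dumps_function_sketch` is hypothetical):

import cloudpickle

_function_cache: dict = {}


def dumps_function_sketch(func) -> bytes:
    """Serialize a callable, returning the exact same bytes object on repeated calls."""
    try:
        return _function_cache[func]
    except KeyError:
        blob = cloudpickle.dumps(func)
        _function_cache[func] = blob
        return blob
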
def __dask_distributed_pack__(self, client):
    from distributed.worker import dumps_function
    from distributed.utils import CancelledError
    from distributed.utils_comm import unpack_remotedata

    keys = tuple(map(blockwise_token, range(len(self.indices))))
    dsk, _ = fuse(self.dsk, [self.output])
    dsk = (SubgraphCallable(dsk, self.output, keys),)
    dsk, dsk_unpacked_futures = unpack_remotedata(dsk, byte_keys=True)
    func = dumps_function(dsk[0])
    func_future_args = dsk[1:]

    indices = list(toolz.concat(self.indices))
    indices, indices_unpacked_futures = unpack_remotedata(indices, byte_keys=True)

    # Check the legality of the unpacked futures
    for future in itertools.chain(dsk_unpacked_futures, indices_unpacked_futures):
        if future.client is not client:
            raise ValueError(
                "Inputs contain futures that were created by another client."
            )
        if stringify(future.key) not in client.futures:
            raise CancelledError(stringify(future.key))

    # All blockwise tasks will depend on the futures in `indices`
    global_dependencies = tuple(stringify(f.key) for f in indices_unpacked_futures)

    ret = {
        "output": self.output,
        "output_indices": self.output_indices,
        "func": func,
        "func_future_args": func_future_args,
        "global_dependencies": global_dependencies,
        "indices": indices,
        "numblocks": self.numblocks,
        "concatenate": self.concatenate,
        "new_axes": self.new_axes,
        "io_subgraph": (self.io_name, self.io_subgraph) if self.io_name else (None, None),
        "output_blocks": self.output_blocks,
        "dims": self.dims,
    }
    return ret

def asproxy(
    obj: object,
    serializers: Iterable[str] = None,
    subclass: Type["ProxyObject"] = None,
) -> "ProxyObject":
    """Wrap `obj` in a ProxyObject object if it isn't already.

    Parameters
    ----------
    obj: object
        Object to wrap in a ProxyObject object.
    serializers: Iterable[str], optional
        Serializers to use to serialize `obj`. If None, no serialization is done.
    subclass: class, optional
        Specify a subclass of ProxyObject to create instead of ProxyObject.
        `subclass` must be picklable.

    Returns
    -------
    The ProxyObject proxying `obj`
    """
    if isinstance(obj, ProxyObject):  # Already a proxy object
        ret = obj
    elif isinstance(obj, (list, set, tuple, dict)):
        raise ValueError(f"Cannot wrap a collection ({type(obj)}) in a proxy object")
    else:
        fixed_attr = {}
        for attr in _FIXED_ATTRS:
            try:
                val = getattr(obj, attr)
                if callable(val):
                    val = val()
                fixed_attr[attr] = val
            except (AttributeError, TypeError):
                pass

        if subclass is None:
            subclass = ProxyObject
            subclass_serialized = None
        else:
            subclass_serialized = dumps_function(subclass)

        ret = subclass(
            ProxyDetail(
                obj=obj,
                fixed_attr=fixed_attr,
                type_serialized=pickle.dumps(type(obj)),
                typename=dask.utils.typename(type(obj)),
                is_cuda_object=is_device_object(obj),
                subclass=subclass_serialized,
                serializer=None,
                explicit_proxy=False,
            )
        )
    if serializers is not None:
        ret._pxy_serialize(serializers=serializers)
    return ret

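A short usage sketch of this newer `asproxy`, grounded in the branches shown above (it assumes numpy is installed and that `ProxyObject`/`ProxyDetail` behave as referenced): an existing proxy is returned unchanged, and plain Python collections are rejected.

import numpy as np

arr = np.arange(3)
proxy = asproxy(arr)              # wrap a host object in a ProxyObject
assert asproxy(proxy) is proxy    # already a proxy: returned as-is

try:
    asproxy([1, 2, 3])            # collections cannot be wrapped
except ValueError:
    pass
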
def asproxy(obj, serializers=None, subclass=None) -> "ProxyObject":
    """Wrap `obj` in a ProxyObject object if it isn't already.

    Parameters
    ----------
    obj: object
        Object to wrap in a ProxyObject object.
    serializers: list(str), optional
        List of serializers to use to serialize `obj`. If None, no serialization is done.
    subclass: class, optional
        Specify a subclass of ProxyObject to create instead of ProxyObject.
        `subclass` must be picklable.

    Returns
    -------
    The ProxyObject proxying `obj`
    """
    if hasattr(obj, "_obj_pxy"):  # Already a proxy object
        ret = obj
    else:
        fixed_attr = {}
        for attr in _FIXED_ATTRS:
            try:
                val = getattr(obj, attr)
                if callable(val):
                    val = val()
                fixed_attr[attr] = val
            except (AttributeError, TypeError):
                pass

        if subclass is None:
            subclass = ProxyObject
            subclass_serialized = None
        else:
            subclass_serialized = dumps_function(subclass)

        ret = subclass(
            obj=obj,
            fixed_attr=fixed_attr,
            type_serialized=pickle.dumps(type(obj)),
            typename=dask.utils.typename(type(obj)),
            is_cuda_object=is_device_object(obj),
            subclass=subclass_serialized,
            serializers=None,
            explicit_proxy=False,
        )
    if serializers is not None:
        ret._obj_pxy_serialize(serializers=serializers)
    return ret

async def eventstream(address, interval):
    """Open a TCP connection to scheduler, receive batched task messages

    The messages coming back are lists of dicts.  Each dict is of the
    following form::

        {'key': 'mykey', 'worker': 'host:port', 'status': status,
         'compute_start': time(), 'compute_stop': time(),
         'transfer_start': time(), 'transfer_stop': time(),
         'disk_load_start': time(), 'disk_load_stop': time(),
         'other': 'junk'}

    Where ``status`` is either 'OK' or 'error'

    Parameters
    ----------
    address: address of scheduler
    interval: time between batches, in seconds

    Examples
    --------
    >>> stream = await eventstream('127.0.0.1:8786', 0.100)  # doctest: +SKIP
    >>> print(await read(stream))  # doctest: +SKIP
    [{'key': 'x', 'status': 'OK', 'worker': '192.168.0.1:54684', ...},
     {'key': 'y', 'status': 'error', 'worker': '192.168.0.1:54684', ...}]
    """
    address = coerce_to_address(address)
    comm = await connect(address)
    await comm.write(
        {
            "op": "feed",
            "setup": dumps_function(EventStream),
            "function": dumps_function(swap_buffer),
            "interval": interval,
            "teardown": dumps_function(teardown),
        }
    )
    return comm

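Both `progress_stream` and `eventstream` register a scheduler-side "feed": the `dumps_function`-serialized callables are run by the scheduler and their return value is written back over the comm every `interval`. A hypothetical consumer sketch (the `watch_events` name is an assumption, not part of the API) that drains those batches:

async def watch_events(scheduler_address, interval=0.5):
    # Returns the comm opened above; each read yields one batch of task dicts.
    comm = await eventstream(scheduler_address, interval)
    try:
        while True:
            msgs = await comm.read()   # list of {'key', 'status', 'worker', ...} dicts
            for msg in msgs:
                print(msg["key"], msg["status"], msg["worker"])
    finally:
        await comm.close()
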
def make_blockwise_graph(
    func,
    output,
    out_indices,
    *arrind_pairs,
    numblocks=None,
    concatenate=None,
    new_axes=None,
    output_blocks=None,
    dims=None,
    deserializing=False,
    func_future_args=None,
    return_key_deps=False,
    io_deps=None,
    **kwargs,
):
    """Tensor operation

    Applies a function, ``func``, across blocks from many different input
    collections.  We arrange the pattern with which those blocks interact
    with sets of matching indices.  E.g.::

        make_blockwise_graph(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        $$ z_i = func(x_i, y_i) $$

    More complex patterns may emerge, including multiple indices::

        make_blockwise_graph(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs result in many
    inputs being sent to one function (see examples).

    Examples
    --------
    Simple embarrassing map operation

    >>> inc = lambda x: x + 1
    >>> make_blockwise_graph(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij',
    ...                      numblocks={'x': (2, 2), 'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> # z_ij ~ x_ij y_ji
    >>> # ..          ..   .. notice swap
    >>> make_blockwise_graph(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji',
    ...                      numblocks={'x': (2, 2), 'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> make_blockwise_graph(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk',
    ...                      numblocks={'x': (2, 2), 'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 1), ('y', 1, 1)])}

    Pass ``concatenate=True`` to concatenate arrays ahead of time

    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
    ...                      numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,)))
     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}

    Supports broadcasting rules

    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij',
    ...                      numblocks={'x': (1, 2), 'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}

    Support keyword arguments with apply

    >>> def f(a, b=0): return a + b
    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
    {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
     ('z', 1): (apply, f, [('x', 1)], {'b': 10})}

    Include literals by indexing with ``None``

    >>> make_blockwise_graph(add, 'z', 'i', 'x', 'i', 100, None, numblocks={'x': (2,)})  # doctest: +SKIP
    {('z', 0): (add, ('x', 0), 100),
     ('z', 1): (add, ('x', 1), 100)}

    See Also
    --------
    dask.array.blockwise
    dask.blockwise.blockwise
    """
    if numblocks is None:
        raise ValueError("Missing required numblocks argument.")
    new_axes = new_axes or {}
    io_deps = io_deps or {}
    argpairs = list(toolz.partition(2, arrind_pairs))

    if return_key_deps:
        key_deps = {}

    if deserializing:
        from distributed.protocol.serialize import import_allowed_module
        from distributed.worker import dumps_function, warn_dumps
    else:
        from importlib import import_module as import_allowed_module

    # Check if there are tuple arguments in `io_deps`.
    # If so, we must use this tuple to construct the actual
    # IO-argument mapping.
    io_arg_mappings = {}
    for arg, val in io_deps.items():
        if isinstance(val, tuple):
            _args = io_deps[arg]
            module_name, attr_name = _args[0].rsplit(".", 1)
            io_dep_map = getattr(import_allowed_module(module_name), attr_name)
            if deserializing:
                _args = io_dep_map.__dask_distributed_unpack__(*_args)
            io_arg_mappings[arg] = io_dep_map(*_args[1:])

    if concatenate is True:
        from dask.array.core import concatenate_axes as concatenate

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = dims or _make_dims(argpairs, numblocks, new_axes)

    # Generate the abstract "plan" before constructing
    # the actual graph
    (coord_maps, concat_axes, dummies) = _get_coord_mapping(
        dims,
        output,
        out_indices,
        numblocks,
        argpairs,
        concatenate,
    )

    # Unpack delayed objects in kwargs
    dsk2 = {}
    if kwargs:
        task, dsk2 = unpack_collections(kwargs)
        if dsk2:
            kwargs2 = task
        else:
            kwargs2 = kwargs

    # Apply Culling.
    # Only need to construct the specified set of output blocks
    output_blocks = output_blocks or itertools.product(
        *[range(dims[i]) for i in out_indices]
    )

    dsk = {}
    # Create argument lists
    for out_coords in output_blocks:
        deps = set()
        coords = out_coords + dummies
        args = []
        for cmap, axes, (arg, ind) in zip(coord_maps, concat_axes, argpairs):
            if ind is None:
                if deserializing:
                    args.append(stringify_collection_keys(arg))
                else:
                    args.append(arg)
            else:
                arg_coords = tuple(coords[c] for c in cmap)
                if axes:
                    tups = lol_product((arg,), arg_coords)
                    if arg not in io_deps:
                        deps.update(flatten(tups))
                    if concatenate:
                        tups = (concatenate, tups, axes)
                else:
                    tups = (arg,) + arg_coords
                    if arg not in io_deps:
                        deps.add(tups)
                # Replace "place-holder" IO keys with "real" args
                if arg in io_deps:
                    # We don't want to stringify keys for args
                    # we are replacing here
                    idx = tups[1:]
                    if arg in io_arg_mappings:
                        args.append(io_arg_mappings[arg][idx])
                    else:
                        # The required inputs for the IO function
                        # are specified explicitly in `io_deps`
                        # (Or the index is the only required arg)
                        args.append(io_deps[arg].get(idx, idx))
                elif deserializing:
                    args.append(stringify_collection_keys(tups))
                else:
                    args.append(tups)
        out_key = (output,) + out_coords

        if deserializing:
            deps.update(func_future_args)
            args += list(func_future_args)
            if kwargs:
                val = {
                    "function": dumps_function(apply),
                    "args": warn_dumps(args),
                    "kwargs": warn_dumps(kwargs2),
                }
            else:
                val = {"function": func, "args": warn_dumps(args)}
        else:
            if kwargs:
                val = (apply, func, args, kwargs2)
            else:
                args.insert(0, func)
                val = tuple(args)
        dsk[out_key] = val
        if return_key_deps:
            key_deps[out_key] = deps

    if dsk2:
        dsk.update(ensure_dict(dsk2))

    if return_key_deps:
        return dsk, key_deps
    else:
        return dsk

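The `output_blocks` keyword is what the "Apply Culling" comment refers to: when the distributed unpack path knows which output blocks are actually needed, only those keys are built. A hypothetical call restricted to a single block, assuming `make_blockwise_graph` and its module-level helpers are importable from `dask.blockwise`:

inc = lambda x: x + 1
dsk = make_blockwise_graph(
    inc, "z", "ij", "x", "ij",
    numblocks={"x": (2, 2)},
    output_blocks=[(0, 0)],   # build only z[0, 0] instead of all four blocks
)
assert dsk == {("z", 0, 0): (inc, ("x", 0, 0))}
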
def __dask_distributed_pack__(
    self, all_hlg_keys, known_key_dependencies, client, client_keys
):
    from distributed.protocol.serialize import import_allowed_module
    from distributed.utils import CancelledError
    from distributed.utils_comm import unpack_remotedata
    from distributed.worker import dumps_function

    keys = tuple(map(blockwise_token, range(len(self.indices))))
    dsk, _ = fuse(self.dsk, [self.output])

    # Embed literals in `dsk`
    keys2 = []
    indices2 = []
    for key, (val, index) in zip(keys, self.indices):
        if index is None:  # Literal
            dsk[key] = val
        else:
            keys2.append(key)
            indices2.append((val, index))

    dsk = (SubgraphCallable(dsk, self.output, tuple(keys2)),)
    dsk, dsk_unpacked_futures = unpack_remotedata(dsk, byte_keys=True)
    func = dumps_function(dsk[0])
    func_future_args = dsk[1:]

    indices = list(toolz.concat(indices2))
    indices, indices_unpacked_futures = unpack_remotedata(indices, byte_keys=True)

    # Check the legality of the unpacked futures
    for future in itertools.chain(dsk_unpacked_futures, indices_unpacked_futures):
        if future.client is not client:
            raise ValueError(
                "Inputs contain futures that were created by another client."
            )
        if stringify(future.key) not in client.futures:
            raise CancelledError(stringify(future.key))

    # All blockwise tasks will depend on the futures in `indices`
    global_dependencies = {stringify(f.key) for f in indices_unpacked_futures}

    # Handle `io_deps` serialization.
    # If `io_deps[<collection_key>]` is just a dict, we rely
    # entirely on msgpack.  It is up to the `Blockwise` layer to
    # ensure that all arguments are msgpack serializable. To enable
    # more control over serialization, a `BlockwiseIODeps` mapping
    # subclass can be defined with the necessary
    # `__dask_distributed_{pack,unpack}__` methods.
    packed_io_deps = {}
    for name, input_map in self.io_deps.items():
        if isinstance(input_map, tuple):
            # Use the `__dask_distributed_pack__` definition for the
            # specified `BlockwiseIODeps` subclass
            module_name, attr_name = input_map[0].rsplit(".", 1)
            io_dep_map = getattr(import_allowed_module(module_name), attr_name)
            packed_io_deps[name] = io_dep_map.__dask_distributed_pack__(*input_map)
        else:
            packed_io_deps[name] = input_map

    return {
        "output": self.output,
        "output_indices": self.output_indices,
        "func": func,
        "func_future_args": func_future_args,
        "global_dependencies": global_dependencies,
        "indices": indices,
        "is_list": [isinstance(x, list) for x in indices],
        "numblocks": self.numblocks,
        "concatenate": self.concatenate,
        "new_axes": self.new_axes,
        "output_blocks": self.output_blocks,
        "dims": self.dims,
        "io_deps": packed_io_deps,
    }

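The `"func"` entry in the packed dict is just the `dumps_function` bytes for a `SubgraphCallable`, so the receiving side can rebuild a callable and apply it to the per-block arguments. A small round-trip sketch under that assumption, using `cloudpickle.loads` as in `test_dumps_function` above; the `"_0"`/`"_1"` parameter names mirror what `blockwise_token(0)` and `blockwise_token(1)` produce:

import cloudpickle
from dask.optimization import SubgraphCallable

sub = SubgraphCallable({"out": (sum, ["_0", "_1"])}, "out", ("_0", "_1"))
blob = dumps_function(sub)                 # bytes stored under "func"
assert cloudpickle.loads(blob)(3, 4) == 7  # rebuilt callable applied to block args
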
def make_blockwise_graph(func, output, out_indices, *arrind_pairs, **kwargs):
    """Tensor operation

    Applies a function, ``func``, across blocks from many different input
    collections.  We arrange the pattern with which those blocks interact
    with sets of matching indices.  E.g.::

        make_blockwise_graph(func, 'z', 'i', 'x', 'i', 'y', 'i')

    yields an embarrassingly parallel communication pattern and is read as

        $$ z_i = func(x_i, y_i) $$

    More complex patterns may emerge, including multiple indices::

        make_blockwise_graph(func, 'z', 'ij', 'x', 'ij', 'y', 'ji')

        $$ z_{ij} = func(x_{ij}, y_{ji}) $$

    Indices missing in the output but present in the inputs result in many
    inputs being sent to one function (see examples).

    Examples
    --------
    Simple embarrassing map operation

    >>> inc = lambda x: x + 1
    >>> make_blockwise_graph(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (inc, ('x', 0, 0)),
     ('z', 0, 1): (inc, ('x', 0, 1)),
     ('z', 1, 0): (inc, ('x', 1, 0)),
     ('z', 1, 1): (inc, ('x', 1, 1))}

    Simple operation on two datasets

    >>> add = lambda x, y: x + y
    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij',
    ...                      numblocks={'x': (2, 2), 'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Operation that flips one of the datasets

    >>> addT = lambda x, y: x + y.T  # Transpose each chunk
    >>> # z_ij ~ x_ij y_ji
    >>> # ..          ..   .. notice swap
    >>> make_blockwise_graph(addT, 'z', 'ij', 'x', 'ij', 'y', 'ji',
    ...                      numblocks={'x': (2, 2), 'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 1, 0)),
     ('z', 1, 0): (add, ('x', 1, 0), ('y', 0, 1)),
     ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))}

    Dot product with contraction over ``j`` index.  Yields list arguments

    >>> make_blockwise_graph(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk',
    ...                      numblocks={'x': (2, 2), 'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 0), ('y', 1, 0)]),
     ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 1), ('y', 1, 1)]),
     ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 0), ('y', 1, 0)]),
     ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 1), ('y', 1, 1)])}

    Pass ``concatenate=True`` to concatenate arrays ahead of time

    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'ij', 'y', 'ij', concatenate=True,
    ...                      numblocks={'x': (2, 2), 'y': (2, 2,)})  # doctest: +SKIP
    {('z', 0): (f, (concatenate_axes, [('x', 0, 0), ('x', 0, 1)], (1,)),
                   (concatenate_axes, [('y', 0, 0), ('y', 0, 1)], (1,)))
     ('z', 1): (f, (concatenate_axes, [('x', 1, 0), ('x', 1, 1)], (1,)),
                   (concatenate_axes, [('y', 1, 0), ('y', 1, 1)], (1,)))}

    Supports broadcasting rules

    >>> make_blockwise_graph(add, 'z', 'ij', 'x', 'ij', 'y', 'ij',
    ...                      numblocks={'x': (1, 2), 'y': (2, 2)})  # doctest: +SKIP
    {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)),
     ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)),
     ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)),
     ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 1))}

    Support keyword arguments with apply

    >>> def f(a, b=0): return a + b
    >>> make_blockwise_graph(f, 'z', 'i', 'x', 'i', numblocks={'x': (2,)}, b=10)  # doctest: +SKIP
    {('z', 0): (apply, f, [('x', 0)], {'b': 10}),
     ('z', 1): (apply, f, [('x', 1)], {'b': 10})}

    Include literals by indexing with ``None``

    >>> make_blockwise_graph(add, 'z', 'i', 'x', 'i', 100, None, numblocks={'x': (2,)})  # doctest: +SKIP
    {('z', 0): (add, ('x', 0), 100),
     ('z', 1): (add, ('x', 1), 100)}

    See Also
    --------
    dask.array.blockwise
    dask.blockwise.blockwise
    """
    numblocks = kwargs.pop("numblocks")
    concatenate = kwargs.pop("concatenate", None)
    new_axes = kwargs.pop("new_axes", {})
    output_blocks = kwargs.pop("output_blocks", None)
    dims = kwargs.pop("dims", None)
    argpairs = list(toolz.partition(2, arrind_pairs))

    deserializing = kwargs.pop("deserializing", False)
    func_future_args = kwargs.pop("func_future_args", None)
    return_key_deps = kwargs.pop("return_key_deps", False)
    if return_key_deps:
        key_deps = {}

    if deserializing:
        from distributed.worker import warn_dumps, dumps_function

    if concatenate is True:
        from dask.array.core import concatenate_axes as concatenate

    # Dictionary mapping {i: 3, j: 4, ...} for i, j, ... the dimensions
    dims = dims or _make_dims(argpairs, numblocks, new_axes)

    # Generate the abstract "plan" before constructing
    # the actual graph
    (coord_maps, concat_axes, dummies) = _get_coord_mapping(
        dims,
        output,
        out_indices,
        numblocks,
        argpairs,
        concatenate,
    )

    # Unpack delayed objects in kwargs
    dsk2 = {}
    if kwargs:
        task, dsk2 = unpack_collections(kwargs)
        if dsk2:
            kwargs2 = task
        else:
            kwargs2 = kwargs

    # Apply Culling.
    # Only need to construct the specified set of output blocks
    output_blocks = output_blocks or itertools.product(
        *[range(dims[i]) for i in out_indices]
    )

    dsk = {}
    # Create argument lists
    for out_coords in output_blocks:
        deps = set()
        coords = out_coords + dummies
        args = []
        for cmap, axes, (arg, ind) in zip(coord_maps, concat_axes, argpairs):
            if ind is None:
                args.append(arg)
            else:
                arg_coords = tuple(coords[c] for c in cmap)
                if axes:
                    tups = lol_product((arg,), arg_coords)
                    deps.update(flatten(tups))
                    if concatenate:
                        tups = (concatenate, tups, axes)
                else:
                    tups = (arg,) + arg_coords
                    deps.add(tups)
                args.append(tups)
        out_key = (output,) + out_coords

        if deserializing:
            deps.update(func_future_args)
            args = stringify_collection_keys(args) + list(func_future_args)
            if kwargs:
                val = {
                    "function": dumps_function(apply),
                    "args": warn_dumps(args),
                    "kwargs": warn_dumps(kwargs2),
                }
            else:
                val = {"function": func, "args": warn_dumps(args)}
        else:
            if kwargs:
                val = (apply, func, args, kwargs2)
            else:
                args.insert(0, func)
                val = tuple(args)
        dsk[out_key] = val
        if return_key_deps:
            key_deps[out_key] = deps

    if dsk2:
        dsk.update(ensure_dict(dsk2))

    if return_key_deps:
        return dsk, key_deps
    else:
        return dsk