def _rayify_task(task, key, deps): """ Rayifies the given task, submitting it as a Ray task to the Ray cluster. Args: task: A Dask graph value, being either a literal, dependency key, Dask task, or a list thereof. key: The Dask graph key for the given task. deps: The dependencies of this task. Returns: A literal, a Ray object reference representing a submitted task, or a list thereof. """ if isinstance(task, list): # Recursively rayify this list. This will still bottom out at the first # actual task encountered, inlining any tasks in that task's arguments. return [_rayify_task(t, deps) for t in task] elif istask(task): # Unpacks and repacks Ray object references and submits the task to the # Ray cluster for execution. func, args = task[0], task[1:] # If the function's arguments contain nested object references, we must # unpack said object references into a flat set of arguments so that # Ray properly tracks the object dependencies between Ray tasks. object_refs, repack = unpack_object_refs(args, deps) # Submit the task using a wrapper function. return dask_task_wrapper.remote(func, repack, *object_refs) elif not ishashable(task): return task elif task in deps: return deps[task] else: return task
def _get_arg(a):
    """Resolve a single task argument.

    The argument is handled according to its shape:

    * a hashable value that is a key of ``dsk`` is followed through ``dsk``
      until the looked-up value is no longer a key, and the final value is
      handed to ``_execute_task``;
    * a task is passed to ``spawn`` with each of its arguments resolved
      recursively;
    * a dict, ``List`` or ``Tuple`` is rebuilt with every element resolved
      recursively;
    * anything else (including every ``str`` that is not a key) is returned
      unchanged.

    NOTE(review): ``dsk``, ``spawn`` and ``_execute_task`` are not defined in
    this block -- presumably they come from an enclosing scope; confirm.
    """
    if ishashable(a) and a in dsk.keys():
        # Follow the chain of keys down to the first non-key value.
        target = dsk[a]
        while ishashable(target) and target in dsk.keys():
            target = dsk[target]
        return _execute_task(target)

    # Strings support indexing but are never treated as containers here.
    if isinstance(a, str) or not hasattr(a, "__getitem__"):
        return a

    if istask(a):
        # TODO: Handle `SubgraphCallable`, which may contain a dsk in it.
        func = a[0]
        resolved_args = tuple(_get_arg(item) for item in a[1:])
        return spawn(func, args=resolved_args)

    if isinstance(a, dict):
        return {k: _get_arg(v) for k, v in a.items()}

    if isinstance(a, (List, Tuple)):
        # Rebuild using the argument's own type.
        return type(a)(_get_arg(item) for item in a)

    return a
def find_symbol(self, arg):
    """Look up the symbol registered for a function argument.

    When ``arg`` is a known key for an external value, its variable symbol
    (``varN``) is returned. When it is a known key for a previously
    registered function call, the symbol holding that call's return value
    (``retN``) is returned. Unhashable or unknown arguments yield ``None``.
    """
    if not ishashable(arg):
        return None

    # Dask key that maps to an input variable?
    var_sym = self.var_key_to_sym.get(arg, None)
    if var_sym is not None:
        return var_sym

    # Dask key that maps to a function?
    func_sym = self.func_key_to_sym.get(arg, None)
    if func_sym is None:
        return None

    # The symbol is the return value from calling the function.
    return self.func_sym_to_ret_sym[func_sym]
def inline(dsk, keys=None, inline_constants=True, dependencies=None):
    """Return a new dask in which the given keys are replaced by their values.

    When ``inline_constants`` is True, all constants are inlined as well.
    Inlined keys are not removed from the graph; follow ``inline`` with
    ``cull`` to drop them.

    Parameters
    ----------
    dsk : dict
        The dask graph.
    keys : optional
        Key or keys to inline; normalised with ``_flat_set``.
    inline_constants : bool, optional
        Also inline keys whose value is another key of ``dsk``, or a
        non-task value with no dependencies.
    dependencies : dict, optional
        Mapping of key to its dependencies. Computed with
        ``get_dependencies`` when not given. If the first value is a list,
        every value is converted to a set.

    Returns
    -------
    dict
        A new graph with the substitutions applied.

    Examples
    --------
    >>> def inc(x):
    ...     return x + 1

    >>> def add(x, y):
    ...     return x + y

    >>> d = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'x', 'y')}
    >>> inline(d)  # doctest: +ELLIPSIS
    {'x': 1, 'y': (<function inc at ...>, 1), 'z': (<function add at ...>, 1, 'y')}

    >>> inline(d, keys='y')  # doctest: +ELLIPSIS
    {'x': 1, 'y': (<function inc at ...>, 1), 'z': (<function add at ...>, 1, (<function inc at ...>, 1))}

    >>> inline(d, keys='y', inline_constants=False)  # doctest: +ELLIPSIS
    {'x': 1, 'y': (<function inc at ...>, 'x'), 'z': (<function add at ...>, 'x', (<function inc at ...>, 'x'))}
    """
    if dependencies and isinstance(next(iter(dependencies.values())), list):
        dependencies = {k: set(v) for k, v in dependencies.items()}

    keys = _flat_set(keys)

    if dependencies is None:
        dependencies = {k: get_dependencies(dsk, k) for k in dsk}

    if inline_constants:
        for k, v in dsk.items():
            is_alias = ishashable(v) and v in dsk
            if is_alias or (not dependencies[k] and not istask(v)):
                keys.add(k)

    # Keys may depend on other keys, so determine replace order with toposort.
    # The values stored in `keysubs` do not include other keys.
    inlinable = {k: dsk[k] for k in keys if k in dsk}
    replaceorder = toposort(inlinable, dependencies=dependencies)

    keysubs = {}
    for key in replaceorder:
        val = dsk[key]
        for dep in keys & dependencies[key]:
            replace = keysubs[dep] if dep in keysubs else dsk[dep]
            val = subs(val, dep, replace)
        keysubs[key] = val

    # Make new dask with substitutions: inlined keys first, then the rest in
    # their original order.
    dsk2 = keysubs.copy()
    for key, val in dsk.items():
        if key in dsk2:
            continue
        for item in keys & dependencies[key]:
            val = subs(val, item, keysubs[item])
        dsk2[key] = val

    return dsk2
def _to_cytoscape_json( dsk, data_attributes=None, function_attributes=None, collapse_outputs=False, verbose=False, **kwargs, ): """ Convert a dask graph to Cytoscape JSON: https://js.cytoscape.org/#notation/elements-json """ nodes = [] edges = [] data = {"nodes": nodes, "edges": edges} data_attributes = data_attributes or {} function_attributes = function_attributes or {} seen = set() connected = set() for k, v in dsk.items(): k_name = name(k) if istask(v): func_name = name((k, "function")) if not collapse_outputs else k_name if collapse_outputs or func_name not in seen: seen.add(func_name) attrs = function_attributes.get(k, {}).copy() nodes.append( { "data": { "id": func_name, "label": key_split(k), "shape": "ellipse", "color": "gray", **attrs, } } ) if not collapse_outputs: edges.append({"data": {"source": func_name, "target": k_name}}) connected.add(func_name) connected.add(k_name) for dep in get_dependencies(dsk, k): dep_name = name(dep) if dep_name not in seen: seen.add(dep_name) attrs = data_attributes.get(dep, {}).copy() nodes.append( { "data": { "id": dep_name, "label": box_label(dep, verbose), "shape": "rectangle", "color": "gray", **attrs, } } ) edges.append( { "data": { "source": dep_name, "target": func_name, } } ) connected.add(dep_name) connected.add(func_name) elif ishashable(v) and v in dsk: v_name = name(v) edges.append( { "data": { "source": v_name, "target": k_name, } } ) connected.add(v_name) connected.add(k_name) if (not collapse_outputs or k_name in connected) and k_name not in seen: seen.add(k_name) attrs = data_attributes.get(k, {}).copy() nodes.append( { "data": { "id": k_name, "label": box_label(k, verbose), "shape": "rectangle", "color": "gray", **attrs, } } ) return data
def to_graphviz(
    dsk,
    data_attributes=None,
    function_attributes=None,
    rankdir="BT",
    graph_attr=None,
    node_attr=None,
    edge_attr=None,
    collapse_outputs=False,
    verbose=False,
    **kwargs,
):
    """Build a ``graphviz.Digraph`` describing a dask graph.

    Tasks are drawn as "circle" nodes and data keys as "box" nodes; the
    defaults can be overridden per key.

    Parameters
    ----------
    dsk : dict
        The dask graph.
    data_attributes : dict, optional
        Mapping of key to a dict of node attributes for that key's data node.
    function_attributes : dict, optional
        Mapping of key to a dict of node attributes for that key's function
        node.
    rankdir : str, optional
        Stored as the ``rankdir`` graph attribute.
    graph_attr, node_attr, edge_attr : dict, optional
        Attribute dicts forwarded to ``graphviz.Digraph``. The caller's dicts
        are not modified.
    collapse_outputs : bool, optional
        When true, a task and its output share a single node instead of
        being joined by an edge.
    verbose : bool, optional
        Forwarded to ``box_label``.
    **kwargs
        Merged into the graph attributes.

    Returns
    -------
    graphviz.Digraph
    """
    graphviz = import_required(
        "graphviz",
        "Drawing dask graphs with the graphviz engine requires the `graphviz` "
        "python library and the `graphviz` system library.\n\n"
        "Please either conda or pip install as follows:\n\n"
        " conda install python-graphviz # either conda install\n"
        " python -m pip install graphviz # or pip install and follow installation instructions",
    )

    data_attributes = data_attributes or {}
    function_attributes = function_attributes or {}
    # Copy the dicts that are written to below; otherwise `rankdir`,
    # `fontname` and **kwargs would leak into a dict the caller may reuse.
    graph_attr = dict(graph_attr or {})
    node_attr = dict(node_attr or {})
    edge_attr = edge_attr or {}

    graph_attr["rankdir"] = rankdir
    node_attr["fontname"] = "helvetica"
    graph_attr.update(kwargs)

    g = graphviz.Digraph(
        graph_attr=graph_attr, node_attr=node_attr, edge_attr=edge_attr
    )

    seen = set()
    connected = set()

    for k, v in dsk.items():
        k_name = name(k)
        if istask(v):
            func_name = name((k, "function")) if not collapse_outputs else k_name
            if collapse_outputs or func_name not in seen:
                seen.add(func_name)
                attrs = function_attributes.get(k, {}).copy()
                attrs.setdefault("label", key_split(k))
                attrs.setdefault("shape", "circle")
                g.node(func_name, **attrs)
            if not collapse_outputs:
                g.edge(func_name, k_name)
                connected.add(func_name)
                connected.add(k_name)

            for dep in get_dependencies(dsk, k):
                dep_name = name(dep)
                if dep_name not in seen:
                    seen.add(dep_name)
                    attrs = data_attributes.get(dep, {}).copy()
                    attrs.setdefault("label", box_label(dep, verbose))
                    attrs.setdefault("shape", "box")
                    g.node(dep_name, **attrs)
                g.edge(dep_name, func_name)
                connected.add(dep_name)
                connected.add(func_name)

        elif ishashable(v) and v in dsk:
            # Alias: the value is itself another key of the graph.
            v_name = name(v)
            g.edge(v_name, k_name)
            connected.add(v_name)
            connected.add(k_name)

        # With collapsed outputs, only keys that take part in an edge get
        # their own data node.
        if (not collapse_outputs or k_name in connected) and k_name not in seen:
            seen.add(k_name)
            attrs = data_attributes.get(k, {}).copy()
            attrs.setdefault("label", box_label(k, verbose))
            attrs.setdefault("shape", "box")
            g.node(k_name, **attrs)
    return g
def _rayify_task( task, key, deps, ray_presubmit_cbs, ray_postsubmit_cbs, ray_pretask_cbs, ray_posttask_cbs, ): """ Rayifies the given task, submitting it as a Ray task to the Ray cluster. Args: task (tuple): A Dask graph value, being either a literal, dependency key, Dask task, or a list thereof. key (str): The Dask graph key for the given task. deps (dict): The dependencies of this task. ray_presubmit_cbs (callable): Pre-task submission callbacks. ray_postsubmit_cbs (callable): Post-task submission callbacks. ray_pretask_cbs (callable): Pre-task execution callbacks. ray_posttask_cbs (callable): Post-task execution callbacks. Returns: A literal, a Ray object reference representing a submitted task, or a list thereof. """ if isinstance(task, list): # Recursively rayify this list. This will still bottom out at the first # actual task encountered, inlining any tasks in that task's arguments. return [ _rayify_task( t, key, deps, ray_presubmit_cbs, ray_postsubmit_cbs, ray_pretask_cbs, ray_posttask_cbs, ) for t in task ] elif istask(task): # Unpacks and repacks Ray object references and submits the task to the # Ray cluster for execution. if ray_presubmit_cbs is not None: alternate_returns = [ cb(task, key, deps) for cb in ray_presubmit_cbs ] for alternate_return in alternate_returns: # We don't submit a Ray task if a presubmit callback returns # a non-`None` value, instead we return said value. # NOTE: This returns the first non-None presubmit callback # return value. if alternate_return is not None: return alternate_return func, args = task[0], task[1:] if func is multiple_return_get: return _execute_task(task, deps) # If the function's arguments contain nested object references, we must # unpack said object references into a flat set of arguments so that # Ray properly tracks the object dependencies between Ray tasks. arg_object_refs, repack = unpack_object_refs(args, deps) # Submit the task using a wrapper function. 
object_refs = dask_task_wrapper.options( name=f"dask:{key!s}", num_returns=(1 if not isinstance(func, MultipleReturnFunc) else func.num_returns), ).remote( func, repack, key, ray_pretask_cbs, ray_posttask_cbs, *arg_object_refs, ) if ray_postsubmit_cbs is not None: for cb in ray_postsubmit_cbs: cb(task, key, deps, object_refs) return object_refs elif not ishashable(task): return task elif task in deps: return deps[task] else: return task
def _add_logging(dsk, ignore=None):
    """
    Add logging to a Dask graph.

    Every callable at the head of a task tuple is replaced by a
    C{_RunWithEliotContext} wrapper carrying a task id serialized from the
    current action, the key's name and the names of its dependencies.

    @param dsk: The Dask graph.

    @param ignore: Accepted for interface compatibility; not used by this
        function.

    @return: New Dask graph.
    """
    ctx = current_action()
    result = {}

    # Use topological sort to ensure Eliot actions are in logical order of
    # execution in Dask:
    keys = toposort(dsk)

    # Give each key a string name. Some keys are just aliases to other
    # keys, so make sure we have underlying key available. Later on might
    # want to shorten them as well.
    def simplify(k):
        if isinstance(k, str):
            return k
        try:
            parts = iter(k)
        except TypeError:
            # Non-iterable keys (e.g. ints or floats) cannot be joined
            # element by element; use their plain string form.
            return str(k)
        return "-".join(str(o) for o in parts)

    key_names = {}
    for key in keys:
        value = dsk[key]
        # Membership is tested against the graph itself (a hash lookup)
        # rather than the `keys` list, which would be a linear scan per key.
        if not callable(value) and ishashable(value) and value in dsk:
            # It's an alias for another key; topological order guarantees the
            # target has already been named.
            key_names[key] = key_names[value]
        else:
            key_names[key] = simplify(key)

    # Values in the graph can be either:
    #
    # 1. A list of other values.
    # 2. A tuple, where first value might be a callable, aka a task.
    # 3. A literal of some sort.
    def maybe_wrap(key, value):
        if isinstance(value, list):
            return [maybe_wrap(key, v) for v in value]
        elif isinstance(value, tuple):
            if not value or not callable(value[0]):
                # Empty tuple or not a callable, so nothing to wrap.
                return value
            func = value[0]
            args = value[1:]
            wrapped_func = _RunWithEliotContext(
                # Serialized once per wrapped task, not shared across tasks.
                task_id=str(ctx.serialize_task_id(), "utf-8"),
                func=func,
                key=key_names[key],
                dependencies=[
                    key_names[k] for k in get_dependencies(dsk, key)
                ],
            )
            return (wrapped_func, ) + args
        else:
            return value

    # Replace function with wrapper that logs appropriate Action; iterate in
    # topological order so action task levels are in reasonable order.
    for key in keys:
        result[key] = maybe_wrap(key, dsk[key])

    assert set(result.keys()) == set(dsk.keys())
    return result
def to_graphviz(
    dsk,
    data_attributes=None,
    function_attributes=None,
    rankdir="BT",
    graph_attr=None,
    node_attr=None,
    edge_attr=None,
    collapse_outputs=False,
    verbose=False,
    **kwargs,
):
    """Build a ``graphviz.Digraph`` describing a dask graph.

    Tasks are drawn as "circle" nodes and data keys as "box" nodes; the
    defaults can be overridden per key.

    Parameters
    ----------
    dsk : dict
        The dask graph.
    data_attributes : dict, optional
        Mapping of key to a dict of node attributes for that key's data node.
    function_attributes : dict, optional
        Mapping of key to a dict of node attributes for that key's function
        node.
    rankdir : str, optional
        Stored as the ``rankdir`` graph attribute.
    graph_attr, node_attr, edge_attr : dict, optional
        Attribute dicts forwarded to ``graphviz.Digraph``. The caller's dicts
        are not modified.
    collapse_outputs : bool, optional
        When true, a task and its output share a single node instead of
        being joined by an edge.
    verbose : bool, optional
        Forwarded to ``box_label``.
    **kwargs
        Merged into the graph attributes.

    Returns
    -------
    graphviz.Digraph
    """
    data_attributes = data_attributes or {}
    function_attributes = function_attributes or {}
    # Copy the dicts that are written to below; otherwise `rankdir`,
    # `fontname` and **kwargs would leak into a dict the caller may reuse.
    graph_attr = dict(graph_attr or {})
    node_attr = dict(node_attr or {})
    edge_attr = edge_attr or {}

    graph_attr["rankdir"] = rankdir
    node_attr["fontname"] = "helvetica"
    graph_attr.update(kwargs)

    g = graphviz.Digraph(
        graph_attr=graph_attr, node_attr=node_attr, edge_attr=edge_attr
    )

    seen = set()
    connected = set()

    for k, v in dsk.items():
        k_name = name(k)
        if istask(v):
            func_name = name((k, "function")) if not collapse_outputs else k_name
            if collapse_outputs or func_name not in seen:
                seen.add(func_name)
                attrs = function_attributes.get(k, {}).copy()
                attrs.setdefault("label", key_split(k))
                attrs.setdefault("shape", "circle")
                g.node(func_name, **attrs)
            if not collapse_outputs:
                g.edge(func_name, k_name)
                connected.add(func_name)
                connected.add(k_name)

            for dep in get_dependencies(dsk, k):
                dep_name = name(dep)
                if dep_name not in seen:
                    seen.add(dep_name)
                    attrs = data_attributes.get(dep, {}).copy()
                    attrs.setdefault("label", box_label(dep, verbose))
                    attrs.setdefault("shape", "box")
                    g.node(dep_name, **attrs)
                g.edge(dep_name, func_name)
                connected.add(dep_name)
                connected.add(func_name)

        elif ishashable(v) and v in dsk:
            # Alias: the value is itself another key of the graph.
            v_name = name(v)
            g.edge(v_name, k_name)
            connected.add(v_name)
            connected.add(k_name)

        # With collapsed outputs, only keys that take part in an edge get
        # their own data node.
        if (not collapse_outputs or k_name in connected) and k_name not in seen:
            seen.add(k_name)
            attrs = data_attributes.get(k, {}).copy()
            attrs.setdefault("label", box_label(k, verbose))
            attrs.setdefault("shape", "box")
            g.node(k_name, **attrs)
    return g