def test_from_delayed_optimize_fusion():
    """DataFrame optimization fuses a ``from_delayed`` layer.

    The ``from_delayed`` layer is expected to fuse with the other
    Blockwise layers and with the input Delayed tasks, leaving a single
    layer in the optimized graph.

    See: https://github.com/dask/dask/pull/8852
    """
    # Ten delayed partitions, each a ten-row frame filled with its index.
    make_partition = delayed(lambda x: pd.DataFrame({"x": [x] * 10}))
    partitions = map(make_partition, range(10))
    meta = pd.DataFrame({"x": [0] * 10})

    # Adding 1 stacks an element-wise layer on top of `from_delayed`.
    ddf = dd.from_delayed(partitions, meta=meta) + 1

    # NOTE: Fusion requires `optimize_blockwise` and `fuse_roots`
    top_layer = ddf.dask.layers[ddf._name]
    assert isinstance(top_layer, Blockwise)

    optimized_graph = optimize(ddf.dask, ddf.__dask_keys__())
    assert len(optimized_graph.layers) == 1
def dataframe_optimize(dsk, keys, **kwargs):
    """Optimize a dataframe graph after rewriting its shuffle layers.

    Parameters
    ----------
    dsk
        The task graph. Anything that is not already a
        ``HighLevelGraph`` is wrapped in one first.
    keys
        The requested output key(s). A single key, or an arbitrarily
        nested list/set of keys; it is flattened into a plain list.
    **kwargs
        Forwarded unchanged to ``optimize``.

    Returns
    -------
    Whatever ``optimize`` returns for the rewritten graph.
    """
    # A lone key is wrapped so that it goes through the same flattening
    # path as a list/set of keys.
    if isinstance(keys, (list, set)):
        flat_keys = list(core.flatten(keys))
    else:
        flat_keys = list(core.flatten([keys]))

    if isinstance(dsk, HighLevelGraph):
        graph = dsk
    else:
        graph = HighLevelGraph.from_collections(id(dsk), dsk, dependencies=())

    # Apply the custom shuffle-layer rewrite before the default optimizer.
    rewritten = rewrite_simple_shuffle_layer(graph, keys=flat_keys)
    return optimize(rewritten, flat_keys, **kwargs)
def dataframe_optimize(dsk, keys, **kwargs):
    """Fallback optimizer used when the custom shuffle rewrite is unavailable.

    Emits a warning telling the user that the custom dataframe shuffle
    optimization needs dask>=2020.12.0, then delegates to the default
    ``optimize`` with the arguments unchanged.

    Parameters
    ----------
    dsk
        The task graph, passed straight through to ``optimize``.
    keys
        The requested output key(s), passed straight through.
    **kwargs
        Forwarded unchanged to ``optimize``.

    Returns
    -------
    Whatever ``optimize`` returns.
    """
    # Adjacent string literals are concatenated with no separator, so each
    # fragment must carry its own trailing space. The original was missing
    # the one after "please upgrade Dask.".
    warnings.warn(
        "Custom dataframe shuffle optimization only works on "
        "dask>=2020.12.0, you are on version "
        f"{dask.__version__}, please upgrade Dask. "
        "Falling back to default dataframe optimizer."
    )
    return optimize(dsk, keys, **kwargs)