Example #1
0
def test_from_delayed_optimize_fusion():
    """Check that a `from_delayed` collection optimizes down to one layer.

    Ten delayed pandas frames are turned into a DataFrame collection and
    incremented by one; after optimization the graph is expected to hold
    a single layer, i.e. the `from_delayed` layer, the Blockwise addition
    and the input Delayed tasks have been fused together.
    See: https://github.com/dask/dask/pull/8852
    """
    make_partition = delayed(lambda x: pd.DataFrame({"x": [x] * 10}))
    partitions = map(make_partition, range(10))
    template = pd.DataFrame({"x": [0] * 10})
    ddf = dd.from_delayed(partitions, meta=template) + 1

    # The outermost layer (the `+ 1`) must be Blockwise before optimizing.
    top_layer = ddf.dask.layers[ddf._name]
    assert isinstance(top_layer, Blockwise)

    # NOTE: Fusion requires `optimize_blockwise` and `fuse_roots`
    optimized = optimize(ddf.dask, ddf.__dask_keys__())
    assert len(optimized.layers) == 1
Example #2
0
    def dataframe_optimize(dsk, keys, **kwargs):
        """Rewrite shuffle layers, then run the default dataframe optimizer.

        Parameters
        ----------
        dsk : HighLevelGraph or other graph object
            Graph to optimize. Anything that is not already a
            ``HighLevelGraph`` is wrapped in one first.
        keys : key or list/set of keys
            Output keys; a value that is neither a list nor a set is
            treated as one key. The result is flattened to a plain list.
        **kwargs
            Forwarded untouched to ``optimize``.

        Returns
        -------
        Whatever ``optimize`` returns for the rewritten graph.
        """
        # Anything other than a list or set is wrapped as a single key.
        key_seq = keys if isinstance(keys, (list, set)) else [keys]
        flat_keys = list(core.flatten(key_seq))

        graph = dsk
        if not isinstance(graph, HighLevelGraph):
            # Wrap the raw graph as a single layer named after its identity.
            graph = HighLevelGraph.from_collections(
                id(graph), graph, dependencies=()
            )

        # The custom shuffle-layer rewrite runs before the stock optimizer.
        rewritten = rewrite_simple_shuffle_layer(graph, keys=flat_keys)
        return optimize(rewritten, flat_keys, **kwargs)
Example #3
0
 def dataframe_optimize(dsk, keys, **kwargs):
     """Fallback optimizer: warn, then delegate to the default optimizer.

     Emits a warning saying that the custom dataframe shuffle optimization
     needs dask>=2020.12.0 and reports the installed version, then calls
     ``optimize`` with the arguments unchanged.

     Parameters
     ----------
     dsk
         Graph to optimize; passed straight through to ``optimize``.
     keys
         Output keys; passed straight through to ``optimize``.
     **kwargs
         Forwarded untouched to ``optimize``.

     Returns
     -------
     Whatever ``optimize`` returns.
     """
     # Adjacent string literals are joined with no separator, so the space
     # after "please upgrade Dask." has to be written explicitly.
     warnings.warn(
         "Custom dataframe shuffle optimization only works on "
         "dask>=2020.12.0, you are on version "
         f"{dask.__version__}, please upgrade Dask. "
         "Falling back to default dataframe optimizer."
     )
     return optimize(dsk, keys, **kwargs)