def test_futures_to_delayed_bag(c):
    """Build a dask bag from scattered futures and check its contents.

    Two copies of the same list are scattered through ``c``; the bag made
    from the resulting futures must iterate to the two lists concatenated.
    The test is skipped when ``dask.bag`` cannot be imported.
    """
    db = pytest.importorskip("dask.bag")

    items = [1, 2, 3]
    scattered = c.scatter([items, items])
    bag = db.from_delayed(scattered)

    assert list(bag) == items * 2
def test_unpublish_sync(client):
    """Check that an unpublished dataset can no longer be fetched.

    Scattered data is published under the name ``data`` and then
    unpublished; a subsequent ``get_dataset`` must raise ``KeyError`` with
    a message containing both "not found" and the dataset name.
    """
    scattered = client.scatter([0, 1, 2])
    client.publish_dataset(data=scattered)
    client.unpublish_dataset(name="data")

    with pytest.raises(KeyError) as raised:
        client.get_dataset(name="data")

    message = str(raised.value)
    assert "not found" in message
    assert "data" in message
def test_unpublish_sync(client):
    """Check that fetching a dataset after unpublishing it raises KeyError.

    The data is published via the keyword ``data`` (so its dataset name is
    ``data``), unpublished by that name, and then requested again. The
    resulting ``KeyError`` message must mention "not found" and the name.
    """
    data = client.scatter([0, 1, 2])
    client.publish_dataset(data=data)
    client.unpublish_dataset(name="data")

    with pytest.raises(KeyError) as exc_info:
        # The call is expected to raise, so its return value is not bound.
        client.get_dataset(name="data")

    assert "not found" in str(exc_info.value)
    assert "data" in str(exc_info.value)
def test_futures_to_delayed_dataframe(c):
    """Build a dask dataframe from scattered pandas frames and verify it.

    Two copies of a small pandas frame are scattered through ``c``; the
    dask dataframe created from those futures must compute to the two
    frames concatenated row-wise. Passing plain integers to
    ``from_delayed`` must raise ``TypeError``. The test is skipped when
    pandas or ``dask.dataframe`` cannot be imported.
    """
    pd = pytest.importorskip("pandas")
    dd = pytest.importorskip("dask.dataframe")

    df = pd.DataFrame({"x": [1, 2, 3]})
    futures = c.scatter([df, df])
    ddf = dd.from_delayed(futures)
    dd.utils.assert_eq(ddf.compute(), pd.concat([df, df], axis=0))

    with pytest.raises(TypeError):
        # The call must raise, so there is nothing to bind the result to.
        dd.from_delayed([1, 2])
def test_futures_to_delayed_array(c):
    """Build a dask array from scattered numpy arrays and verify it.

    Two copies of ``np.arange(5)`` are scattered through ``c``; each future
    is wrapped with ``da.from_delayed`` using the source array's shape and
    dtype, the pieces are concatenated along axis 0, and the computed
    result is compared with the equivalent numpy concatenation. The test
    is skipped when ``dask.array`` or numpy cannot be imported.
    """
    da = pytest.importorskip("dask.array")
    from dask.array.utils import assert_eq

    np = pytest.importorskip("numpy")

    source = np.arange(5)
    scattered = c.scatter([source, source])
    pieces = [
        da.from_delayed(future, shape=source.shape, dtype=source.dtype)
        for future in scattered
    ]
    combined = da.concatenate(pieces, axis=0)

    computed = combined.compute()
    expected = np.concatenate([source, source], axis=0)
    assert_eq(computed, expected)
def test_futures_to_delayed_dataframe(c):
    """Build a dask dataframe from scattered pandas frames and verify it.

    Two copies of a small pandas frame are scattered through ``c``; the
    dask dataframe created from those futures must compute to the two
    frames concatenated row-wise. Passing plain integers to
    ``from_delayed`` must raise ``TypeError``.

    The test is skipped when pandas or ``dask.dataframe`` cannot be
    imported, and also when ``_numpy_120`` is truthy (see the linked dask
    issue).
    """
    pd = pytest.importorskip("pandas")
    dd = pytest.importorskip("dask.dataframe")
    from dask.array.numpy_compat import _numpy_120

    # NOTE(review): presumably this flag marks NumPy >= 1.20 -- confirm
    # against dask.array.numpy_compat.
    if _numpy_120:
        pytest.skip("https://github.com/dask/dask/issues/7170")

    df = pd.DataFrame({"x": [1, 2, 3]})
    futures = c.scatter([df, df])
    ddf = dd.from_delayed(futures)
    dd.utils.assert_eq(ddf.compute(), pd.concat([df, df], axis=0))

    with pytest.raises(TypeError):
        # The call must raise, so there is nothing to bind the result to.
        dd.from_delayed([1, 2])
def test_blockwise_dataframe_io(c, tmpdir, io, fuse, from_futures):
    """Round-trip a dask dataframe through an IO format and compute it.

    The input frame is created either from scattered pandas partitions
    (``from_futures`` truthy) or with ``dd.from_pandas``. It is written
    under ``tmpdir`` in the format selected by ``io`` -- "parquet-pyarrow",
    any other value starting with "parquet" (fastparquet), "csv" or
    "hdf" -- and read back. Column ``x`` plus 10 is then computed with
    ``optimization.fuse.active`` set to ``fuse`` and compared with the same
    operation on the pandas frame.

    Optional dependencies are obtained with ``pytest.importorskip``, so the
    test is skipped when the one needed for the chosen format is missing.
    """
    pd = pytest.importorskip("pandas")
    dd = pytest.importorskip("dask.dataframe")

    # TODO: this configuration is flaky on osx in CI
    # See https://github.com/dask/dask/issues/8816
    if from_futures and sys.platform == "darwin":
        pytest.xfail("This test sometimes fails on osx in CI")

    expected = pd.DataFrame({"x": [1, 2, 3] * 5, "y": range(15)})

    if from_futures:
        # Three consecutive five-row slices of the frame.
        chunks = [expected.iloc[lo : lo + 5] for lo in range(0, 15, 5)]
        source = dd.from_delayed(c.scatter(chunks), meta=chunks[0])
    else:
        source = dd.from_pandas(expected, npartitions=3)

    # NOTE: there is no fallback branch; an ``io`` value matching none of
    # the cases below leaves ``ddf`` unbound.
    if io.startswith("parquet"):
        if io == "parquet-pyarrow":
            engine, required = "pyarrow", "pyarrow.parquet"
        else:
            engine, required = "fastparquet", "fastparquet"
        pytest.importorskip(required)
        directory = str(tmpdir)
        source.to_parquet(directory, engine=engine)
        ddf = dd.read_parquet(directory, engine=engine)
    elif io == "csv":
        directory = str(tmpdir)
        source.to_csv(directory, index=False)
        ddf = dd.read_csv(os.path.join(directory, "*"))
    elif io == "hdf":
        pytest.importorskip("tables")
        h5_path = str(tmpdir.join("h5"))
        source.to_hdf(h5_path, "/data*")
        ddf = dd.read_hdf(h5_path, "/data*")

    expected = expected[["x"]] + 10
    ddf = ddf[["x"]] + 10

    with dask.config.set({"optimization.fuse.active": fuse}):
        ddf.compute()
        optimized = dask.dataframe.optimize(ddf.dask, ddf.__dask_keys__())
        # dsk should not be a dict unless fuse is explicitly True
        assert isinstance(optimized, dict) == bool(fuse)

        dd.assert_eq(ddf, expected, check_index=False)
def test_futures_to_delayed_bag(c):
    """Build a dask bag from scattered futures and check its contents.

    Two copies of the same list are scattered through ``c``; the bag made
    from the resulting futures must iterate to the two lists concatenated.
    """
    # Bind ``db`` locally, as the other tests do for their optional
    # dependencies, so the test skips cleanly when dask.bag is unavailable
    # and does not rely on a module-level name.
    db = pytest.importorskip("dask.bag")

    L = [1, 2, 3]
    futures = c.scatter([L, L])
    b = db.from_delayed(futures)
    assert list(b) == L + L