def test_dataframe_set_index_sync(wait, client):
    """Persist a demo timeseries, re-index it by ``name`` and check it is non-empty.

    ``wait`` and ``client`` are supplied by the caller (presumably pytest
    fixtures -- confirm against the surrounding test module).
    """
    # NOTE(review): this test name is defined more than once in the visible
    # source; if the definitions share a module, only the last one stays bound.
    column_dtypes = {"value": float, "name": str, "id": int}
    timeseries = dd.demo.make_timeseries(
        "2000",
        "2001",
        column_dtypes,
        freq="2H",
        partition_freq="1M",
        seed=1,
    )

    # Persist the input and block on it before building the re-indexed frame.
    timeseries = client.persist(timeseries)
    wait(timeseries)

    by_name = client.persist(timeseries.set_index("name", shuffle="tasks"))

    # A zero-length result is falsy and fails the test.
    assert len(by_name)
def test_dataframe_set_index_sync(wait, client):
    """Check that a task-based ``set_index`` on a persisted frame yields rows.

    Builds a demo timeseries with ``value``/``name``/``id`` columns, persists
    it through ``client``, waits on it, then re-indexes by ``name`` and
    persists again. The test passes when the final frame has non-zero length.
    """
    # NOTE(review): this test name is defined more than once in the visible
    # source; if the definitions share a module, only the last one stays bound.
    dtypes = {"value": float, "name": str, "id": int}
    source_frame = dd.demo.make_timeseries(
        "2000", "2001", dtypes, freq="2H", partition_freq="1M", seed=1
    )
    source_frame = client.persist(source_frame)
    wait(source_frame)

    reindexed = source_frame.set_index("name", shuffle="tasks")
    reindexed = client.persist(reindexed)

    row_count = len(reindexed)
    assert row_count
def test_dataframe_set_index_sync(wait, client):
    """Re-index a persisted demo timeseries by ``name`` and require a non-empty result."""
    # NOTE(review): this test name is defined more than once in the visible
    # source; if the definitions share a module, earlier definitions are
    # shadowed by later ones.
    schema = {
        "value": float,
        "name": str,
        "id": int,
    }
    frame = client.persist(
        dd.demo.make_timeseries(
            "2000",
            "2001",
            schema,
            freq="2H",
            partition_freq="1M",
            seed=1,
        )
    )
    # Block on the persisted input before re-indexing it.
    wait(frame)

    indexed_frame = client.persist(frame.set_index("name", shuffle="tasks"))
    assert len(indexed_frame)
def test_n_workers(self) -> None:
    """Check that ``_get_workers_from_data`` reports two workers for split data.

    A two-worker ``LocalCluster`` is started. The training ``DaskDMatrix`` is
    built from dask arrays created via tasks restricted to the first worker,
    and the validation ``DaskDMatrix`` from dask dataframes persisted with a
    restriction to the second worker. The merged worker collection for
    ``train`` plus the ``'Valid'`` eval set must have length 2.
    """
    with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
        worker_keys = list(_get_client_workers(client).keys())

        # Imported at function scope, as in the original test.
        from sklearn.datasets import load_breast_cancer

        X, y = load_breast_cancer(return_X_y=True)

        # Training inputs: run da.from_array on the first worker and fetch
        # the resulting collections back to the client.
        train_X = client.submit(
            da.from_array, X, workers=[worker_keys[0]]
        ).result()
        train_y = client.submit(
            da.from_array, y, workers=[worker_keys[0]]
        ).result()
        train = xgb.dask.DaskDMatrix(client, train_X, train_y)

        # Validation inputs: dask dataframes persisted with a restriction to
        # the second worker. The restriction dict is keyed by the lazy
        # collection, so it is built before the name is rebound.
        valid_X = dd.from_array(X)
        valid_X = client.persist(valid_X, workers={valid_X: worker_keys[1]})
        valid_y = dd.from_array(y)
        valid_y = client.persist(valid_y, workers={valid_y: worker_keys[1]})
        valid = xgb.dask.DaskDMatrix(client, valid_X, valid_y)

        merged_workers = xgb.dask._get_workers_from_data(
            train, evals=[(valid, "Valid")]
        )
        assert len(merged_workers) == 2
def test_futures_in_graph(c):
    """Check that a persisted delayed value can feed a further delayed call.

    Builds ``(1 + 1) + (2 + 2)`` from delayed ``add`` calls, persists it
    through ``c``, adds 10 on top of the persisted value and computes the
    result with the ``"dask.distributed"`` scheduler.
    """
    one = delayed(1)
    two = delayed(2)
    one_plus_one = delayed(add)(one, one)
    two_plus_two = delayed(add)(two, two)
    subtotal = delayed(add)(one_plus_one, two_plus_two)

    # Persist the intermediate result, then keep building on top of it.
    persisted_subtotal = c.persist(subtotal)
    total = delayed(add)(persisted_subtotal, 10)

    expected = ((1 + 1) + (2 + 2)) + 10
    assert total.compute(scheduler="dask.distributed") == expected