Example 1
def _test_jit_unspill(protocol):
    import cudf
    from cudf.tests.utils import assert_eq

    dask.config.update(
        dask.config.global_config,
        {
            "ucx": {
                "TLS": "tcp,sockcm,cuda_copy",
            },
        },
        priority="new",
    )

    with dask_cuda.LocalCUDACluster(
            protocol=protocol,
            dashboard_address=None,
            n_workers=1,
            threads_per_worker=1,
            processes=True,
            jit_unspill=True,
            device_memory_limit="1B",
    ) as cluster:
        with Client(cluster):
            np.random.seed(42)
            df = cudf.DataFrame.from_pandas(
                pd.DataFrame({"key": np.random.random(100)}))
            ddf = dd.from_pandas(df.copy(), npartitions=4)
            ddf = explicit_comms_shuffle(ddf, ["key"])

            # Check the values of `ddf` (ignoring the row order)
            expected = df.sort_values("key")
            got = ddf.compute().sort_values("key")
            assert_eq(got, expected)
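In the full test module, a helper like this is presumably driven by a parametrized test that runs it in a child process, so that CUDA and UCX state cannot leak between protocols. A minimal sketch of such a driver, assuming `_test_jit_unspill` is importable; the test name and process handling are illustrative, not taken from the original:

import multiprocessing as mp

import pytest


@pytest.mark.parametrize("protocol", ["tcp", "ucx"])
def test_jit_unspill(protocol):
    # Hypothetical driver: run the helper above in a separate process so the
    # "tcp" and "ucx" parametrizations each start from a clean state.
    pytest.importorskip("cudf")
    p = mp.Process(target=_test_jit_unspill, args=(protocol,))
    p.start()
    p.join()
    assert p.exitcode == 0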
Example 2
def test_spilling_local_cuda_cluster(jit_unspill):
    """Testing spilling of a proxied cudf dataframe in a local cuda cluster"""
    cudf = pytest.importorskip("cudf")

    def task(x):
        assert isinstance(x, cudf.DataFrame)
        if jit_unspill:
            # Check that `x` is a proxy object and the proxied DataFrame is serialized
            assert type(x) is proxy_object.ProxyObject
            assert x._obj_pxy["serializers"] == ["dask", "pickle"]
        else:
            assert type(x) is cudf.DataFrame
        assert len(x) == 10  # Trigger deserialization
        return x

    # Note: setting `device_memory_limit="1B"` triggers spilling immediately
    with dask_cuda.LocalCUDACluster(n_workers=1,
                                    device_memory_limit="1B",
                                    jit_unspill=jit_unspill) as cluster:
        with Client(cluster):
            df = cudf.DataFrame({"a": range(10)})
            ddf = dask_cudf.from_cudf(df, npartitions=1)
            ddf = ddf.map_partitions(task, meta=df.head())
            got = ddf.compute()
            assert_frame_equal(got.to_pandas(), df.to_pandas())
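The `jit_unspill` argument is presumably supplied by pytest parametrization in the surrounding module; a minimal sketch of the assumed decorator:

import pytest


# Assumed parametrization: run the test once with JIT-unspill enabled and once without.
@pytest.mark.parametrize("jit_unspill", [True, False])
def test_spilling_local_cuda_cluster(jit_unspill):
    ...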
Example 3
def test_on_demand_debug_info():
    """Test worker logging when on-demand-spilling fails"""
    rmm = pytest.importorskip("rmm")
    if not hasattr(rmm.mr, "FailureCallbackResourceAdaptor"):
        pytest.skip("RMM doesn't implement FailureCallbackResourceAdaptor")

    total_mem = get_device_total_memory()

    def task():
        rmm.DeviceBuffer(size=total_mem + 1)

    with dask_cuda.LocalCUDACluster(n_workers=1, jit_unspill=True) as cluster:
        with Client(cluster) as client:
            # Warmup, which triggers the initialization of spill-on-demand
            client.submit(range, 10).result()

            # Submit an RMM buffer allocation that is too large to succeed
            with pytest.raises(
                MemoryError, match=r".*std::bad_alloc:.*CUDA error at:.*"
            ):
                client.submit(task).result()

            log = str(client.get_worker_logs())
            assert re.search(
                "WARNING - RMM allocation of .* failed, spill-on-demand", log
            )
            assert re.search("<ProxyManager dev_limit=.* host_limit=.*>: Empty", log)
            assert "traceback:" in log
Example 4
async def test_worker_force_spill_to_disk():
    """Test Dask triggering CPU-to-disk spilling"""
    cudf = pytest.importorskip("cudf")

    with dask.config.set({"distributed.worker.memory.terminate": 0}):
        async with dask_cuda.LocalCUDACluster(
            n_workers=1, device_memory_limit="1MB", jit_unspill=True, asynchronous=True
        ) as cluster:
            async with Client(cluster, asynchronous=True) as client:
                # Create a df that is spilled to host memory immediately
                df = cudf.DataFrame({"key": np.arange(10 ** 8)})
                ddf = dask.dataframe.from_pandas(df, npartitions=1).persist()
                await ddf

                async def f():
                    """Trigger a memory_monitor() and reset memory_limit"""
                    w = get_worker()
                    # Set a host memory limit that triggers spilling to disk
                    w.memory_pause_fraction = False
                    memory = w.monitor.proc.memory_info().rss
                    w.memory_limit = memory - 10 ** 8
                    w.memory_target_fraction = 1
                    await w.memory_monitor()
                    # Check that host memory has been freed
                    assert w.monitor.proc.memory_info().rss < memory - 10 ** 7
                    w.memory_limit = memory * 10  # Un-limit

                await client.submit(f)
                log = str(await client.get_worker_logs())
                # Check that the worker doesn't complain about unmanaged memory
                assert "Unmanaged memory use is high" not in log
Example 5
def test_compatibility_mode_dataframe_shuffle(compatibility_mode, npartitions):
    cudf = pytest.importorskip("cudf")

    def is_proxy_object(x):
        return "ProxyObject" in str(type(x))

    with dask.config.set(jit_unspill_compatibility_mode=compatibility_mode):
        with dask_cuda.LocalCUDACluster(n_workers=1,
                                        jit_unspill=True) as cluster:
            with Client(cluster):
                ddf = dask.dataframe.from_pandas(
                    cudf.DataFrame({"key": np.arange(10)}),
                    npartitions=npartitions,
                )
                res = ddf.shuffle(on="key", shuffle="tasks").persist()

                # With compatibility mode on, we shouldn't encounter any proxy objects
                if compatibility_mode:
                    assert "ProxyObject" not in str(type(res.compute()))
                res = res.map_partitions(is_proxy_object).compute()
                res = res.to_list()

                if compatibility_mode:
                    assert not any(res)  # No proxy objects
                else:
                    assert all(res)  # Only proxy objects
Example 6
def test_communicating_disk_objects(protocol, shared_fs):
    """Testing disk serialization of cuDF dataframe when communicating"""
    cudf = pytest.importorskip("cudf")
    ProxifyHostFile._spill_shared_filesystem = shared_fs

    def task(x):
        # Check that the subclass survives the trip from client to worker
        assert isinstance(x, _PxyObjTest)
        serializer_used = x._pxy_get().serializer
        if shared_fs:
            assert serializer_used == "disk"
        else:
            assert serializer_used == "dask"

    with dask_cuda.LocalCUDACluster(
            n_workers=1, protocol=protocol,
            enable_tcp_over_ucx=protocol == "ucx") as cluster:
        with Client(cluster) as client:
            df = cudf.DataFrame({"a": range(10)})
            df = proxy_object.asproxy(df,
                                      serializers=("disk", ),
                                      subclass=_PxyObjTest)
            df._pxy_get().assert_on_deserializing = False
            df = client.scatter(df)
            client.submit(task, df).result()
            client.shutdown()  # Avoids a UCX shutdown error
Example 7
def test_spilling_local_cuda_cluster(jit_unspill):
    """Testing spilling of a proxied cudf dataframe in a local cuda cluster"""
    cudf = pytest.importorskip("cudf")

    def task(x):
        assert isinstance(x, cudf.DataFrame)
        if jit_unspill:
            # Check that `x` is a proxy object and the proxied DataFrame is serialized
            assert "ProxyObject" in str(type(x))
            assert x._pxy_get().serializer == "dask"
        else:
            assert type(x) is cudf.DataFrame
        assert len(x) == 10  # Trigger deserialization
        return x

    # Note: setting `device_memory_limit="1B"` triggers spilling immediately
    with dask_cuda.LocalCUDACluster(n_workers=1,
                                    device_memory_limit="1B",
                                    jit_unspill=jit_unspill) as cluster:
        with Client(cluster):
            df = cudf.DataFrame({"a": range(10)})
            ddf = dask_cudf.from_cudf(df, npartitions=1)
            ddf = ddf.map_partitions(task, meta=df.head())
            got = ddf.compute()
            if isinstance(got, pandas.Series):
                pytest.xfail(
                    "BUG fixed by <https://github.com/rapidsai/dask-cuda/pull/451>"
                )
            assert_frame_equal(got.to_pandas(), df.to_pandas())
Example 8
def _test_jit_unspill(protocol):
    import cudf

    with dask_cuda.LocalCUDACluster(
            protocol=protocol,
            dashboard_address=None,
            n_workers=1,
            threads_per_worker=1,
            jit_unspill=True,
            device_memory_limit="1B",
            enable_tcp_over_ucx=(protocol == "ucx"),
    ) as cluster:
        with Client(cluster):
            np.random.seed(42)
            df = cudf.DataFrame.from_pandas(
                pd.DataFrame({"key": np.random.random(100)}))
            ddf = dd.from_pandas(df.copy(), npartitions=4)
            ddf = explicit_comms_shuffle(ddf, ["key"])

            # Check the values of `ddf` (ignoring the row order)
            expected = df.sort_values("key")
            got = ddf.compute().sort_values("key")
            assert_eq(got, expected)
Example 9
def test_communicating_proxy_objects(protocol, send_serializers):
    """Testing serialization of cuDF dataframe when communicating"""
    cudf = pytest.importorskip("cudf")

    def task(x):
        # Check that the subclass survives the trip from client to worker
        assert isinstance(x, _PxyObjTest)
        serializers_used = list(x._obj_pxy["serializers"])

        # Check that `x` is serialized with the expected serializers
        if protocol == "ucx":
            if send_serializers is None:
                assert serializers_used == ["cuda", "dask", "pickle"]
            else:
                assert serializers_used == send_serializers
        else:
            assert serializers_used == ["dask", "pickle"]

    with dask_cuda.LocalCUDACluster(
            n_workers=1, protocol=protocol,
            enable_tcp_over_ucx=protocol == "ucx") as cluster:
        with Client(cluster) as client:
            df = cudf.DataFrame({"a": range(10)})
            df = proxy_object.asproxy(df,
                                      serializers=send_serializers,
                                      subclass=_PxyObjTest)

            # Note: in one case we expect deserialization when communicating.
            # Since "tcp" cannot send device memory directly, it will be re-serialized
            # using the default dask serializers that spill the data to main memory.
            if protocol == "tcp" and send_serializers == ["cuda"]:
                df.assert_on_deserializing = False
            else:
                df.assert_on_deserializing = True
            df = client.scatter(df)
            client.submit(task, df).result()
            client.shutdown()  # Avoids a UCX shutdown error
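As in the earlier examples, `protocol` and `send_serializers` are assumed to come from pytest parametrization; the values below are inferred from the branches in the function body, not copied from the original module:

import pytest


@pytest.mark.parametrize("protocol", ["tcp", "ucx"])
@pytest.mark.parametrize("send_serializers", [None, ["dask", "pickle"], ["cuda"]])
def test_communicating_proxy_objects(protocol, send_serializers):
    ...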