Beispiel #1
0
def test_extract_partitions_shape(nrows, ncols, n_parts, input_type, colocated,
                                  cluster):
    """Check that extracted partitions keep the row counts of the input.

    Builds ``X`` and ``y`` with ``make_blobs``, records the number of rows in
    every partition (dataframe) or row chunk (array), runs
    ``_extract_partitions`` on either ``X`` alone or on the ``(X, y)`` pair,
    and compares the first dimension of each extracted part against the
    recorded row counts.

    Parameters
    ----------
    nrows, ncols, n_parts
        Forwarded to ``make_blobs``.
    input_type : str
        ``"dataframe"`` or ``"array"``; forwarded to ``make_blobs`` as
        ``output`` and used to choose how partition lengths are measured.
    colocated : bool
        When true, ``X`` and ``y`` are extracted together as a tuple and
        both halves of every part are checked.
    cluster
        Passed to ``Client`` to create the client used by the test.

    Raises
    ------
    ValueError
        If ``input_type`` is not one of the two supported values.
    """
    client = Client(cluster)

    try:
        X, y = make_blobs(nrows=nrows,
                          ncols=ncols,
                          n_parts=n_parts,
                          output=input_type)

        if input_type == "dataframe":
            X_len_parts = X.map_partitions(len).compute()
            y_len_parts = y.map_partitions(len).compute()
        elif input_type == "array":
            X_len_parts = X.chunks[0]
            y_len_parts = y.chunks[0]
        else:
            # Fail with a clear message instead of an unbound-name error
            # further down.
            raise ValueError(
                "unsupported input_type: {!r}".format(input_type))

        dask_input = (X, y) if colocated else X
        gpu_futures = client.sync(_extract_partitions, dask_input, client)

        parts = [part.result() for worker, part in gpu_futures]

        # Without this check the per-part comparison below would pass
        # vacuously if fewer parts than expected (or none) were returned.
        assert len(parts) == len(X_len_parts)

        for i, part in enumerate(parts):
            if colocated:
                # Each colocated part holds the X piece at index 0 and the
                # y piece at index 1.
                assert part[0].shape[0] == X_len_parts[i]
                assert part[1].shape[0] == y_len_parts[i]
            else:
                assert part.shape[0] == X_len_parts[i]

    finally:
        client.close()
Beispiel #2
0
def test_extract_partitions_futures(nrows, ncols, n_parts, X_delayed,
                                    y_delayed, colocated, cluster):
    """Check the number of parts returned for lazy and persisted arrays.

    Creates random CuPy data, wraps it in dask arrays split along the first
    axis, optionally persists each array on the cluster, and verifies that
    ``_extract_partitions`` returns exactly ``n_parts`` entries.

    Parameters
    ----------
    nrows, ncols : int
        Shape of the generated ``X``; ``y`` has ``nrows`` elements.
    n_parts : int
        Number of parts expected; also used to derive the row-chunk length.
    X_delayed, y_delayed : bool
        When false, the corresponding array is persisted through the client
        before extraction; when true it is passed on without persisting.
    colocated : bool
        When true, ``(X, y)`` is extracted as a tuple; otherwise only ``X``.
    cluster
        Passed to ``Client`` to create the client used by the test.
    """
    client = Client(cluster)
    try:
        X = cp.random.standard_normal((nrows, ncols))
        y = cp.random.standard_normal((nrows, ))

        # Floor division keeps the chunk length an integer; true division
        # would pass a float chunk size to dask.
        rows_per_part = nrows // n_parts

        X = da.from_array(X, chunks=(rows_per_part, -1))
        y = da.from_array(y, chunks=(rows_per_part, ))

        if not X_delayed:
            X = client.persist(X)
        if not y_delayed:
            y = client.persist(y)

        dask_input = (X, y) if colocated else X
        gpu_futures = client.sync(_extract_partitions, dask_input, client)

        # Keep only the second element of every returned entry.
        parts = [entry[1] for entry in gpu_futures]
        assert len(parts) == n_parts

    finally:
        client.close()
Beispiel #3
0
def test_extract_partitions_worker_list(nrows, ncols, n_parts, input_type,
                                        colocated, cluster):
    """Check that ``_extract_partitions`` yields one entry per partition.

    Data comes from ``make_blobs`` with ``output=input_type``. Depending on
    ``colocated``, either the ``(X, y)`` tuple or ``X`` alone is handed to
    ``_extract_partitions``, and the number of returned entries must equal
    ``n_parts``.
    """
    client = Client(cluster)

    try:
        X, y = make_blobs(nrows=nrows,
                          ncols=ncols,
                          n_parts=n_parts,
                          output=input_type)

        dask_input = (X, y) if colocated else X
        gpu_futures = client.sync(_extract_partitions, dask_input, client)

        # Second element of each returned entry is the part itself.
        parts = [entry[1] for entry in gpu_futures]
        assert len(parts) == n_parts
    finally:
        client.close()
Beispiel #4
0
def test_to_sp_dask_array(input_type, nrows, ncols, cluster):
    """Round-trip a random CSR matrix through ``to_sp_dask_array``.

    A random ``float32`` CSR matrix is wrapped in the container selected by
    ``input_type``, handed to ``to_sp_dask_array``, and the result is checked
    for the expected shape and for dense content equal to the source matrix.
    """
    client = Client(cluster)

    try:

        from cuml.dask.common import to_sp_dask_array

        source = cp.sparse.random(nrows, ncols,
                                  format='csr', dtype=cp.float32)

        # The branches are mutually exclusive; each builds the input
        # container requested by the test parameter.
        if input_type == "cupysparse":
            inp = source
        elif input_type == "scipysparse":
            inp = source.get()
        elif input_type == "cupy":
            inp = source.todense()
        elif input_type == "numpy":
            inp = source.get().todense()
        elif input_type == "dataframe":
            inp = cudf.DataFrame.from_gpu_matrix(source.todense())
        elif input_type == "dask_array":
            inp = dask.array.from_array(source.todense().get())
        elif input_type == "dask_dataframe":
            gdf = cudf.DataFrame.from_gpu_matrix(source.todense())
            inp = dask_cudf.from_cudf(gdf, npartitions=2)

        sp_arr = to_sp_dask_array(inp, client)
        sp_arr.compute_chunk_sizes()

        assert sp_arr.shape == (nrows, ncols)

        # compute() cannot be called directly on this array yet when it has
        # multiple partitions, so fetch every partition and stack the
        # pieces manually.
        partitions = client.sync(extract_arr_partitions, sp_arr)
        dense_pieces = [entry[1].result().todense() for entry in partitions]
        local_parts = cp.vstack(dense_pieces).get()

        assert array_equal(source.todense().get(), local_parts)

    finally:
        client.close()
Beispiel #5
0
def test_make_classification(n_samples, n_features, hypercube, n_classes,
                             n_clusters_per_class, n_informative,
                             random_state, n_parts, order, cluster):
    """Check shape, chunking, labels and memory layout of the dataset.

    Calls the dask ``make_classification`` generator with the given
    parameters and verifies that:

    * ``X`` has shape ``(n_samples, n_features)`` and ``y`` has shape
      ``(n_samples,)``;
    * ``X`` and ``y`` are split into ``n_parts`` chunks along the first axis
      and ``X`` has a single chunk along the second axis;
    * the computed labels contain exactly ``n_classes`` distinct values;
    * the first extracted partition of ``X`` is F- or C-contiguous when
      ``order`` is ``'F'`` or ``'C'`` respectively.
    """
    client = Client(cluster)
    try:
        from cuml.dask.datasets.classification import make_classification
        import cupy as cp

        X, y = make_classification(n_samples=n_samples, n_features=n_features,
                                   n_classes=n_classes, hypercube=hypercube,
                                   n_clusters_per_class=n_clusters_per_class,
                                   n_informative=n_informative,
                                   random_state=random_state, n_parts=n_parts,
                                   order=order)

        assert X.shape == (n_samples, n_features)
        assert y.shape == (n_samples, )

        # Row axis is split into n_parts chunks; column axis is one chunk.
        assert len(X.chunks[0]) == n_parts
        assert len(X.chunks[1]) == 1
        assert len(y.chunks[0]) == n_parts

        y_local = y.compute()
        assert len(cp.unique(y_local)) == n_classes

        X_parts = client.sync(_extract_partitions, X)
        # Each entry's second element is a future; resolve the first one.
        X_first = X_parts[0][1].result()

        if order == 'F':
            assert X_first.flags['F_CONTIGUOUS']
        elif order == 'C':
            assert X_first.flags['C_CONTIGUOUS']

    finally:
        client.close()