def test_extract_partitions_shape(nrows, ncols, n_parts, input_type,
                                  colocated, cluster):
    client = Client(cluster)
    try:
        X, y = make_blobs(nrows=nrows, ncols=ncols, n_parts=n_parts,
                          output=input_type)

        # Record the expected per-partition row counts up front so they
        # can be compared against the extracted partitions below.
        if input_type == "dataframe":
            X_len_parts = X.map_partitions(len).compute()
            y_len_parts = y.map_partitions(len).compute()
        elif input_type == "array":
            X_len_parts = X.chunks[0]
            y_len_parts = y.chunks[0]

        if colocated:
            gpu_futures = client.sync(_extract_partitions, (X, y), client)
        else:
            gpu_futures = client.sync(_extract_partitions, X, client)

        parts = [part.result() for worker, part in gpu_futures]

        if colocated:
            # Colocated extraction yields (X_part, y_part) tuples.
            for i in range(len(parts)):
                assert parts[i][0].shape[0] == X_len_parts[i]
                assert parts[i][1].shape[0] == y_len_parts[i]
        else:
            for i in range(len(parts)):
                assert parts[i].shape[0] == X_len_parts[i]
    finally:
        client.close()
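
# A minimal usage sketch (not a test, and not collected by pytest): as the
# tests above and below exercise it, _extract_partitions returns a sequence
# of (worker, future) pairs, where each future resolves to one partition --
# or to an (X_part, y_part) tuple when a colocated (X, y) pair is passed in.
# The helper name is hypothetical and exists only for illustration.
def _sketch_extract_partitions_usage(client, X):
    gpu_futures = client.sync(_extract_partitions, X, client)
    # Materialize each partition on the driver.
    return [part_future.result() for _worker, part_future in gpu_futures]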


def test_extract_partitions_futures(nrows, ncols, n_parts, X_delayed,
                                    y_delayed, colocated, cluster):
    client = Client(cluster)
    try:
        X = cp.random.standard_normal((nrows, ncols))
        y = cp.random.standard_normal((nrows,))

        # Use integer division: dask chunk sizes must be whole numbers.
        X = da.from_array(X, chunks=(nrows // n_parts, -1))
        y = da.from_array(y, chunks=(nrows // n_parts,))

        if not X_delayed:
            X = client.persist(X)
        if not y_delayed:
            y = client.persist(y)

        if colocated:
            gpu_futures = client.sync(_extract_partitions, (X, y), client)
        else:
            gpu_futures = client.sync(_extract_partitions, X, client)

        parts = [part for _, part in gpu_futures]
        assert len(parts) == n_parts
    finally:
        client.close()
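
# Sketch (illustrative only, not a test): the chunk spec used above splits
# the rows into n_parts pieces, while -1 leaves the column axis unchunked.
def _sketch_row_wise_chunks(arr, n_parts):
    nrows = arr.shape[0]
    chunked = da.from_array(arr, chunks=(nrows // n_parts, -1))
    # For a 100x8 array and n_parts=4 this yields ((25, 25, 25, 25), (8,)).
    return chunked.chunks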


def test_extract_partitions_worker_list(nrows, ncols, n_parts, input_type,
                                        colocated, cluster):
    client = Client(cluster)
    try:
        X, y = make_blobs(nrows=nrows, ncols=ncols, n_parts=n_parts,
                          output=input_type)

        if colocated:
            gpu_futures = client.sync(_extract_partitions, (X, y), client)
        else:
            gpu_futures = client.sync(_extract_partitions, X, client)

        parts = [part for _, part in gpu_futures]
        assert len(parts) == n_parts
    finally:
        client.close()


def test_to_sp_dask_array(input_type, nrows, ncols, cluster):
    c = Client(cluster)
    try:
        from cuml.dask.common import to_sp_dask_array

        a = cp.sparse.random(nrows, ncols, format='csr', dtype=cp.float32)

        # Build the requested input type from the same sparse source matrix.
        if input_type == "dask_dataframe":
            df = cudf.DataFrame.from_gpu_matrix(a.todense())
            inp = dask_cudf.from_cudf(df, npartitions=2)
        elif input_type == "dask_array":
            inp = dask.array.from_array(a.todense().get())
        elif input_type == "dataframe":
            inp = cudf.DataFrame.from_gpu_matrix(a.todense())
        elif input_type == "scipysparse":
            inp = a.get()
        elif input_type == "cupysparse":
            inp = a
        elif input_type == "numpy":
            inp = a.get().todense()
        elif input_type == "cupy":
            inp = a.todense()

        arr = to_sp_dask_array(inp, c)
        arr.compute_chunk_sizes()
        assert arr.shape == (nrows, ncols)

        # We can't call compute() directly on this array when it has
        # multiple partitions, so manually gather and concatenate the
        # pieces instead.
        parts = c.sync(extract_arr_partitions, arr)
        local_parts = cp.vstack([part[1].result().todense()
                                 for part in parts]).get()

        assert array_equal(a.todense().get(), local_parts)
    finally:
        c.close()
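
# Usage sketch (not a test): as the test above suggests, to_sp_dask_array
# normalizes any of the exercised input types into a dask array whose
# partitions hold CuPy sparse matrices on the workers. The helper name and
# the shapes below are illustrative only.
def _sketch_to_sp_dask_array(client):
    from cuml.dask.common import to_sp_dask_array
    src = cp.sparse.random(100, 50, format='csr', dtype=cp.float32)
    dist = to_sp_dask_array(src, client)
    # Chunk sizes are unknown until explicitly computed.
    dist.compute_chunk_sizes()
    return dist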


def test_make_classification(n_samples, n_features, hypercube, n_classes,
                             n_clusters_per_class, n_informative,
                             random_state, n_parts, order, cluster):
    client = Client(cluster)
    try:
        from cuml.dask.datasets.classification import make_classification

        X, y = make_classification(n_samples=n_samples,
                                   n_features=n_features,
                                   n_classes=n_classes,
                                   hypercube=hypercube,
                                   n_clusters_per_class=n_clusters_per_class,
                                   n_informative=n_informative,
                                   random_state=random_state,
                                   n_parts=n_parts,
                                   order=order)

        # The data should be partitioned row-wise only: n_parts chunks
        # along axis 0 and a single chunk along axis 1.
        assert len(X.chunks[0]) == n_parts
        assert len(X.chunks[1]) == 1
        assert len(y.chunks[0]) == n_parts

        assert X.shape == (n_samples, n_features)
        assert y.shape == (n_samples,)

        y_local = y.compute()
        assert len(cp.unique(y_local)) == n_classes

        # Each partition should honor the requested memory layout.
        X_parts = client.sync(_extract_partitions, X)
        X_first = X_parts[0][1].result()
        if order == 'F':
            assert X_first.flags['F_CONTIGUOUS']
        elif order == 'C':
            assert X_first.flags['C_CONTIGUOUS']
    finally:
        client.close()
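
# Sketch (illustrative only): the F/C contiguity check used above relies on
# the standard CuPy/NumPy .flags attribute, which supports dict-style access.
def _sketch_order_check():
    f_arr = cp.asfortranarray(cp.zeros((4, 3)))
    assert f_arr.flags['F_CONTIGUOUS']
    c_arr = cp.ascontiguousarray(f_arr)
    assert c_arr.flags['C_CONTIGUOUS']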