def test_make_blobs(nrows, ncols, centers, cluster_std, dtype, nparts, order, client):
    """Verify that make_blobs yields data with the requested shape,
    partition count, number of distinct labels, and memory layout."""
    c = client
    nrows = int(nrows)

    X, y = make_blobs(nrows, ncols, centers=centers,
                      cluster_std=cluster_std, dtype=dtype,
                      n_parts=nparts, order=order, client=client)

    # One dask chunk per requested partition.
    assert len(X.chunks[0]) == nparts
    assert len(y.chunks[0]) == nparts

    # Overall shapes match the request.
    assert X.shape == (nrows, ncols)
    assert y.shape == (nrows, )

    # Every requested center should show up as a distinct label.
    labels = y.compute()
    assert len(cp.unique(labels)) == centers

    # Pull the first on-worker partition and check its memory order.
    handler = DistributedDataHandler.create(data=X, client=c)
    first_part = handler.gpu_futures[0][1].result()
    if order == 'F':
        assert first_part.flags['F_CONTIGUOUS']
    elif order == 'C':
        assert first_part.flags['C_CONTIGUOUS']
def test_extract_partitions_shape(nrows, ncols, n_parts, input_type,
                                  colocated, client):
    """Check that DistributedDataHandler preserves per-partition row counts
    for dataframe, series, and array inputs, colocated or not.

    Fix: the original tested ``adj_input_type == "dataframe" or
    input_type == "dataframe"`` — the second disjunct is redundant, since
    ``adj_input_type`` equals ``input_type`` unless the latter is
    ``'series'`` (in which case it is already ``'dataframe'``).
    """
    # 'series' inputs start life as a dataframe and are narrowed below.
    adj_input_type = 'dataframe' if input_type == 'series' else input_type

    X_arr, y_arr = make_blobs(n_samples=nrows, n_features=ncols,
                              n_parts=n_parts)

    if adj_input_type == "dataframe":
        # Covers both 'dataframe' and 'series' input types.
        X = to_dask_cudf(X_arr)
        y = to_dask_cudf(y_arr)
    elif input_type == "array":
        X, y = X_arr, y_arr

    if input_type == "series":
        X = X[X.columns[0]]

    # Record the expected row count of each partition/chunk.
    if input_type in ("dataframe", "series"):
        X_len_parts = X.map_partitions(len).compute()
        y_len_parts = y.map_partitions(len).compute()
    elif input_type == "array":
        X_len_parts = X.chunks[0]
        y_len_parts = y.chunks[0]

    if colocated:
        ddh = DistributedDataHandler.create((X, y), client)
        parts = [part.result() for worker, part in ddh.gpu_futures]
        for i in range(len(parts)):
            assert (parts[i][0].shape[0] == X_len_parts[i]) and (
                parts[i][1].shape[0] == y_len_parts[i])
    else:
        ddh = DistributedDataHandler.create(X, client)
        parts = [part.result() for worker, part in ddh.gpu_futures]
        for i in range(len(parts)):
            assert (parts[i].shape[0] == X_len_parts[i])
def test_extract_partitions_worker_list(nrows, ncols, n_parts, input_type,
                                        colocated, cluster):
    """Check that the handler exposes exactly one future per partition."""
    client = Client(cluster)
    try:
        # 'series' data is generated as a dataframe, then narrowed.
        adj_input_type = 'dataframe' if input_type == 'series' else input_type
        X, y = make_blobs(n_samples=nrows, n_features=ncols,
                          n_parts=n_parts, output=adj_input_type)

        if input_type == "series":
            X = X[X.columns[0]]
            y = y[y.columns[0]]

        data = (X, y) if colocated else X
        ddh = DistributedDataHandler.create(data, client)

        futures = [fut for _worker, fut in ddh.gpu_futures]
        assert len(futures) == n_parts
    finally:
        client.close()
def test_extract_partitions_worker_list(nrows, ncols, n_parts, input_type,
                                        colocated, cluster):
    """Check that DistributedDataHandler tracks one future per partition.

    Fix: the original gated the dask-cudf conversion on
    ``adj_input_type == "dataframe" or input_type == "dataframe"`` — the
    second disjunct is redundant because ``adj_input_type`` equals
    ``input_type`` except when the latter is ``'series'``, in which case
    it is already ``'dataframe'``.
    """
    client = Client(cluster)
    try:
        # 'series' inputs start as a dataframe and are narrowed below.
        adj_input_type = 'dataframe' if input_type == 'series' else input_type

        X_arr, y_arr = make_blobs(n_samples=int(nrows), n_features=ncols,
                                  n_parts=n_parts)

        if adj_input_type == "dataframe":
            # Covers both 'dataframe' and 'series' input types.
            X = to_dask_cudf(X_arr)
            y = to_dask_cudf(y_arr)
        elif input_type == "array":
            X, y = X_arr, y_arr

        if input_type == "series":
            X = X[X.columns[0]]

        if colocated:
            ddh = DistributedDataHandler.create((X, y), client)
        else:
            ddh = DistributedDataHandler.create(X, client)

        parts = list(map(lambda x: x[1], ddh.gpu_futures))
        assert len(parts) == n_parts
    finally:
        client.close()
def test_extract_partitions_shape(nrows, ncols, n_parts, input_type,
                                  colocated, cluster):
    """Check that _extract_partitions preserves per-partition row counts."""
    client = Client(cluster)
    try:
        X, y = make_blobs(nrows=nrows, ncols=ncols,
                          n_parts=n_parts, output=input_type)

        # Expected row count of each partition/chunk.
        if input_type == "dataframe":
            X_len_parts = X.map_partitions(len).compute()
            y_len_parts = y.map_partitions(len).compute()
        elif input_type == "array":
            X_len_parts = X.chunks[0]
            y_len_parts = y.chunks[0]

        data = (X, y) if colocated else X
        gpu_futures = client.sync(_extract_partitions, data, client)
        parts = [fut.result() for _worker, fut in gpu_futures]

        for i, part in enumerate(parts):
            if colocated:
                # Each entry is an (X, y) pair; both halves must line up.
                assert part[0].shape[0] == X_len_parts[i]
                assert part[1].shape[0] == y_len_parts[i]
            else:
                assert part.shape[0] == X_len_parts[i]
    finally:
        client.close()
def test_extract_partitions_worker_list(nrows, ncols, n_parts, input_type,
                                        colocated, cluster):
    """Check that _extract_partitions yields one future per partition."""
    client = Client(cluster)
    try:
        X, y = make_blobs(nrows=nrows, ncols=ncols,
                          n_parts=n_parts, output=input_type)

        data = (X, y) if colocated else X
        gpu_futures = client.sync(_extract_partitions, data, client)

        futures = [fut for _worker, fut in gpu_futures]
        assert len(futures) == n_parts
    finally:
        client.close()
def test_extract_partitions_shape(nrows, ncols, n_parts, input_type,
                                  colocated, cluster):
    """Check that DistributedDataHandler preserves per-partition row counts
    across dataframe, series, and array inputs."""
    client = Client(cluster)
    try:
        # 'series' data is generated as a dataframe and narrowed below.
        adj_input_type = 'dataframe' if input_type == 'series' else input_type
        X, y = make_blobs(n_samples=nrows, n_features=ncols,
                          n_parts=n_parts, output=adj_input_type)

        if input_type == "series":
            X = X[X.columns[0]]
            y = y[y.columns[0]]

        # Expected row count of each partition/chunk.
        if input_type in ("dataframe", "series"):
            X_len_parts = X.map_partitions(len).compute()
            y_len_parts = y.map_partitions(len).compute()
        elif input_type == "array":
            X_len_parts = X.chunks[0]
            y_len_parts = y.chunks[0]

        if colocated:
            ddh = DistributedDataHandler.create((X, y), client)
            parts = [fut.result() for _worker, fut in ddh.gpu_futures]
            for i, part in enumerate(parts):
                # Each entry is an (X, y) pair; both halves must line up.
                assert part[0].shape[0] == X_len_parts[i]
                assert part[1].shape[0] == y_len_parts[i]
        else:
            ddh = DistributedDataHandler.create(X, client)
            parts = [fut.result() for _worker, fut in ddh.gpu_futures]
            for i, part in enumerate(parts):
                assert part.shape[0] == X_len_parts[i]
    finally:
        client.close()