Beispiel #1
0
def test_make_blobs(nrows, ncols, centers, cluster_std, dtype, nparts, order,
                    client):
    """Check partitioning, shape, labels and layout of ``make_blobs`` output.

    The test generates ``X`` and ``y`` with ``make_blobs`` and verifies that:

    * both outputs have ``nparts`` chunks along their first axis,
    * ``X`` has shape ``(nrows, ncols)`` and ``y`` has shape ``(nrows,)``,
    * the computed labels contain exactly ``centers`` distinct values,
    * the first partition of ``X`` is F- or C-contiguous, matching ``order``.

    All arguments are forwarded to ``make_blobs``; ``client`` is additionally
    handed to ``DistributedDataHandler.create``.
    NOTE(review): the arguments are presumably provided by pytest
    parametrization / fixtures defined outside this block -- confirm.
    """
    n_samples = int(nrows)

    X, y = make_blobs(n_samples,
                      ncols,
                      centers=centers,
                      cluster_std=cluster_std,
                      dtype=dtype,
                      n_parts=nparts,
                      order=order,
                      client=client)

    # Each output must be split into the requested number of row chunks.
    for collection in (X, y):
        assert len(collection.chunks[0]) == nparts

    assert X.shape == (n_samples, ncols)
    assert y.shape == (n_samples, )

    # Materialize the labels and count the distinct ones.
    labels = y.compute()
    n_distinct = len(cp.unique(labels))
    assert n_distinct == centers

    # Fetch the first partition of X to inspect its memory layout.
    handler = DistributedDataHandler.create(data=X, client=client)
    first_entry = handler.gpu_futures[0]
    first_part = first_entry[1].result()

    for layout, flag in (('F', 'F_CONTIGUOUS'), ('C', 'C_CONTIGUOUS')):
        if order == layout:
            assert first_part.flags[flag]
            break
Beispiel #2
0
def test_extract_partitions_shape(nrows, ncols, n_parts, input_type,
                                  colocated, client):
    """Check that extracted partitions keep the row counts of the input.

    Blobs are generated with ``make_blobs`` and, depending on ``input_type``
    (``"dataframe"``, ``"series"`` or ``"array"``), converted with
    ``to_dask_cudf`` and optionally narrowed to the first column of ``X``.
    The per-partition lengths of the input are then compared against the
    row counts of the partitions returned through
    ``DistributedDataHandler.create(...).gpu_futures``.

    When ``colocated`` is truthy, ``X`` and ``y`` are passed together as a
    tuple and both members of every partition are checked; otherwise only
    ``X`` is passed and checked.
    """
    # A series is obtained from a dataframe, so both share the same path.
    frame_like = input_type in ("series", "dataframe")

    X_arr, y_arr = make_blobs(n_samples=nrows, n_features=ncols,
                              n_parts=n_parts)

    if frame_like:
        X = to_dask_cudf(X_arr)
        y = to_dask_cudf(y_arr)
        if input_type == "series":
            # Only X is reduced to a single column; y stays as converted.
            X = X[X.columns[0]]
        X_len_parts = X.map_partitions(len).compute()
        y_len_parts = y.map_partitions(len).compute()
    elif input_type == "array":
        X, y = X_arr, y_arr
        X_len_parts = X.chunks[0]
        y_len_parts = y.chunks[0]

    payload = (X, y) if colocated else X
    ddh = DistributedDataHandler.create(payload, client)
    parts = [future.result() for _, future in ddh.gpu_futures]

    for idx, part in enumerate(parts):
        if colocated:
            # Each part holds the X piece at index 0 and the y piece at 1.
            assert part[0].shape[0] == X_len_parts[idx]
            assert part[1].shape[0] == y_len_parts[idx]
        else:
            assert part.shape[0] == X_len_parts[idx]
Beispiel #3
0
def test_extract_partitions_worker_list(nrows, ncols, n_parts, input_type,
                                        colocated, cluster):
    """Check that one future is produced per requested partition.

    A ``Client`` is opened on ``cluster`` and always closed again, even if
    the test body fails. Blobs are generated with ``make_blobs``; for
    ``input_type == "series"`` they are requested as ``'dataframe'`` output
    and the first column of both ``X`` and ``y`` is selected. The number of
    entries in ``DistributedDataHandler.create(...).gpu_futures`` must equal
    ``n_parts``.

    When ``colocated`` is truthy, ``(X, y)`` is passed to the handler;
    otherwise only ``X``.
    """
    client = Client(cluster)

    try:
        is_series = input_type == "series"
        blob_output = 'dataframe' if is_series else input_type

        X, y = make_blobs(n_samples=nrows,
                          n_features=ncols,
                          n_parts=n_parts,
                          output=blob_output)

        if is_series:
            X = X[X.columns[0]]
            y = y[y.columns[0]]

        payload = (X, y) if colocated else X
        ddh = DistributedDataHandler.create(payload, client)

        # Second element of every gpu_futures entry is the partition future.
        futures = [entry[1] for entry in ddh.gpu_futures]
        assert len(futures) == n_parts
    finally:
        client.close()
Beispiel #4
0
def test_extract_partitions_worker_list(nrows, ncols, n_parts, input_type,
                                        colocated, cluster):
    """Check that one future is produced per requested partition.

    A ``Client`` is opened on ``cluster`` and always closed again, even if
    the test body fails. Blobs are generated with ``make_blobs`` (``nrows``
    is cast to ``int`` first). For ``"dataframe"`` and ``"series"`` inputs
    the blobs are converted with ``to_dask_cudf``; for ``"series"`` the
    first column of ``X`` is additionally selected. For ``"array"`` inputs
    the blobs are used unchanged.

    The number of entries in
    ``DistributedDataHandler.create(...).gpu_futures`` must equal
    ``n_parts``. When ``colocated`` is truthy, ``(X, y)`` is passed to the
    handler; otherwise only ``X``.
    """
    client = Client(cluster)

    try:
        # A series is obtained from a dataframe, so both share the same path.
        frame_like = input_type in ("series", "dataframe")

        X_arr, y_arr = make_blobs(n_samples=int(nrows),
                                  n_features=ncols,
                                  n_parts=n_parts)

        if frame_like:
            X = to_dask_cudf(X_arr)
            y = to_dask_cudf(y_arr)
            if input_type == "series":
                # Only X is reduced to a single column here.
                X = X[X.columns[0]]
        elif input_type == "array":
            X, y = X_arr, y_arr

        payload = (X, y) if colocated else X
        ddh = DistributedDataHandler.create(payload, client)

        # Second element of every gpu_futures entry is the partition future.
        futures = [entry[1] for entry in ddh.gpu_futures]
        assert len(futures) == n_parts
    finally:
        client.close()
Beispiel #5
0
def test_extract_partitions_shape(nrows, ncols, n_parts, input_type, colocated,
                                  cluster):
    """Check that ``_extract_partitions`` keeps the input's row counts.

    A ``Client`` is opened on ``cluster`` and always closed again, even if
    the test body fails. Blobs are generated with ``make_blobs`` using
    ``input_type`` as the output kind; the per-partition lengths are taken
    from ``map_partitions(len)`` for ``"dataframe"`` and from ``chunks[0]``
    for ``"array"``.

    ``_extract_partitions`` is run through ``client.sync`` on ``(X, y)``
    when ``colocated`` is truthy, otherwise on ``X`` alone, and the row
    count of every resulting partition is compared with the expected
    length at the same position.
    """
    client = Client(cluster)

    try:
        X, y = make_blobs(nrows=nrows,
                          ncols=ncols,
                          n_parts=n_parts,
                          output=input_type)

        if input_type == "dataframe":
            X_len_parts = X.map_partitions(len).compute()
            y_len_parts = y.map_partitions(len).compute()
        elif input_type == "array":
            X_len_parts = X.chunks[0]
            y_len_parts = y.chunks[0]

        payload = (X, y) if colocated else X
        gpu_futures = client.sync(_extract_partitions, payload, client)

        parts = [future.result() for _, future in gpu_futures]

        for idx, part in enumerate(parts):
            if colocated:
                # Each part holds the X piece at index 0 and the y piece at 1.
                assert part[0].shape[0] == X_len_parts[idx]
                assert part[1].shape[0] == y_len_parts[idx]
            else:
                assert part.shape[0] == X_len_parts[idx]

    finally:
        client.close()
Beispiel #6
0
def test_extract_partitions_worker_list(nrows, ncols, n_parts, input_type,
                                        colocated, cluster):
    """Check that ``_extract_partitions`` yields one future per partition.

    A ``Client`` is opened on ``cluster`` and always closed again, even if
    the test body fails. Blobs are generated with ``make_blobs`` using
    ``input_type`` as the output kind. ``_extract_partitions`` is run
    through ``client.sync`` on ``(X, y)`` when ``colocated`` is truthy,
    otherwise on ``X`` alone, and the number of returned entries must equal
    ``n_parts``.
    """
    client = Client(cluster)

    try:
        X, y = make_blobs(nrows=nrows,
                          ncols=ncols,
                          n_parts=n_parts,
                          output=input_type)

        payload = (X, y) if colocated else X
        gpu_futures = client.sync(_extract_partitions, payload, client)

        # Second element of every returned entry is the partition future.
        futures = [entry[1] for entry in gpu_futures]
        assert len(futures) == n_parts
    finally:
        client.close()
Beispiel #7
0
def test_extract_partitions_shape(nrows, ncols, n_parts, input_type, colocated,
                                  cluster):
    """Check that extracted partitions keep the row counts of the input.

    A ``Client`` is opened on ``cluster`` and always closed again, even if
    the test body fails. Blobs are generated with ``make_blobs``; for
    ``input_type == "series"`` they are requested as ``'dataframe'`` output
    and the first column of both ``X`` and ``y`` is selected. Expected
    per-partition lengths come from ``map_partitions(len)`` for
    ``"dataframe"`` / ``"series"`` and from ``chunks[0]`` for ``"array"``.

    The partitions obtained through
    ``DistributedDataHandler.create(...).gpu_futures`` are then compared
    position by position against those lengths. When ``colocated`` is
    truthy, ``(X, y)`` is passed and both members of each partition are
    checked; otherwise only ``X``.
    """
    client = Client(cluster)

    try:
        is_series = input_type == "series"
        blob_output = 'dataframe' if is_series else input_type

        X, y = make_blobs(n_samples=nrows,
                          n_features=ncols,
                          n_parts=n_parts,
                          output=blob_output)

        if is_series:
            X = X[X.columns[0]]
            y = y[y.columns[0]]

        if is_series or input_type == "dataframe":
            X_len_parts = X.map_partitions(len).compute()
            y_len_parts = y.map_partitions(len).compute()
        elif input_type == "array":
            X_len_parts = X.chunks[0]
            y_len_parts = y.chunks[0]

        payload = (X, y) if colocated else X
        ddh = DistributedDataHandler.create(payload, client)
        parts = [future.result() for _, future in ddh.gpu_futures]

        for idx, part in enumerate(parts):
            if colocated:
                # Each part holds the X piece at index 0 and the y piece at 1.
                assert part[0].shape[0] == X_len_parts[idx]
                assert part[1].shape[0] == y_len_parts[idx]
            else:
                assert part.shape[0] == X_len_parts[idx]

    finally:
        client.close()