Ejemplo n.º 1
0
def test_pca_fit_transform_fp32(nrows, ncols, n_parts, client=None):
    """Smoke-test distributed PCA fit_transform on float32 blob data.

    When ``client`` is None a temporary LocalCUDACluster/Client pair is
    created and is guaranteed to be torn down even if the test body raises.
    """
    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    try:
        from cuml.dask.decomposition import PCA as daskPCA
        from cuml.dask.datasets import make_blobs

        X_cudf, _ = make_blobs(nrows,
                               ncols,
                               1,
                               n_parts,
                               cluster_std=1.5,
                               verbose=False,
                               random_state=10,
                               dtype=np.float32)

        # Block until all partitions are materialized on the workers.
        wait(X_cudf)

        cupca = daskPCA(n_components=20, whiten=True)
        cupca.fit_transform(X_cudf)
    finally:
        # Only tear down resources we created ourselves; a caller-supplied
        # client remains the caller's responsibility.
        if owns_cluster:
            client.close()
            cluster.close()
Ejemplo n.º 2
0
def test_pca_fit(nrows, ncols, n_parts, input_type, cluster):
    """Compare distributed cuML PCA fitted attributes against scikit-learn.

    Fits both estimators on the same blob data (as a dask-cudf dataframe or
    a dask array, per ``input_type``) and asserts their singular values,
    components and explained-variance attributes agree to 1e-1.
    """
    client = Client(cluster)

    try:
        from cuml.dask.decomposition import PCA as daskPCA
        from sklearn.decomposition import PCA

        from cuml.dask.datasets import make_blobs

        X, _ = make_blobs(n_samples=nrows,
                          n_features=ncols,
                          centers=1,
                          n_parts=n_parts,
                          cluster_std=0.5,
                          random_state=10,
                          dtype=np.float32)

        # Block until all partitions are materialized on the workers.
        wait(X)
        if input_type == "dataframe":
            X_train = to_dask_cudf(X)
            X_cpu = X_train.compute().to_pandas().values
        elif input_type == "array":
            X_train = X
            X_cpu = cp.asnumpy(X_train.compute())
        else:
            # Fail loudly instead of leaving X_train/X_cpu unbound below.
            raise ValueError("unsupported input_type: %r" % (input_type,))

        # Let a fit failure propagate: the original swallowed it with a
        # print and then crashed later with a confusing AttributeError
        # when reading the (never-set) fitted attributes.
        cupca = daskPCA(n_components=5, whiten=True)
        cupca.fit(X_train)

        skpca = PCA(n_components=5, whiten=True, svd_solver="full")
        skpca.fit(X_cpu)

        from cuml.test.utils import array_equal

        all_attr = [
            'singular_values_', 'components_', 'explained_variance_',
            'explained_variance_ratio_'
        ]

        for attr in all_attr:
            # Component signs are arbitrary, so compare those up to sign.
            with_sign = attr not in ['components_']
            cuml_res = getattr(cupca, attr)
            # NOTE(review): exact-type check kept from the original; the
            # .as_matrix() call suggests a dataframe-like result is possible.
            if type(cuml_res) == np.ndarray:
                cuml_res = cuml_res.as_matrix()
            skl_res = getattr(skpca, attr)
            assert array_equal(cuml_res, skl_res, 1e-1, with_sign=with_sign)
    finally:
        client.close()
Ejemplo n.º 3
0
def test_pca_fit(nrows, ncols, n_parts, client=None):
    """Compare distributed cuML PCA fitted attributes against scikit-learn.

    When ``client`` is None a temporary LocalCUDACluster/Client pair is
    created and is guaranteed to be torn down even if the test body raises
    (the original only closed it on the success path, leaking the cluster
    on any failure).
    """
    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    try:
        from cuml.dask.decomposition import PCA as daskPCA
        from sklearn.decomposition import PCA

        from cuml.dask.datasets import make_blobs

        X_cudf, _ = make_blobs(nrows,
                               ncols,
                               1,
                               n_parts,
                               cluster_std=0.5,
                               verbose=False,
                               random_state=10,
                               dtype=np.float32)

        # Block until all partitions are materialized on the workers.
        wait(X_cudf)

        # Host-side copy of the same data for the scikit-learn reference fit.
        X = X_cudf.compute().to_pandas().values

        cupca = daskPCA(n_components=5, whiten=True)
        cupca.fit(X_cudf)

        skpca = PCA(n_components=5, whiten=True, svd_solver="full")
        skpca.fit(X)

        from cuml.test.utils import array_equal

        all_attr = [
            'singular_values_', 'components_', 'explained_variance_',
            'explained_variance_ratio_'
        ]

        for attr in all_attr:
            # Component signs are arbitrary, so compare those up to sign.
            with_sign = False if attr in ['components_'] else True
            cuml_res = (getattr(cupca, attr))
            if type(cuml_res) == np.ndarray:
                cuml_res = cuml_res.as_matrix()
            skl_res = getattr(skpca, attr)
            assert array_equal(cuml_res, skl_res, 1e-3, with_sign=with_sign)
    finally:
        if owns_cluster:
            client.close()
            cluster.close()
Ejemplo n.º 4
0
def test_pca_fit_transform_fp64(nrows, ncols, n_parts, client):
    """fit_transform smoke test on float64 blobs with whitening disabled.

    Verifies only the shape of the transformed output: all ``nrows``
    samples projected onto 30 components.
    """
    from cuml.dask.decomposition import PCA as daskPCA
    from cuml.dask.datasets import make_blobs

    data, _ = make_blobs(n_samples=nrows,
                         n_features=ncols,
                         centers=1,
                         n_parts=n_parts,
                         cluster_std=1.5,
                         random_state=10, dtype=np.float64)

    model = daskPCA(n_components=30, whiten=False)
    projected = model.fit_transform(data).compute()
    assert projected.shape[0] == nrows and projected.shape[1] == 30
Ejemplo n.º 5
0
def test_pca_fit_transform_fp32_noncomponents(nrows, ncols, n_parts, client):
    """Tests the case when n_components is not passed for MG scenarios.

    With no ``n_components``, all features are retained, so the transformed
    output should be ``nrows x ncols``. The original hard-coded 20 here,
    silently coupling the assertion to a single parametrization of
    ``ncols``; asserting against ``ncols`` generalizes the test.
    """
    from cuml.dask.decomposition import PCA as daskPCA
    from cuml.dask.datasets import make_blobs

    X_cudf, _ = make_blobs(n_samples=nrows,
                           n_features=ncols,
                           centers=1,
                           n_parts=n_parts,
                           cluster_std=1.5,
                           random_state=10, dtype=np.float32)

    cupca = daskPCA(whiten=False)
    res = cupca.fit_transform(X_cudf)
    res = res.compute()
    assert res.shape[0] == nrows and res.shape[1] == ncols
Ejemplo n.º 6
0
def test_pca_fit(nrows, ncols, n_parts, cluster):
    """Compare distributed cuML PCA fitted attributes against scikit-learn.

    Fits both estimators on the same blob data and asserts their singular
    values, components and explained-variance attributes agree to 1e-3.
    """
    client = Client(cluster)

    try:
        from cuml.dask.decomposition import PCA as daskPCA
        from sklearn.decomposition import PCA

        from cuml.dask.datasets import make_blobs

        X_cudf, _ = make_blobs(nrows, ncols, 1, n_parts,
                               cluster_std=0.5, verbose=False,
                               random_state=10, dtype=np.float32)

        # Block until all partitions are materialized on the workers.
        wait(X_cudf)

        print(str(X_cudf.head(3)))

        # Let a fit failure propagate: the original swallowed it with a
        # print and then crashed later with a confusing AttributeError
        # when reading the (never-set) fitted attributes.
        cupca = daskPCA(n_components=5, whiten=True)
        cupca.fit(X_cudf)

        # Host-side copy of the same data for the scikit-learn reference fit.
        X = X_cudf.compute().to_pandas().values

        skpca = PCA(n_components=5, whiten=True, svd_solver="full")
        skpca.fit(X)

        from cuml.test.utils import array_equal

        all_attr = ['singular_values_', 'components_',
                    'explained_variance_', 'explained_variance_ratio_']

        for attr in all_attr:
            # Component signs are arbitrary, so compare those up to sign.
            with_sign = attr not in ['components_']
            cuml_res = getattr(cupca, attr)
            # NOTE(review): exact-type check kept from the original; the
            # .as_matrix() call suggests a dataframe-like result is possible.
            if type(cuml_res) == np.ndarray:
                cuml_res = cuml_res.as_matrix()
            skl_res = getattr(skpca, attr)
            assert array_equal(cuml_res, skl_res, 1e-3, with_sign=with_sign)
    finally:
        client.close()
Ejemplo n.º 7
0
def test_pca_fit_transform_fp32(nrows, ncols, n_parts, cluster):
    """fit_transform smoke test on float32 blobs with whitening enabled.

    Only checks that the distributed pipeline runs to completion; the
    client is always closed, even on failure.
    """
    client = Client(cluster)

    try:
        from cuml.dask.decomposition import PCA as daskPCA
        from cuml.dask.datasets import make_blobs

        blobs, _ = make_blobs(nrows, ncols, 1, n_parts,
                              cluster_std=1.5, verbose=False,
                              random_state=10, dtype=np.float32)

        # Block until all partitions are materialized on the workers.
        wait(blobs)

        model = daskPCA(n_components=20, whiten=True)
        model.fit_transform(blobs)

    finally:
        client.close()
Ejemplo n.º 8
0
def test_pca_fit_transform_fp64(nrows, ncols, n_parts, cluster):
    """fit_transform smoke test on float64 blobs with whitening disabled.

    Only checks that the distributed pipeline runs to completion; the
    client is always closed, even on failure.
    """
    client = Client(cluster)

    try:
        from cuml.dask.decomposition import PCA as daskPCA
        from cuml.dask.datasets import make_blobs

        blobs, _ = make_blobs(n_samples=nrows,
                              n_features=ncols,
                              centers=1,
                              n_parts=n_parts,
                              cluster_std=1.5,
                              random_state=10,
                              dtype=np.float64)

        # Block until all partitions are materialized on the workers.
        wait(blobs)

        model = daskPCA(n_components=30, whiten=False)
        model.fit_transform(blobs)

    finally:
        client.close()