Exemple #1
0
def test_pca_fit(datatype, input_type):
    """Fit cuPCA and skPCA on the same six-point dataset and compare them.

    When ``input_type`` is ``'dataframe'`` cuPCA is fitted on a
    ``cudf.DataFrame`` assembled column by column; for any other value it
    is fitted on the NumPy array directly.  Every compared attribute is
    printed for both models and then checked with ``array_equal`` (third
    argument ``1e-3``, presumably a tolerance).
    """
    samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(samples, dtype=datatype)

    skpca = skPCA(n_components=2)
    skpca.fit(X)

    cupca = cuPCA(n_components=2)
    if input_type != 'dataframe':
        cupca.fit(X)
    else:
        gdf = cudf.DataFrame()
        columns = (('0', [-1, -2, -3, 1, 2, 3]),
                   ('1', [-1, -1, -2, 1, 1, 2]))
        for name, values in columns:
            gdf[name] = np.asarray(values, dtype=datatype)
        cupca.fit(gdf)

    compared = ('singular_values_', 'components_', 'explained_variance_',
                'explained_variance_ratio_', 'noise_variance_')
    for attr in compared:
        print(attr)
        gpu_value = getattr(cupca, attr)
        print(gpu_value)
        cpu_value = getattr(skpca, attr)
        print(cpu_value)

        # Convert the cuPCA result to a host array before comparing.
        if isinstance(gpu_value, cudf.Series):
            gpu_value = gpu_value.to_array()
        else:
            gpu_value = gpu_value.as_matrix()

        # 'components_' is the only attribute compared with with_sign=False.
        assert array_equal(gpu_value, cpu_value, 1e-3,
                           with_sign=(attr != 'components_'))
Exemple #2
0
def test_pca_inverse_transform(datatype):
    """Round-trip a pygdf DataFrame through cuTSVD and compare to the input.

    Note: despite the function name, the model exercised here is cuTSVD
    with a single component.  The reconstruction is compared to the
    original frame with ``array_equal`` using 0.4 as its third argument.
    """
    gdf = pygdf.DataFrame()
    columns = (('0', [-1, -2, -3, 1, 2, 3]),
               ('1', [-1, -1, -2, 1, 1, 2]))
    for name, values in columns:
        gdf[name] = np.asarray(values, dtype=datatype)

    cutsvd = cuTSVD(n_components=1)
    reduced = cutsvd.fit_transform(gdf)

    print("Calling inverse_transform")
    restored = cutsvd.inverse_transform(reduced)
    print(restored)
    assert array_equal(restored, gdf, 0.4, with_sign=True)
Exemple #3
0
def test_pca_inverse_transform(datatype):
    """Round-trip a cudf DataFrame through cuPCA and compare to the input.

    The frame is projected with ``fit_transform`` (two components), mapped
    back with ``inverse_transform``, and checked against the original with
    ``array_equal`` using 1e-3 as its third argument.
    """
    gdf = cudf.DataFrame()
    columns = (('0', [-1, -2, -3, 1, 2, 3]),
               ('1', [-1, -1, -2, 1, 1, 2]))
    for name, values in columns:
        gdf[name] = np.asarray(values, dtype=datatype)

    cupca = cuPCA(n_components=2)
    projected = cupca.fit_transform(gdf)

    print("Calling inverse_transform")
    restored = cupca.inverse_transform(projected)

    assert array_equal(restored, gdf, 1e-3, with_sign=True)
Exemple #4
0
def test_pca_fit_transform(datatype):
    """Compare cuPCA.fit_transform (pygdf input) with skPCA.fit_transform.

    Both models use two components on the same six samples; the results
    are compared with ``array_equal`` with ``with_sign=False``.
    """
    gdf = pygdf.DataFrame()
    columns = (('0', [-1, -2, -3, 1, 2, 3]),
               ('1', [-1, -1, -2, 1, 1, 2]))
    for name, values in columns:
        gdf[name] = np.asarray(values, dtype=datatype)

    samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(samples, dtype=datatype)

    print("Calling fit_transform")
    gpu_result = cuPCA(n_components=2).fit_transform(gdf)
    cpu_result = skPCA(n_components=2).fit_transform(X)

    assert array_equal(gpu_result, cpu_result, 1e-3, with_sign=False)
Exemple #5
0
def test_pca_fit_transform(datatype):
    """Compare cuTSVD.fit_transform (cudf input) with skTSVD.fit_transform.

    Note: despite the function name, the models exercised here are cuTSVD
    and skTSVD with a single component.  Results are compared with
    ``array_equal`` with ``with_sign=False``.
    """
    gdf = cudf.DataFrame()
    columns = (('0', [-1, -2, -3, 1, 2, 3]),
               ('1', [-1, -1, -2, 1, 1, 2]))
    for name, values in columns:
        gdf[name] = np.asarray(values, dtype=datatype)

    samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(samples, dtype=datatype)

    print("Calling fit_transform")
    gpu_result = cuTSVD(n_components=1).fit_transform(gdf)
    cpu_result = skTSVD(n_components=1).fit_transform(X)

    assert array_equal(gpu_result, cpu_result, 1e-3, with_sign=False)
Exemple #6
0
def test_tsvd_inverse_transform(datatype, input_type):
    """Round-trip the sample data through cuTSVD and compare to the input.

    ``input_type == 'dataframe'`` fits on the cudf DataFrame; any other
    value fits on an equivalent NumPy array.  In both cases the
    reconstruction is compared to the DataFrame with ``array_equal`` using
    0.4 as its third argument.
    """
    gdf = cudf.DataFrame()
    columns = (('0', [-1, -2, -3, 1, 2, 3]),
               ('1', [-1, -1, -2, 1, 1, 2]))
    for name, values in columns:
        gdf[name] = np.asarray(values, dtype=datatype)

    cutsvd = cuTSVD(n_components=1)

    if input_type != 'dataframe':
        samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
        fit_input = np.array(samples, dtype=datatype)
    else:
        fit_input = gdf
    reduced = cutsvd.fit_transform(fit_input)

    restored = cutsvd.inverse_transform(reduced)
    assert array_equal(restored, gdf, 0.4, with_sign=True)
Exemple #7
0
def test_pca_fit(datatype):
    """Fit cuPCA (pygdf input) and skPCA, then compare fitted attributes.

    Each attribute is compared with ``array_equal`` using 1e-3 as its
    third argument; only 'components_' is compared with with_sign=False.
    """
    gdf = pygdf.DataFrame()
    columns = (('0', [-1, -2, -3, 1, 2, 3]),
               ('1', [-1, -1, -2, 1, 1, 2]))
    for name, values in columns:
        gdf[name] = np.asarray(values, dtype=datatype)

    samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(samples, dtype=datatype)

    print("Calling fit")
    cupca = cuPCA(n_components=2)
    cupca.fit(gdf)
    skpca = skPCA(n_components=2)
    skpca.fit(X)

    compared = ('singular_values_', 'components_', 'explained_variance_',
                'explained_variance_ratio_', 'noise_variance_')
    for attr in compared:
        gpu_value = getattr(cupca, attr)
        cpu_value = getattr(skpca, attr)
        assert array_equal(gpu_value, cpu_value, 1e-3,
                           with_sign=(attr != 'components_'))
Exemple #8
0
def test_pca_inverse_transform(datatype, input_type):
    """Round-trip the sample data through cuPCA and compare to the input.

    ``input_type == 'dataframe'`` fits on the cudf DataFrame; any other
    value fits on an equivalent NumPy array.  In both cases the
    reconstruction is compared to the DataFrame with ``array_equal`` using
    1e-3 as its third argument.
    """
    gdf = cudf.DataFrame()
    columns = (('0', [-1, -2, -3, 1, 2, 3]),
               ('1', [-1, -1, -2, 1, 1, 2]))
    for name, values in columns:
        gdf[name] = np.asarray(values, dtype=datatype)

    cupca = cuPCA(n_components=2)

    if input_type != 'dataframe':
        samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
        fit_input = np.array(samples, dtype=datatype)
    else:
        fit_input = gdf
    projected = cupca.fit_transform(fit_input)

    restored = cupca.inverse_transform(projected)

    assert array_equal(restored, gdf, 1e-3, with_sign=True)
Exemple #9
0
def test_tsvd_fit_transform(datatype, input_type):
    """Compare cuTSVD.fit_transform with skTSVD.fit_transform.

    skTSVD always receives the NumPy array.  cuTSVD receives a cudf
    DataFrame when ``input_type == 'dataframe'`` and the NumPy array
    otherwise.  Results are compared with ``array_equal`` using 1e-3 as
    its third argument and ``with_sign=True``.
    """
    samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(samples, dtype=datatype)

    sktsvd = skTSVD(n_components=1)
    cpu_result = sktsvd.fit_transform(X)

    cutsvd = cuTSVD(n_components=1)

    if input_type != 'dataframe':
        gpu_result = cutsvd.fit_transform(X)
    else:
        gdf = cudf.DataFrame()
        columns = (('0', [-1, -2, -3, 1, 2, 3]),
                   ('1', [-1, -1, -2, 1, 1, 2]))
        for name, values in columns:
            gdf[name] = np.asarray(values, dtype=datatype)
        gpu_result = cutsvd.fit_transform(gdf)

    assert array_equal(gpu_result, cpu_result, 1e-3, with_sign=True)
def test_dbscan_helper(X, eps, min_samples, threshold, use_assert, test_model):
    """Run DBSCAN with sklearn and with ``test_model`` and compare labels.

    Parameters
    ----------
    X : input data passed to ``run_dbscan``; converted with ``pd2cudf``
        before the second run when ``test_model == 'cuml'``.
    eps, min_samples : forwarded unchanged to ``run_dbscan``.
    threshold : forwarded to ``array_equal`` as its third argument.
    use_assert : when truthy, a mismatch raises ``AssertionError``;
        otherwise the outcome is only printed and logged.
    test_model : name of the implementation compared against sklearn.
    """
    dbscan_imp1 = run_dbscan(X, eps, min_samples, model='sklearn')
    print()
    if test_model == 'cuml':
        X = pd2cudf(X)

    dbscan_imp2 = run_dbscan(X, eps, min_samples, model=test_model)
    print()
    for attr in ['labels_']:
        passed = array_equal(getattr(dbscan_imp1, attr),
                             getattr(dbscan_imp2, attr),
                             threshold,
                             with_sign=True)
        # The message previously said "compare pca" (copied from the PCA
        # helper), which mislabelled DBSCAN results in the output and log.
        message = 'compare dbscan: %s vs sklearn %s %s' % (
            test_model, attr, 'equal' if passed else 'NOT equal')
        print(message)
        write_log(message)
        if use_assert:
            assert passed, message
    print()
    # Drop the local references to the models and the input data.
    del dbscan_imp1, dbscan_imp2, X
Exemple #11
0
def test_tsvd_fit(datatype, input_type):
    """Fit cuTSVD and skTSVD on the same data and compare fitted attributes.

    skTSVD always receives the NumPy array.  cuTSVD receives a cudf
    DataFrame when ``input_type == 'dataframe'`` and the NumPy array
    otherwise.  Attributes are compared with ``array_equal`` using 0.4 as
    its third argument; only 'components_' uses with_sign=False.
    """
    samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(samples, dtype=datatype)

    sktsvd = skTSVD(n_components=1)
    sktsvd.fit(X)

    cutsvd = cuTSVD(n_components=1)

    if input_type != 'dataframe':
        cutsvd.fit(X)
    else:
        gdf = cudf.DataFrame()
        columns = (('0', [-1, -2, -3, 1, 2, 3]),
                   ('1', [-1, -1, -2, 1, 1, 2]))
        for name, values in columns:
            gdf[name] = np.asarray(values, dtype=datatype)
        cutsvd.fit(gdf)

    compared = ('singular_values_', 'components_',
                'explained_variance_ratio_')
    for attr in compared:
        gpu_value = getattr(cutsvd, attr)
        cpu_value = getattr(sktsvd, attr)
        assert array_equal(gpu_value, cpu_value, 0.4,
                           with_sign=(attr != 'components_'))
Exemple #12
0
def test_pca_helper(X, n_components, svd_solver, whiten, random_state,
                    threshold, use_assert, test_model):
    """Run PCA with sklearn and with ``test_model`` and compare the results.

    ``X`` is converted with ``pd2pygdf`` when ``test_model == 'cuml'`` and
    to a float32 NumPy array when ``test_model == 'h2o4gpu'`` before the
    second run.  Each compared attribute is checked with ``array_equal``
    (``threshold`` as third argument); the outcome is printed, passed to
    ``write_log``, and asserted only when ``use_assert`` is truthy.
    """
    pca_args = (n_components, svd_solver, whiten, random_state)

    reference = run_pca(X, *pca_args, model='sklearn')
    print()

    if test_model == 'cuml':
        X = pd2pygdf(X)
    elif test_model == 'h2o4gpu':
        X = np.array(X).astype(np.float32)

    candidate = run_pca(X, *pca_args, model=test_model)
    print()

    # Attributes in this tuple are compared with with_sign=False.
    unsigned = ('components_', 'transformed_result')
    compared = ('singular_values_', 'components_', 'explained_variance_',
                'explained_variance_ratio_', 'transformed_result')
    for attr in compared:
        passed = array_equal(getattr(reference, attr),
                             getattr(candidate, attr),
                             threshold,
                             with_sign=(attr not in unsigned))
        verdict = 'equal' if passed else 'NOT equal'
        message = 'compare pca: %s vs sklearn %s %s' % (
            test_model, attr, verdict)
        print(message)
        write_log(message)
        if use_assert:
            assert passed, message
    print()
    # Drop the local references to the models and the input data.
    del reference, candidate, X