Ejemplo n.º 1
0
def test_pca_fit(datatype, input_type):
    """Fit cuML and scikit-learn PCA on the same data and compare attributes.

    Both estimators are built with ``n_components=2`` and fitted on the same
    six 2-D samples. Each fitted attribute of the cuML model is converted to
    a host array and compared against the scikit-learn value.

    Parameters
    ----------
    datatype : dtype used to build the input arrays.
    input_type : ``'dataframe'`` feeds cuML a ``cudf.DataFrame``; any other
        value feeds it the NumPy array directly.
    """
    samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(samples, dtype=datatype)

    # Reference model.
    skpca = skPCA(n_components=2)
    skpca.fit(X)

    # Model under test, fed either a GPU dataframe or the raw ndarray.
    cupca = cuPCA(n_components=2)
    if input_type == 'dataframe':
        gdf = cudf.DataFrame()
        gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
        gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)
        fit_input = gdf
    else:
        fit_input = X
    cupca.fit(fit_input)

    fitted_attrs = (
        'singular_values_',
        'components_',
        'explained_variance_',
        'explained_variance_ratio_',
        'noise_variance_',
    )
    for attr in fitted_attrs:
        # Only 'components_' is compared with with_sign=False
        # (presumably because component signs are arbitrary -- confirm
        # against array_equal's implementation).
        with_sign = attr != 'components_'

        cuml_res = getattr(cupca, attr)
        skl_res = getattr(skpca, attr)

        # Diagnostic output, shown by pytest when the assertion fails.
        print(attr)
        print(cuml_res)
        print(skl_res)

        # Series results become 1-D arrays; everything else is treated as a
        # dataframe-like object exposing as_matrix().
        cuml_res = (cuml_res.to_array()
                    if isinstance(cuml_res, cudf.Series)
                    else cuml_res.as_matrix())

        assert array_equal(cuml_res, skl_res, 1e-3, with_sign=with_sign)
Ejemplo n.º 2
0
def test_ridge(datatype, X_type, y_type, algorithm):
    """Compare cuML Ridge predictions with scikit-learn Ridge predictions.

    The target is built as ``X @ [5, 10]``, and both models are created with
    ``fit_intercept=False`` and ``normalize=False``. Predictions on two
    held-out rows are then compared.

    Parameters
    ----------
    datatype : dtype used to build the input arrays.
    X_type : ``'dataframe'`` to fit cuML on a ``cudf.DataFrame`` or
        ``'ndarray'`` to fit it on the NumPy array.
    y_type : accepted for the test parametrization but not used here; ``y``
        is always passed as a NumPy array.
    algorithm : forwarded to cuML Ridge as ``solver``.

    Raises
    ------
    ValueError
        If ``X_type`` is neither ``'dataframe'`` nor ``'ndarray'``.
    """
    X = np.array([[2.0, 5.0], [6.0, 9.0], [2.0, 2.0], [2.0, 3.0]],
                 dtype=datatype)
    y = np.dot(X, np.array([5.0, 10.0]).astype(datatype))

    pred_data = np.array([[3.0, 5.0], [2.0, 5.0]]).astype(datatype)

    skridge = skRidge(fit_intercept=False, normalize=False)
    skridge.fit(X, y)

    curidge = cuRidge(fit_intercept=False, normalize=False, solver=algorithm)

    if X_type == 'dataframe':
        # Same values as X, laid out column by column.
        gdf = cudf.DataFrame()
        gdf['0'] = np.asarray([2, 6, 2, 2], dtype=datatype)
        gdf['1'] = np.asarray([5, 9, 2, 3], dtype=datatype)
        curidge.fit(gdf, y)

    elif X_type == 'ndarray':
        curidge.fit(X, y)

    else:
        # Without this branch an unknown X_type would leave the cuML model
        # unfitted and the test would fail later with an unrelated error.
        raise ValueError("Unsupported X_type: {!r}".format(X_type))

    sk_predict = skridge.predict(pred_data)
    cu_predict = curidge.predict(pred_data).to_array()

    assert array_equal(sk_predict, cu_predict, 1e-3, with_sign=True)
Ejemplo n.º 3
0
def test_ols(datatype, X_type, y_type, algorithm):
    """Compare cuML and scikit-learn LinearRegression predictions.

    The target is built as ``X @ [5, 10]``, and both models are created with
    ``fit_intercept=True`` and ``normalize=False``. Predictions on two
    held-out rows are then compared.

    Parameters
    ----------
    datatype : dtype used to build the input arrays.
    X_type : ``'dataframe'`` to fit cuML on a ``cudf.DataFrame`` or
        ``'ndarray'`` to fit it on the NumPy array.
    y_type : accepted for the test parametrization but not used here; ``y``
        is always passed as a NumPy array.
    algorithm : forwarded to cuML LinearRegression as ``algorithm``.

    Raises
    ------
    ValueError
        If ``X_type`` is neither ``'dataframe'`` nor ``'ndarray'``.
    """
    X = np.array([[2.0, 5.0], [6.0, 9.0], [2.0, 2.0], [2.0, 3.0]],
                 dtype=datatype)
    y = np.dot(X, np.array([5.0, 10.0]).astype(datatype))

    pred_data = np.array([[3.0, 5.0], [2.0, 5.0]]).astype(datatype)

    skols = skLinearRegression(fit_intercept=True, normalize=False)
    skols.fit(X, y)

    cuols = cuLinearRegression(fit_intercept=True,
                               normalize=False,
                               algorithm=algorithm)

    if X_type == 'dataframe':
        # Same values as X, laid out column by column.
        gdf = cudf.DataFrame()
        gdf['0'] = np.asarray([2, 6, 2, 2], dtype=datatype)
        gdf['1'] = np.asarray([5, 9, 2, 3], dtype=datatype)
        cuols.fit(gdf, y)

    elif X_type == 'ndarray':
        cuols.fit(X, y)

    else:
        # Without this branch an unknown X_type would leave the cuML model
        # unfitted and the test would fail later with an unrelated error.
        raise ValueError("Unsupported X_type: {!r}".format(X_type))

    sk_predict = skols.predict(pred_data)
    cu_predict = cuols.predict(pred_data).to_array()

    # Diagnostic output, shown by pytest when the assertion fails.
    print(sk_predict)
    print(cu_predict)

    print(cuols.gdf_datatype)
    print(y.dtype)

    assert array_equal(sk_predict, cu_predict, 1e-3, with_sign=True)
Ejemplo n.º 4
0
def check_table(table_name):
    """Print every row of ``table_name`` that differs between the two DBs.

    The table is loaded once from ``database_settings["original_db_name"]``
    and once from ``database_settings["new_db_name"]``. For each key of the
    original table, the matching entries are compared and a message is
    printed when they differ. Nothing is returned.

    NOTE(review): ``get_table`` presumably returns a mapping keyed by row id
    -- confirm. Keys present only in the new table are never visited, and
    what happens for a key missing from the new table depends on the type
    ``get_table`` returns.
    """
    old_table = get_table(table_name, database_settings["original_db_name"])
    new_table = get_table(table_name, database_settings["new_db_name"])

    for k in old_table.keys():
        old_row = old_table[k]
        new_row = new_table[k]

        # NOTE(review): this indexes each row by its own row key, which
        # looks unintended -- verify against the shape get_table returns.
        if not (old_row[k] != new_row[k]):
            continue
        if utils.array_equal(old_row, new_row):
            continue

        message = "Change in row {rowid}: was {old}, now {new}".format(
            rowid=k, old=old_row, new=new_row)
        print(message)
Ejemplo n.º 5
0
def test_tsvd_inverse_transform(datatype, input_type):
    """Round-trip data through cuML TruncatedSVD and compare with the input.

    The data is reduced with ``n_components=1`` and mapped back with
    ``inverse_transform``. The result is compared against the original
    ``cudf.DataFrame``, passing ``0.4`` as the third argument to
    ``array_equal``.

    Parameters
    ----------
    datatype : dtype used to build the input arrays.
    input_type : ``'dataframe'`` feeds cuML the ``cudf.DataFrame``; any other
        value feeds it an equivalent NumPy array.
    """
    # Reference data; always built because it is the comparison target.
    expected_gdf = cudf.DataFrame()
    expected_gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
    expected_gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)

    cutsvd = cuTSVD(n_components=1)

    if input_type == 'dataframe':
        fit_input = expected_gdf
    else:
        rows = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
        fit_input = np.array(rows, dtype=datatype)

    reduced = cutsvd.fit_transform(fit_input)
    recovered = cutsvd.inverse_transform(reduced)

    assert array_equal(recovered, expected_gdf, 0.4, with_sign=True)
Ejemplo n.º 6
0
def test_pca_inverse_transform(datatype, input_type):
    """Round-trip data through cuML PCA and compare with the input.

    The data is transformed with ``n_components=2`` and mapped back with
    ``inverse_transform``. The result is compared against the original
    ``cudf.DataFrame``, passing ``1e-3`` as the third argument to
    ``array_equal``.

    Parameters
    ----------
    datatype : dtype used to build the input arrays.
    input_type : ``'dataframe'`` feeds cuML the ``cudf.DataFrame``; any other
        value feeds it an equivalent NumPy array.
    """
    # Reference data; always built because it is the comparison target.
    expected_gdf = cudf.DataFrame()
    expected_gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
    expected_gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)

    cupca = cuPCA(n_components=2)

    if input_type == 'dataframe':
        fit_input = expected_gdf
    else:
        rows = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
        fit_input = np.array(rows, dtype=datatype)

    projected = cupca.fit_transform(fit_input)
    recovered = cupca.inverse_transform(projected)

    assert array_equal(recovered, expected_gdf, 1e-3, with_sign=True)
Ejemplo n.º 7
0
def test_tsvd_fit_transform(datatype, input_type):
    """Compare cuML and scikit-learn TruncatedSVD ``fit_transform`` output.

    Both estimators use ``n_components=1`` on the same six 2-D samples.

    Parameters
    ----------
    datatype : dtype used to build the input arrays.
    input_type : ``'dataframe'`` feeds cuML a ``cudf.DataFrame``; any other
        value feeds it the NumPy array directly.
    """
    samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(samples, dtype=datatype)

    # Reference projection from scikit-learn.
    sktsvd = skTSVD(n_components=1)
    expected = sktsvd.fit_transform(X)

    cutsvd = cuTSVD(n_components=1)

    if input_type == 'dataframe':
        gdf = cudf.DataFrame()
        gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
        gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)
        fit_input = gdf
    else:
        fit_input = X

    actual = cutsvd.fit_transform(fit_input)

    assert array_equal(actual, expected, 1e-3, with_sign=True)
Ejemplo n.º 8
0
def test_tsvd_fit(datatype, input_type):
    """Fit cuML and scikit-learn TruncatedSVD and compare fitted attributes.

    Both estimators use ``n_components=1`` on the same six 2-D samples. Each
    checked attribute is compared by passing ``0.4`` as the third argument
    to ``array_equal``.

    Parameters
    ----------
    datatype : dtype used to build the input arrays.
    input_type : ``'dataframe'`` feeds cuML a ``cudf.DataFrame``; any other
        value feeds it the NumPy array directly.
    """
    samples = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
    X = np.array(samples, dtype=datatype)

    # Reference model.
    sktsvd = skTSVD(n_components=1)
    sktsvd.fit(X)

    cutsvd = cuTSVD(n_components=1)

    if input_type == 'dataframe':
        gdf = cudf.DataFrame()
        gdf['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
        gdf['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)
        fit_input = gdf
    else:
        fit_input = X
    cutsvd.fit(fit_input)

    checked_attrs = (
        'singular_values_',
        'components_',
        'explained_variance_ratio_',
    )
    for attr in checked_attrs:
        # Only 'components_' is compared with with_sign=False.
        with_sign = attr != 'components_'
        cuml_value = getattr(cutsvd, attr)
        skl_value = getattr(sktsvd, attr)
        assert array_equal(cuml_value, skl_value, 0.4, with_sign=with_sign)