def test_pca_fit(datatype, input_type):
    """Compare the fitted attributes of cuPCA against those of skPCA.

    Both estimators are created with ``n_components=2`` and fitted on the
    same six two-feature samples. cuPCA is fitted on a ``cudf.DataFrame``
    when ``input_type == 'dataframe'`` and on the NumPy array otherwise.

    Parameters
    ----------
    datatype
        dtype used to build every input array.
    input_type
        ``'dataframe'`` selects the cudf input path; any other value
        falls through to the ndarray path.
    """
    samples = np.array(
        [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]],
        dtype=datatype,
    )

    reference = skPCA(n_components=2)
    reference.fit(samples)

    candidate = cuPCA(n_components=2)
    if input_type == 'dataframe':
        frame = cudf.DataFrame()
        frame['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
        frame['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)
        candidate.fit(frame)
    else:
        candidate.fit(samples)

    fitted_attrs = (
        'singular_values_',
        'components_',
        'explained_variance_',
        'explained_variance_ratio_',
        'noise_variance_',
    )
    for name in fitted_attrs:
        # Only components_ is compared with with_sign=False
        # (presumably because component signs are arbitrary -- confirm
        # against array_equal's implementation).
        compare_sign = name != 'components_'

        # Diagnostic output, visible when the test is run with capture off.
        print(name)
        print(getattr(candidate, name))
        print(getattr(reference, name))

        # Series results are converted with to_array(); everything else is
        # expected to expose as_matrix().
        actual = getattr(candidate, name)
        if isinstance(actual, cudf.Series):
            actual = actual.to_array()
        else:
            actual = actual.as_matrix()

        expected = getattr(reference, name)
        assert array_equal(actual, expected, 1e-3, with_sign=compare_sign)
def test_ridge(datatype, X_type, y_type, algorithm):
    """Compare cuRidge predictions against skRidge predictions.

    Both models are built with ``fit_intercept=False`` and
    ``normalize=False`` and fitted on four two-feature samples whose
    target is the dot product of the samples with ``[5.0, 10.0]``.

    Parameters
    ----------
    datatype
        dtype used to build every input array.
    X_type
        ``'dataframe'`` fits cuRidge on a ``cudf.DataFrame``;
        ``'ndarray'`` fits it on the NumPy array. No other value triggers
        a fit.
    y_type
        Accepted but not used inside this function.
    algorithm
        Forwarded to cuRidge as its ``solver`` argument.
    """
    features = np.array(
        [[2.0, 5.0], [6.0, 9.0], [2.0, 2.0], [2.0, 3.0]],
        dtype=datatype,
    )
    weights = np.array([5.0, 10.0]).astype(datatype)
    target = np.dot(features, weights)

    query = np.array([[3.0, 5.0], [2.0, 5.0]]).astype(datatype)

    reference = skRidge(fit_intercept=False, normalize=False)
    reference.fit(features, target)

    candidate = cuRidge(
        fit_intercept=False,
        normalize=False,
        solver=algorithm,
    )

    if X_type == 'dataframe':
        frame = cudf.DataFrame()
        frame['0'] = np.asarray([2, 6, 2, 2], dtype=datatype)
        frame['1'] = np.asarray([5, 9, 2, 3], dtype=datatype)
        candidate.fit(frame, target)
    elif X_type == 'ndarray':
        candidate.fit(features, target)

    expected = reference.predict(query)
    actual = candidate.predict(query).to_array()

    assert array_equal(expected, actual, 1e-3, with_sign=True)
def test_ols(datatype, X_type, y_type, algorithm):
    """Compare cuLinearRegression predictions against skLinearRegression.

    Both models are built with ``fit_intercept=True`` and
    ``normalize=False`` and fitted on four two-feature samples whose
    target is the dot product of the samples with ``[5.0, 10.0]``.

    Parameters
    ----------
    datatype
        dtype used to build every input array.
    X_type
        ``'dataframe'`` fits the cuML model on a ``cudf.DataFrame``;
        ``'ndarray'`` fits it on the NumPy array. No other value triggers
        a fit.
    y_type
        Accepted but not used inside this function.
    algorithm
        Forwarded to cuLinearRegression as its ``algorithm`` argument.
    """
    features = np.array(
        [[2.0, 5.0], [6.0, 9.0], [2.0, 2.0], [2.0, 3.0]],
        dtype=datatype,
    )
    weights = np.array([5.0, 10.0]).astype(datatype)
    target = np.dot(features, weights)

    query = np.array([[3.0, 5.0], [2.0, 5.0]]).astype(datatype)

    reference = skLinearRegression(fit_intercept=True, normalize=False)
    reference.fit(features, target)

    candidate = cuLinearRegression(
        fit_intercept=True,
        normalize=False,
        algorithm=algorithm,
    )

    if X_type == 'dataframe':
        frame = cudf.DataFrame()
        frame['0'] = np.asarray([2, 6, 2, 2], dtype=datatype)
        frame['1'] = np.asarray([5, 9, 2, 3], dtype=datatype)
        candidate.fit(frame, target)
    elif X_type == 'ndarray':
        candidate.fit(features, target)

    expected = reference.predict(query)
    actual = candidate.predict(query).to_array()

    # Diagnostic output, visible when the test is run with capture off.
    print(expected)
    print(actual)
    print(candidate.gdf_datatype)
    print(target.dtype)

    assert array_equal(expected, actual, 1e-3, with_sign=True)
def check_table(table_name):
    """Print every row of ``table_name`` that differs between two databases.

    The table is loaded once from the database named by
    ``database_settings["original_db_name"]`` and once from the one named
    by ``database_settings["new_db_name"]``. Keys are taken from the
    original table only, so the new table is indexed with each of them
    directly.

    Parameters
    ----------
    table_name
        Passed unchanged to ``get_table`` for both databases.
    """
    before = get_table(table_name, database_settings["original_db_name"])
    after = get_table(table_name, database_settings["new_db_name"])

    for row_id in before.keys():
        old_row = before[row_id]
        new_row = after[row_id]

        # NOTE(review): each row is indexed with its own key here, which
        # looks unusual -- confirm this is the intended quick check before
        # the full array comparison.
        if not (old_row[row_id] != new_row[row_id]):
            continue
        if utils.array_equal(old_row, new_row):
            continue

        message = "Change in row {rowid}: was {old}, now {new}".format(
            rowid=row_id,
            old=old_row,
            new=new_row,
        )
        print(message)
def test_tsvd_inverse_transform(datatype, input_type):
    """Check that cuTSVD's inverse_transform approximately restores its input.

    A one-component cuTSVD is fitted and applied with ``fit_transform``;
    the result of ``inverse_transform`` is then compared with the
    ``cudf.DataFrame`` holding the original samples, passing ``0.4`` as
    the third argument of ``array_equal`` (presumably a tolerance).

    Parameters
    ----------
    datatype
        dtype used to build every input array.
    input_type
        ``'dataframe'`` feeds the cudf frame to ``fit_transform``; any
        other value feeds an equivalent NumPy array.
    """
    # The frame is always built: it is the comparison target on both paths.
    expected_frame = cudf.DataFrame()
    expected_frame['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
    expected_frame['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)

    model = cuTSVD(n_components=1)

    if input_type == 'dataframe':
        projected = model.fit_transform(expected_frame)
    else:
        samples = np.array(
            [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]],
            dtype=datatype,
        )
        projected = model.fit_transform(samples)

    restored = model.inverse_transform(projected)

    assert array_equal(restored, expected_frame, 0.4, with_sign=True)
def test_pca_inverse_transform(datatype, input_type):
    """Check that cuPCA's inverse_transform restores its input.

    A two-component cuPCA is fitted and applied with ``fit_transform``;
    the result of ``inverse_transform`` is then compared with the
    ``cudf.DataFrame`` holding the original samples, passing ``1e-3`` as
    the third argument of ``array_equal`` (presumably a tolerance).

    Parameters
    ----------
    datatype
        dtype used to build every input array.
    input_type
        ``'dataframe'`` feeds the cudf frame to ``fit_transform``; any
        other value feeds an equivalent NumPy array.
    """
    # The frame is always built: it is the comparison target on both paths.
    expected_frame = cudf.DataFrame()
    expected_frame['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
    expected_frame['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)

    model = cuPCA(n_components=2)

    if input_type == 'dataframe':
        projected = model.fit_transform(expected_frame)
    else:
        samples = np.array(
            [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]],
            dtype=datatype,
        )
        projected = model.fit_transform(samples)

    restored = model.inverse_transform(projected)

    assert array_equal(restored, expected_frame, 1e-3, with_sign=True)
def test_tsvd_fit_transform(datatype, input_type):
    """Compare cuTSVD.fit_transform output against skTSVD.fit_transform.

    Both estimators use ``n_components=1`` and the same six two-feature
    samples. The two projections are compared through ``array_equal``
    with ``1e-3`` as its third argument and ``with_sign=True``.

    Parameters
    ----------
    datatype
        dtype used to build every input array.
    input_type
        ``'dataframe'`` feeds cuTSVD a ``cudf.DataFrame``; any other
        value feeds it the NumPy array.
    """
    samples = np.array(
        [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]],
        dtype=datatype,
    )

    reference = skTSVD(n_components=1)
    expected = reference.fit_transform(samples)

    candidate = cuTSVD(n_components=1)

    if input_type == 'dataframe':
        frame = cudf.DataFrame()
        frame['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
        frame['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)
        actual = candidate.fit_transform(frame)
    else:
        actual = candidate.fit_transform(samples)

    assert array_equal(actual, expected, 1e-3, with_sign=True)
def test_tsvd_fit(datatype, input_type):
    """Compare the fitted attributes of cuTSVD against those of skTSVD.

    Both estimators use ``n_components=1`` and are fitted on the same six
    two-feature samples. Each checked attribute is compared through
    ``array_equal`` with ``0.4`` as its third argument.

    Parameters
    ----------
    datatype
        dtype used to build every input array.
    input_type
        ``'dataframe'`` fits cuTSVD on a ``cudf.DataFrame``; any other
        value fits it on the NumPy array.
    """
    samples = np.array(
        [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]],
        dtype=datatype,
    )

    reference = skTSVD(n_components=1)
    reference.fit(samples)

    candidate = cuTSVD(n_components=1)
    if input_type == 'dataframe':
        frame = cudf.DataFrame()
        frame['0'] = np.asarray([-1, -2, -3, 1, 2, 3], dtype=datatype)
        frame['1'] = np.asarray([-1, -1, -2, 1, 1, 2], dtype=datatype)
        candidate.fit(frame)
    else:
        candidate.fit(samples)

    checked_attrs = (
        'singular_values_',
        'components_',
        'explained_variance_ratio_',
    )
    for name in checked_attrs:
        # Only components_ is compared with with_sign=False.
        compare_sign = name != 'components_'
        actual = getattr(candidate, name)
        expected = getattr(reference, name)
        assert array_equal(actual, expected, 0.4, with_sign=compare_sign)