Beispiel #1
0
def test_standard_scaler_sparse(failure_logger,
                                sparse_clf_dataset,  # noqa: F811
                                with_std):
    """Compare cuML's StandardScaler with scikit-learn's on sparse input.

    Both scalers are built with ``with_mean=False`` and ``copy=True``.
    The transformed data and its inverse transform are checked against
    the scikit-learn results.

    Parameters
    ----------
    failure_logger
        Requested but never referenced in the body (presumably a fixture
        used for its side effects -- confirm against its definition).
    sparse_clf_dataset
        Pair ``(X_np, X)``: ``X_np`` is fed to scikit-learn, ``X`` to cuML.
    with_std
        Forwarded unchanged to both scalers as ``with_std``.
    """
    X_np, X = sparse_clf_dataset

    scaler = cuStandardScaler(with_mean=False, with_std=with_std, copy=True)
    t_X = scaler.fit_transform(X)
    r_X = scaler.inverse_transform(t_X)

    # Exact type equality is not required here: each stage only has to stay
    # sparse within the same library (cupyx or scipy) as its input.
    if cpx.scipy.sparse.issparse(X):
        assert cpx.scipy.sparse.issparse(t_X)
    if scipy.sparse.issparse(X):
        assert scipy.sparse.issparse(t_X)
    if cpx.scipy.sparse.issparse(t_X):
        assert cpx.scipy.sparse.issparse(r_X)
    if scipy.sparse.issparse(t_X):
        assert scipy.sparse.issparse(r_X)

    scaler = skStandardScaler(copy=True, with_mean=False, with_std=with_std)
    sk_t_X = scaler.fit_transform(X_np)
    sk_r_X = scaler.inverse_transform(sk_t_X)

    assert_allclose(t_X, sk_t_X)
    assert_allclose(r_X, sk_r_X)
Beispiel #2
0
def test_column_transformer(
        clf_dataset,
        remainder,  # noqa: F811
        transformer_weights):
    """Check cuML's ColumnTransformer against the scikit-learn one.

    A StandardScaler is applied to columns 0 and 2 and a Normalizer to
    columns 1 and 3. The cuML outputs of ``fit_transform`` and
    ``transform`` must both match scikit-learn's ``fit_transform`` output,
    and ``transform`` must return an object of the same type as its input.

    Parameters
    ----------
    clf_dataset
        Pair ``(X_np, X)``: the scikit-learn input and the cuML input.
    remainder
        Forwarded to both column transformers as ``remainder``.
    transformer_weights
        Forwarded to both column transformers as ``transformer_weights``.
    """
    reference_X, X = clf_dataset

    scaled_cols = [0, 2]
    normed_cols = [1, 3]
    if isinstance(X, (pdDataFrame, cuDataFrame)):
        # DataFrame inputs are selected by label ('c0', 'c1', ...) on the
        # cuML side; scikit-learn keeps using the integer positions.
        cu_scaled_cols = ['c{}'.format(idx) for idx in scaled_cols]
        cu_normed_cols = ['c{}'.format(idx) for idx in normed_cols]
    else:
        cu_scaled_cols, cu_normed_cols = scaled_cols, normed_cols

    cu_ct = cuColumnTransformer(
        [("scaler", cuStandardScaler(), cu_scaled_cols),
         ("normalizer", cuNormalizer(), cu_normed_cols)],
        remainder=remainder,
        transformer_weights=transformer_weights)
    cu_fit_out = cu_ct.fit_transform(X)
    cu_out = cu_ct.transform(X)
    assert type(cu_out) == type(X)

    sk_ct = skColumnTransformer(
        [("scaler", skStandardScaler(), scaled_cols),
         ("normalizer", skNormalizer(), normed_cols)],
        remainder=remainder,
        transformer_weights=transformer_weights)
    expected = sk_ct.fit_transform(reference_X)

    assert_allclose(cu_fit_out, expected)
    assert_allclose(cu_out, expected)
Beispiel #3
0
def test_make_column_selector():
    """Check cuML's make_column_selector against scikit-learn's.

    A small frame with a string, an integer and a float column is routed
    through a ColumnTransformer whose columns are picked by selectors:
    non-numeric dtypes go to a one-hot encoder, integer dtypes to a
    standard scaler and columns matching the pattern "temp" to a
    normalizer. The cuML output must match scikit-learn's and have the
    same type as the cuDF input.
    """
    pandas_frame = pdDataFrame({
        'city': ['London', 'London', 'Paris', 'Sallisaw'],
        'rating': [5, 3, 4, 5],
        'temperature': [21., 21., 24., 28.]
    })
    cudf_frame = cudf.from_pandas(pandas_frame)

    cu_ct = cuColumnTransformer(
        [
            ("ohe", cuOneHotEncoder(),
             cu_make_column_selector(dtype_exclude=np.number)),
            ("scaler", cuStandardScaler(),
             cu_make_column_selector(dtype_include=np.integer)),
            ("normalizer", cuNormalizer(),
             cu_make_column_selector(pattern="temp")),
        ],
        remainder='drop')
    cu_out = cu_ct.fit_transform(cudf_frame)

    sk_ct = skColumnTransformer(
        [
            ("ohe", skOneHotEncoder(),
             sk_make_column_selector(dtype_exclude=np.number)),
            ("scaler", skStandardScaler(),
             sk_make_column_selector(dtype_include=np.integer)),
            ("normalizer", skNormalizer(),
             sk_make_column_selector(pattern="temp")),
        ],
        remainder='drop')
    expected = sk_ct.fit_transform(pandas_frame)

    assert_allclose(cu_out, expected)
    assert type(cu_out) == type(cudf_frame)
Beispiel #4
0
def test_make_column_transformer_sparse(
        sparse_clf_dataset,  # noqa: F811
        remainder,
        sparse_threshold):
    """Compare cuML's make_column_transformer with scikit-learn's on
    sparse input.

    A StandardScaler (``with_mean=False``) is applied to columns 0 and 2
    and a Normalizer to columns 1 and 3. The cuML outputs of
    ``fit_transform`` and ``transform`` must both match scikit-learn's
    ``fit_transform`` output. Inputs in 'csc' format are marked as an
    expected failure.

    Parameters
    ----------
    sparse_clf_dataset
        Pair ``(X_np, X)``: the scikit-learn input and the cuML input.
    remainder
        Forwarded to both transformers as ``remainder``.
    sparse_threshold
        Forwarded to both transformers as ``sparse_threshold``; also
        decides whether the output type is checked.
    """
    X_np, X = sparse_clf_dataset

    if X.format == 'csc':
        pytest.xfail()

    # Density = stored entries / total cells. ``X.size`` must not be used as
    # the denominator: on scipy-style sparse matrices it is the number of
    # stored values, which would make the ratio always equal to 1.
    n_rows, n_cols = X.shape
    dataset_density = X.nnz / (n_rows * n_cols)

    transformer = cu_make_column_transformer(
        (cuStandardScaler(with_mean=False), [0, 2]), (cuNormalizer(), [1, 3]),
        remainder=remainder,
        sparse_threshold=sparse_threshold)

    ft_X = transformer.fit_transform(X)
    t_X = transformer.transform(X)
    if dataset_density < sparse_threshold:
        # Sparse input -> sparse output when the density is below
        # sparse_threshold; otherwise the output is dense and its type is
        # not compared with the input type.
        assert type(t_X) == type(X)

    transformer = sk_make_column_transformer(
        (skStandardScaler(with_mean=False), [0, 2]), (skNormalizer(), [1, 3]),
        remainder=remainder,
        sparse_threshold=sparse_threshold)

    sk_t_X = transformer.fit_transform(X_np)

    assert_allclose(ft_X, sk_t_X)
    assert_allclose(t_X, sk_t_X)
Beispiel #5
0
def test_make_column_transformer(clf_dataset, remainder):  # noqa: F811
    """Check cuML's make_column_transformer against scikit-learn's.

    A StandardScaler is applied to columns 0 and 2 and a Normalizer to
    columns 1 and 3. The cuML outputs of ``fit_transform`` and
    ``transform`` must both match scikit-learn's ``fit_transform`` output,
    and ``transform`` must return an object of the same type as its input.

    Parameters
    ----------
    clf_dataset
        Pair ``(X_np, X)``: the scikit-learn input and the cuML input.
    remainder
        Forwarded to both transformers as ``remainder``.
    """
    reference_X, X = clf_dataset

    scaled_cols = [0, 2]
    normed_cols = [1, 3]
    if isinstance(X, (pdDataFrame, cuDataFrame)):
        # DataFrame inputs are selected by label ('c0', 'c1', ...) on the
        # cuML side; scikit-learn keeps using the integer positions.
        cu_scaled_cols = ['c{}'.format(idx) for idx in scaled_cols]
        cu_normed_cols = ['c{}'.format(idx) for idx in normed_cols]
    else:
        cu_scaled_cols, cu_normed_cols = scaled_cols, normed_cols

    cu_ct = cu_make_column_transformer(
        (cuStandardScaler(), cu_scaled_cols),
        (cuNormalizer(), cu_normed_cols),
        remainder=remainder)
    cu_fit_out = cu_ct.fit_transform(X)
    cu_out = cu_ct.transform(X)
    assert type(cu_out) == type(X)

    sk_ct = sk_make_column_transformer(
        (skStandardScaler(), scaled_cols),
        (skNormalizer(), normed_cols),
        remainder=remainder)
    expected = sk_ct.fit_transform(reference_X)

    assert_allclose(cu_fit_out, expected)
    assert_allclose(cu_out, expected)
Beispiel #6
0
def test_standard_scaler_sparse(sparse_clf_dataset, with_std):  # noqa: F811
    """Compare cuML's StandardScaler with scikit-learn's on sparse input.

    Both scalers use ``copy=True`` and ``with_mean=False``. The cuML
    transform and inverse transform must keep the input's type and match
    the scikit-learn results.

    Parameters
    ----------
    sparse_clf_dataset
        Pair ``(X_np, X)``: the scikit-learn input and the cuML input.
    with_std
        Forwarded to both scalers as ``with_std``.
    """
    reference_X, X = sparse_clf_dataset
    scaler_kwargs = dict(copy=True, with_mean=False, with_std=with_std)

    cu_scaler = cuStandardScaler(**scaler_kwargs)
    cu_scaled = cu_scaler.fit_transform(X)
    cu_restored = cu_scaler.inverse_transform(cu_scaled)
    assert type(cu_scaled) == type(X)
    assert type(cu_restored) == type(cu_scaled)

    sk_scaler = skStandardScaler(**scaler_kwargs)
    sk_scaled = sk_scaler.fit_transform(reference_X)
    sk_restored = sk_scaler.inverse_transform(sk_scaled)

    assert_allclose(cu_scaled, sk_scaled)
    assert_allclose(cu_restored, sk_restored)
def test_standard_scaler(failure_logger, clf_dataset,  # noqa: F811
                         with_mean, with_std):
    """Compare cuML's StandardScaler with scikit-learn's.

    Both scalers use ``copy=True``. The cuML transform and inverse
    transform must keep the input's type and match the scikit-learn
    results.

    Parameters
    ----------
    failure_logger
        Requested but never referenced in the body (presumably a fixture
        used for its side effects -- confirm against its definition).
    clf_dataset
        Pair ``(X_np, X)``: the scikit-learn input and the cuML input.
    with_mean
        Forwarded to both scalers as ``with_mean``.
    with_std
        Forwarded to both scalers as ``with_std``.
    """
    reference_X, X = clf_dataset
    scaler_kwargs = dict(with_mean=with_mean, with_std=with_std, copy=True)

    cu_scaler = cuStandardScaler(**scaler_kwargs)
    cu_scaled = cu_scaler.fit_transform(X)
    cu_restored = cu_scaler.inverse_transform(cu_scaled)
    assert type(cu_scaled) == type(X)
    assert type(cu_restored) == type(cu_scaled)

    sk_scaler = skStandardScaler(**scaler_kwargs)
    sk_scaled = sk_scaler.fit_transform(reference_X)
    sk_restored = sk_scaler.inverse_transform(sk_scaled)

    assert_allclose(cu_scaled, sk_scaled)
    assert_allclose(cu_restored, sk_restored)