Example #1
0
def test_logistic_regression_predict_proba(dtype, nrows, column_info,
                                           num_classes, fit_intercept):
    """Compare ``cuLog`` probability outputs with ``skLog`` on one model.

    ``cuLog`` is fitted on the training split. Its coefficients (and its
    intercept when ``fit_intercept`` is true) are then copied into an
    unfitted ``skLog`` instance, so both estimators evaluate the same
    parameters. ``predict_proba`` and ``predict_log_proba`` of the two are
    compared with ``array_equal``.

    Parameters
    ----------
    dtype : dtype passed to the dataset helper and used to cast the labels.
    nrows : number of rows requested from the dataset helper.
    column_info : pair ``(ncols, n_info)`` forwarded to the dataset helper.
    num_classes : number of classes; also selects the ``skLog`` setup.
    fit_intercept : whether the estimators use an intercept term.
    """
    ncols, n_info = column_info
    X_train, X_test, y_train, y_test = \
        make_classification_dataset(datatype=dtype, nrows=nrows,
                                    ncols=ncols, n_info=n_info,
                                    num_classes=num_classes)

    y_train = y_train.astype(dtype)
    y_test = y_test.astype(dtype)

    culog = cuLog(fit_intercept=fit_intercept)
    culog.fit(X_train, y_train)

    if num_classes > 2:
        sklog = skLog(fit_intercept=fit_intercept, solver="lbfgs",
                      multi_class="multinomial")
    else:
        sklog = skLog(fit_intercept=fit_intercept)

    # Inject the cuLog parameters instead of fitting skLog, so that only the
    # prediction code paths are being compared.
    sklog.coef_ = culog.coef_.copy_to_host().T
    if fit_intercept:
        sklog.intercept_ = culog.intercept_.copy_to_host()
    else:
        # Assign on the instance: writing to the skLog class would leak the
        # attribute into every other skLog object created in this session.
        sklog.intercept_ = 0
    sklog.classes_ = np.arange(num_classes)

    cu_proba = culog.predict_proba(X_test).get()
    sk_proba = sklog.predict_proba(X_test)

    cu_log_proba = culog.predict_log_proba(X_test).get()
    sk_log_proba = sklog.predict_log_proba(X_test)

    assert array_equal(cu_proba, sk_proba)
    assert array_equal(cu_log_proba, sk_log_proba)
Example #2
0
def test_logistic_regression(num_classes, dtype, penalty, l1_ratio,
                             fit_intercept, nrows, column_info, C, tol):
    """Check that ``cuLog`` scores within 0.06 of ``skLog`` on the test split.

    Both estimators are fitted on the same training data with matching
    ``penalty``, ``C`` and ``fit_intercept``; ``skLog`` uses the ``saga``
    solver for l1/elasticnet penalties and ``lbfgs`` otherwise.

    Parameters
    ----------
    num_classes : number of classes in the generated dataset.
    dtype : dtype passed to the dataset helper and used to cast the labels.
    penalty : regularisation penalty given to both estimators.
    l1_ratio : elastic-net mixing value (only forwarded to ``skLog`` when
        the installed sklearn is >= 0.21).
    fit_intercept : whether the estimators use an intercept term.
    nrows : number of rows requested from the dataset helper.
    column_info : pair ``(ncols, n_info)`` forwarded to the dataset helper.
    C : regularisation strength given to both estimators.
    tol : tolerance given to ``cuLog``.
    """
    # NOTE: while this xfail is in place, the l1/elasticnet handling further
    # down (the skip and the saga branch) is never reached.
    if penalty in ['l1', 'elasticnet']:
        pytest.xfail("OWL numerical stability is being improved")

    ncols, n_info = column_info
    # Checking sklearn >= 0.21 for testing elasticnet
    sk_check = LooseVersion(str(sklearn.__version__)) >= LooseVersion("0.21.0")
    if not sk_check and penalty == 'elasticnet':
        pytest.skip("Need sklearn >= 0.21 for testing logistic with "
                    "elastic net.")

    X_train, X_test, y_train, y_test = \
        make_classification_dataset(datatype=dtype, nrows=nrows,
                                    ncols=ncols, n_info=n_info,
                                    num_classes=num_classes)
    y_train = y_train.astype(dtype)
    y_test = y_test.astype(dtype)
    culog = cuLog(penalty=penalty,
                  l1_ratio=l1_ratio,
                  C=C,
                  fit_intercept=fit_intercept,
                  tol=tol,
                  verbose=0)
    culog.fit(X_train, y_train)

    # Only solver=saga supports elasticnet in scikit
    if penalty in ['elasticnet', 'l1']:
        if sk_check:
            sklog = skLog(penalty=penalty,
                          l1_ratio=l1_ratio,
                          solver='saga',
                          C=C,
                          fit_intercept=fit_intercept,
                          multi_class='auto')
        else:
            # l1_ratio is omitted when sk_check is false.
            sklog = skLog(penalty=penalty,
                          solver='saga',
                          C=C,
                          fit_intercept=fit_intercept,
                          multi_class='auto')
    else:
        sklog = skLog(penalty=penalty,
                      solver='lbfgs',
                      C=C,
                      fit_intercept=fit_intercept,
                      multi_class='auto')

    sklog.fit(X_train, y_train)

    # Setting tolerance to lowest possible per loss to detect regressions
    # as much as possible

    assert culog.score(X_test, y_test) >= sklog.score(X_test, y_test) - 0.06
Example #3
0
def test_logistic_regression_decision_function(
    dtype, nrows, column_info, num_classes, fit_intercept
):
    """Compare ``cuLog.decision_function`` with ``skLog.decision_function``.

    ``cuLog`` is fitted on the training split; its coefficients (and its
    intercept when ``fit_intercept`` is true) are copied into an unfitted
    ``skLog`` so both estimators evaluate the same parameters. The ``cuLog``
    result is transposed for more than two classes before being compared
    with ``array_equal``.

    Parameters
    ----------
    dtype : dtype passed to the dataset helper and used to cast the labels.
    nrows : number of rows requested from the dataset helper.
    column_info : pair ``(ncols, n_info)`` forwarded to the dataset helper.
    num_classes : number of classes in the generated dataset.
    fit_intercept : whether the estimators use an intercept term.
    """
    ncols, n_info = column_info
    X_train, X_test, y_train, y_test = make_classification_dataset(
        datatype=dtype, nrows=nrows, ncols=ncols,
        n_info=n_info, num_classes=num_classes
    )

    y_train = y_train.astype(dtype)
    y_test = y_test.astype(dtype)

    culog = cuLog(fit_intercept=fit_intercept, output_type="numpy")
    culog.fit(X_train, y_train)

    # Inject the cuLog parameters instead of fitting skLog, so that only the
    # decision-function code paths are being compared.
    sklog = skLog(fit_intercept=fit_intercept)
    sklog.coef_ = culog.coef_.T
    if fit_intercept:
        sklog.intercept_ = culog.intercept_
    else:
        # Assign on the instance: writing to the skLog class would leak the
        # attribute into every other skLog object created in this session.
        sklog.intercept_ = 0
    sklog.classes_ = np.arange(num_classes)

    cu_dec_func = culog.decision_function(X_test)
    if num_classes > 2:
        # Transposed so the layout lines up with skLog's output for the
        # multiclass case.
        cu_dec_func = cu_dec_func.T
    sk_dec_func = sklog.decision_function(X_test)

    assert array_equal(cu_dec_func, sk_dec_func)
Example #4
0
def test_logistic_regression_model_default(dtype):
    """Check a default-constructed ``cuLog`` against ``skLog``.

    Both estimators are fitted on the data returned by
    ``small_classification_dataset(dtype)``; the ``cuLog`` test score may
    trail the ``skLog`` test score by at most 0.022.
    """
    X_train, X_test, y_train, y_test = small_classification_dataset(dtype)
    y_train, y_test = y_train.astype(dtype), y_test.astype(dtype)

    # cuLog with all defaults, fitted first.
    cu_model = cuLog()
    cu_model.fit(X_train, y_train)

    # skLog reference model.
    sk_model = skLog(multi_class="auto")
    sk_model.fit(X_train, y_train)

    cu_score = cu_model.score(X_test, y_test)
    sk_score = sk_model.score(X_test, y_test)
    assert cu_score >= sk_score - 0.022
Example #5
0
def test_logistic_regression_predict_proba(
    dtype, nrows, column_info, num_classes, fit_intercept, sparse_input
):
    """Compare ``cuLog`` probability outputs with ``skLog`` on one model.

    ``cuLog`` is fitted on the training split (wrapped in ``csr_matrix``
    when ``sparse_input`` is true). Its coefficients (and its intercept when
    ``fit_intercept`` is true) are copied into an unfitted ``skLog`` so both
    estimators evaluate the same parameters. ``predict_proba`` and
    ``predict_log_proba`` of the two are compared with ``array_equal``.

    Parameters
    ----------
    dtype : dtype passed to the dataset helper and used to cast the labels.
    nrows : number of rows requested from the dataset helper.
    column_info : pair ``(ncols, n_info)`` forwarded to the dataset helper.
    num_classes : number of classes; also selects the ``skLog`` setup.
    fit_intercept : whether the estimators use an intercept term.
    sparse_input : when true, the feature matrices are converted with
        ``csr_matrix`` before fitting and predicting.
    """
    ncols, n_info = column_info
    X_train, X_test, y_train, y_test = make_classification_dataset(
        datatype=dtype, nrows=nrows, ncols=ncols,
        n_info=n_info, num_classes=num_classes
    )
    X_train = csr_matrix(X_train) if sparse_input else X_train
    X_test = csr_matrix(X_test) if sparse_input else X_test

    y_train = y_train.astype(dtype)
    y_test = y_test.astype(dtype)

    culog = cuLog(fit_intercept=fit_intercept, output_type="numpy")
    culog.fit(X_train, y_train)

    if num_classes > 2:
        sklog = skLog(
            fit_intercept=fit_intercept,
            solver="lbfgs",
            multi_class="multinomial"
        )
    else:
        sklog = skLog(fit_intercept=fit_intercept)

    # Inject the cuLog parameters instead of fitting skLog, so that only the
    # prediction code paths are being compared.
    sklog.coef_ = culog.coef_.T
    if fit_intercept:
        sklog.intercept_ = culog.intercept_
    else:
        # Assign on the instance: writing to the skLog class would leak the
        # attribute into every other skLog object created in this session.
        sklog.intercept_ = 0
    sklog.classes_ = np.arange(num_classes)

    cu_proba = culog.predict_proba(X_test)
    sk_proba = sklog.predict_proba(X_test)

    cu_log_proba = culog.predict_log_proba(X_test)
    sk_log_proba = sklog.predict_log_proba(X_test)

    assert array_equal(cu_proba, sk_proba)
    assert array_equal(cu_log_proba, sk_log_proba)
Example #6
0
def test_logistic_regression_weighting(regression_dataset, option,
                                       test_status):
    """Compare ``cuLog`` and ``skLog`` when sample/class weights are used.

    Both estimators are fitted without an intercept on the same data and
    with the same weighting, then their normalised coefficients and their
    predictions on the training data are compared with ``array_equal``
    under tolerances that depend on the regression type.

    Parameters
    ----------
    regression_dataset : mapping indexed by ``test_status`` whose entries
        unpack to ``(regression_type, data, coef, output)``.
    option : ``'sample_weight'``, ``'class_weight'`` or ``'balanced'``
        selects the weighting; any other value means no weighting.
    test_status : key selecting the dataset entry.

    Raises
    ------
    ValueError
        If ``regression_type`` is neither ``'binary'`` nor starts with
        ``'multiclass'`` (no tolerances are defined for it).
    """
    regression_type, data, coef, output = regression_dataset[test_status]

    class_weight = None
    sample_weight = None
    if option == 'sample_weight':
        n_samples = data.shape[0]
        sample_weight = np.abs(np.random.rand(n_samples))
    elif option == 'class_weight':
        class_weight = np.random.rand(2)
        class_weight = {0: class_weight[0], 1: class_weight[1]}
    elif option == 'balanced':
        class_weight = 'balanced'

    culog = cuLog(fit_intercept=False, class_weight=class_weight)
    culog.fit(data, output, sample_weight=sample_weight)

    sklog = skLog(fit_intercept=False, class_weight=class_weight)
    sklog.fit(data, output, sample_weight=sample_weight)

    skcoef = np.squeeze(sklog.coef_)
    cucoef = np.squeeze(culog.coef_)
    # np.squeeze may return a view of the estimator's own coefficient array,
    # so normalise out of place: an in-place `/=` would rewrite the fitted
    # model and change what predict() returns further down.
    if regression_type == 'binary':
        skcoef = skcoef / np.linalg.norm(skcoef)
        cucoef = cucoef / np.linalg.norm(cucoef)
        unit_tol = 0.04
        total_tol = 0.08
    elif regression_type.startswith('multiclass'):
        # skcoef is transposed so its layout matches cucoef before the
        # row-wise normalisation.
        skcoef = skcoef.T
        skcoef = skcoef / np.linalg.norm(skcoef, axis=1)[:, None]
        cucoef = cucoef / np.linalg.norm(cucoef, axis=1)[:, None]
        unit_tol = 0.2
        total_tol = 0.3
    else:
        # Without this the tolerances would be unbound and the failure
        # would surface as an unrelated NameError.
        raise ValueError(
            "Unsupported regression_type: {!r}".format(regression_type))

    equality = array_equal(skcoef,
                           cucoef,
                           unit_tol=unit_tol,
                           total_tol=total_tol)
    if not equality:
        # Dump the arrays to help diagnose a mismatch.
        print('\ncoef.shape: ', coef.shape)
        print('coef:\n', coef)
        print('skcoef.shape: ', skcoef.shape)
        print('skcoef:\n', skcoef)
        print('cucoef.shape: ', cucoef.shape)
        print('cucoef:\n', cucoef)
    assert equality

    cuOut = culog.predict(data)
    skOut = sklog.predict(data)
    assert array_equal(skOut, cuOut, unit_tol=unit_tol, total_tol=total_tol)
Example #7
0
def test_logistic_regression(
    num_classes, dtype, penalty, l1_ratio,
    fit_intercept, nrows, column_info, C, tol
):
    """Check that ``cuLog`` scores close to ``skLog`` on train and test data.

    Both estimators are fitted on the same training data with matching
    ``penalty``, ``C`` and ``fit_intercept``; ``skLog`` uses the ``saga``
    solver for l1/elasticnet penalties and ``lbfgs`` otherwise. The test
    also requires ``cuLog`` to predict as many distinct labels as appear in
    ``y_test``.

    Parameters
    ----------
    num_classes : number of classes in the generated dataset.
    dtype : dtype passed to the dataset helper and used to cast the labels.
    penalty : regularisation penalty given to both estimators.
    l1_ratio : elastic-net mixing value (only forwarded to ``skLog`` when
        the installed sklearn is >= 0.21).
    fit_intercept : whether the estimators use an intercept term.
    nrows : number of rows requested from the dataset helper.
    column_info : pair ``(ncols, n_info)`` forwarded to the dataset helper.
    C : regularisation strength given to both estimators.
    tol : tolerance given to ``cuLog``.
    """
    ncols, n_info = column_info
    # Checking sklearn >= 0.21 for testing elasticnet
    sk_check = LooseVersion(str(sklearn.__version__)) >= LooseVersion("0.21.0")
    if not sk_check and penalty == "elasticnet":
        pytest.skip(
            "Need sklearn >= 0.21 for testing logistic with elastic net."
        )

    X_train, X_test, y_train, y_test = make_classification_dataset(
        datatype=dtype, nrows=nrows, ncols=ncols,
        n_info=n_info, num_classes=num_classes
    )
    y_train = y_train.astype(dtype)
    y_test = y_test.astype(dtype)
    culog = cuLog(
        penalty=penalty, l1_ratio=l1_ratio, C=C,
        fit_intercept=fit_intercept, tol=tol
    )
    culog.fit(X_train, y_train)

    # Only solver=saga supports elasticnet in scikit
    if penalty in ["elasticnet", "l1"]:
        if sk_check:
            sklog = skLog(
                penalty=penalty,
                l1_ratio=l1_ratio,
                solver="saga",
                C=C,
                fit_intercept=fit_intercept,
                multi_class="auto",
            )
        else:
            # l1_ratio is omitted when sk_check is false.
            sklog = skLog(
                penalty=penalty,
                solver="saga",
                C=C,
                fit_intercept=fit_intercept,
                multi_class="auto",
            )
    else:
        sklog = skLog(
            penalty=penalty,
            solver="lbfgs",
            C=C,
            fit_intercept=fit_intercept,
            multi_class="auto",
        )

    sklog.fit(X_train, y_train)

    # Setting tolerance to lowest possible per loss to detect regressions
    # as much as possible
    cu_preds = culog.predict(X_test)
    tol_test = 0.012
    tol_train = 0.006
    if num_classes == 10 and penalty in ["elasticnet", "l1"]:
        # Ten times looser tolerances for this combination.
        tol_test *= 10
        tol_train *= 10

    assert culog.score(X_train, y_train) >= sklog.score(X_train, y_train) - \
        tol_train
    assert culog.score(X_test, y_test) >= sklog.score(X_test, y_test) - \
        tol_test
    # Every label present in y_test must also show up in the predictions.
    assert len(np.unique(cu_preds)) == len(np.unique(y_test))
Example #8
0
def test_logistic_regression(num_classes, dtype, penalty, l1_ratio,
                             fit_intercept):
    """Check that ``cuLog`` and ``skLog`` predictions mostly agree.

    A dataset of 100000 rows is generated with ``make_classification`` and
    split 80/20 into train and test rows. Both estimators are fitted on the
    training rows, and the fraction of test rows on which their predictions
    differ must stay below a penalty-dependent bound.

    Parameters
    ----------
    num_classes : used as ``n_features`` for ``make_classification``.
    dtype : dtype the generated arrays are cast to.
    penalty : regularisation penalty given to both estimators.
    l1_ratio : elastic-net mixing value (only forwarded to ``skLog`` when
        the installed sklearn is >= 0.21).
    fit_intercept : whether the estimators use an intercept term.
    """

    # Checking sklearn >= 0.21 for testing elasticnet
    sk_check = LooseVersion(str(sklearn.__version__)) >= LooseVersion("0.21.0")
    if not sk_check and penalty == 'elasticnet':
        pytest.skip("Need sklearn >= 0.21 for testing logistic with "
                    "elastic net.")

    nrows = 100000
    train_rows = np.int32(nrows * 0.8)
    X, y = make_classification(n_samples=nrows,
                               n_features=num_classes,
                               n_redundant=0,
                               n_informative=2)

    X_test = np.asarray(X[train_rows:, 0:]).astype(dtype)
    X_train = np.asarray(X[0:train_rows, :]).astype(dtype)
    y_train = np.asarray(y[0:train_rows, ]).astype(dtype)
    # Derive the test-set size from the split rather than repeating it as a
    # literal, so it stays correct if nrows or the split ratio changes.
    n_test = X_test.shape[0]

    culog = cuLog(penalty=penalty,
                  l1_ratio=l1_ratio,
                  C=5.0,
                  fit_intercept=fit_intercept,
                  tol=1e-8)
    culog.fit(X_train, y_train)

    # Only solver=saga supports elasticnet in scikit
    if penalty in ['elasticnet', 'l1']:
        if sk_check:
            sklog = skLog(penalty=penalty,
                          l1_ratio=l1_ratio,
                          solver='saga',
                          C=5.0,
                          fit_intercept=fit_intercept)
        else:
            sklog = skLog(penalty=penalty,
                          solver='saga',
                          C=5.0,
                          fit_intercept=fit_intercept)
    elif penalty == 'l2':
        sklog = skLog(penalty=penalty,
                      solver='lbfgs',
                      C=5.0,
                      fit_intercept=fit_intercept)
    else:
        if sk_check:
            sklog = skLog(penalty=penalty,
                          solver='lbfgs',
                          C=5.0,
                          fit_intercept=fit_intercept)
        else:
            # When sk_check is false the given penalty is not passed on;
            # l2 with a very large C is used in its place.
            sklog = skLog(penalty='l2',
                          solver='lbfgs',
                          C=1e9,
                          fit_intercept=fit_intercept)

    sklog.fit(X_train, y_train)

    preds = culog.predict(X_test)
    skpreds = sklog.predict(X_test)

    # Fraction of test rows on which the two estimators disagree.
    mismatch_rate = np.sum(preds.to_array() != skpreds) / n_test

    # Setting tolerance to lowest possible per loss to detect regressions
    # as much as possible
    if penalty in ['elasticnet', 'l1', 'l2']:
        assert mismatch_rate < 1e-1
    else:
        # This is the only case where cuml and sklearn actually do a similar
        # lbfgs, other cases cuml does owl or sklearn does saga
        assert mismatch_rate < 1e-3