Ejemplo n.º 1
0
def test_sparse_lasso_not_as_toy_dataset():
    """Check sparse and dense Lasso on data from ``make_sparse_data``.

    The data is split in half: the second half is used for fitting and the
    first half for scoring. For both ``SparseLasso`` and ``DenseLasso`` the
    test asserts that the dual gap is almost zero (4 places) and that the
    score on the held-out half exceeds 0.85. Finally it asserts that the
    sparse model has exactly ``n_informative`` non-zero coefficients.
    """
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative)

    # Floor division: under Python 3, ``n_samples / 2`` is a float and a
    # float slice bound raises TypeError.
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    s_clf = SparseLasso(alpha=0.1,
                        fit_intercept=False,
                        max_iter=max_iter,
                        tol=1e-7)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = DenseLasso(alpha=0.1,
                       fit_intercept=False,
                       max_iter=max_iter,
                       tol=1e-7)
    d_clf.fit(X_train, y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    # check that the coefs are sparse
    assert_equal(np.sum(s_clf.coef_ != 0.0), n_informative)
Ejemplo n.º 2
0
def test_fit_simple_backupsklearn(backend='auto'):
    """Compare ``h2o4gpu.Lasso`` against scikit-learn's ``Lasso``.

    Loads ``./open_data/simple.txt`` (whitespace-delimited), using every
    column but the last as features and the last column as the target, both
    as C-ordered float32 arrays. Three models are fitted on the full data and
    their predictions and scores are printed:

    * ``h2o4gpu.Lasso`` with ``glm_stop_early=False``;
    * ``h2o4gpu.Lasso`` with ``positive=True, random_state=1234``;
    * scikit-learn ``Lasso`` with ``positive=True, random_state=1234``.

    Unless ``backend`` is ``'h2o4gpu'``, the second and third models are then
    required to agree on ``coef_``, ``intercept_`` and ``n_iter_``.

    Parameters
    ----------
    backend : str, default 'auto'
        Forwarded to ``h2o4gpu.Lasso`` as its ``backend`` argument.
    """
    # ``sep=r"\s+"`` is the documented equivalent of the deprecated
    # ``delim_whitespace=True``.
    df = pd.read_csv("./open_data/simple.txt", sep=r"\s+")
    n_feature_cols = df.shape[1] - 1
    X = np.array(df.iloc[:, :n_feature_cols], dtype='float32', order='C')
    y = np.array(df.iloc[:, n_feature_cols], dtype='float32', order='C')
    Solver = h2o4gpu.Lasso

    enet = Solver(glm_stop_early=False, backend=backend)
    print("h2o4gpu fit()")
    enet.fit(X, y)
    print("h2o4gpu predict()")
    print(enet.predict(X))
    print("h2o4gpu score()")
    print(enet.score(X, y))

    enet_wrapper = Solver(positive=True, random_state=1234, backend=backend)
    print("h2o4gpu scikit wrapper fit()")
    enet_wrapper.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet_wrapper.predict(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet_wrapper.score(X, y))

    # Import from the public package path; the private
    # ``sklearn.linear_model.coordinate_descent`` module is no longer
    # importable in current scikit-learn releases.
    from sklearn.linear_model import Lasso
    enet_sk = Lasso(positive=True, random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))

    # Round-trip the reference coefficients through a float32 CSR matrix so
    # they are compared as dense float32 arrays.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()
    enet_sk_sparse_coef = csr_matrix(enet_sk.sparse_coef_,
                                     dtype=np.float32).toarray()

    # The comparison against scikit-learn is skipped for the 'h2o4gpu'
    # backend.
    if backend != 'h2o4gpu':
        print(enet_sk.coef_)
        print(enet_sk.sparse_coef_)

        print(enet_sk_coef)
        print(enet_sk_sparse_coef)

        print(enet_wrapper.coef_)
        print(enet_wrapper.sparse_coef_)

        print(enet_sk.intercept_)
        print(enet_wrapper.intercept_)

        print(enet_sk.n_iter_)
        print(enet_wrapper.n_iter_)

        print(enet_wrapper.time_prepare)
        print(enet_wrapper.time_upload_data)
        print(enet_wrapper.time_fitonly)

        assert np.allclose(enet_wrapper.coef_, enet_sk_coef)
        assert np.allclose(enet_wrapper.intercept_, enet_sk.intercept_)
        assert np.allclose(enet_wrapper.n_iter_, enet_sk.n_iter_)
def test_sparse_lasso_not_as_toy_dataset():
    """Check ``Lasso`` on sparse input and on its dense equivalent.

    Data from ``make_sparse_data`` is split in half: the second half is used
    for fitting and the first half for scoring. One ``Lasso`` is fitted on
    the sparse training data and another on the same data converted to a
    dense array. Both must reach an almost-zero dual gap (4 places) and score
    above 0.85 on the held-out half. The sparse-fitted model must have
    exactly ``n_informative`` non-zero coefficients.
    """
    n_samples = 100
    max_iter = 1000
    n_informative = 10
    X, y = make_sparse_data(n_samples=n_samples, n_informative=n_informative)

    # Floor division: under Python 3, ``n_samples / 2`` is a float and a
    # float slice bound raises TypeError.
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    s_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
    # ``toarray()`` yields a plain ndarray; ``todense()`` yields ``np.matrix``,
    # which current scikit-learn estimators refuse as input.
    d_clf.fit(X_train.toarray(), y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    # check that the coefs are sparse
    assert_equal(np.sum(s_clf.coef_ != 0.0), n_informative)