def test_sparsify():
    """Check that sparsify() / densify() do not change decision_function.

    Fits a LogisticRegression on the iris data, using the class names looked
    up from ``iris.target_names`` as labels, and compares the decision values
    for every dense/sparse combination of coefficients and input against the
    dense/dense baseline.
    """
    target = iris.target_names[iris.target]
    clf = LogisticRegression(random_state=0).fit(iris.data, target)

    # Naming scheme: pred_<coef>_<data>, with d = dense and s = sparse.
    pred_d_d = clf.decision_function(iris.data)

    clf.sparsify()
    assert sp.issparse(clf.coef_)
    pred_s_d = clf.decision_function(iris.data)

    sp_data = sp.coo_matrix(iris.data)
    pred_s_s = clf.decision_function(sp_data)

    clf.densify()
    # Make sure densify() really undid sparsify(); otherwise pred_d_s would
    # silently exercise the sparse-coefficient path a second time.
    assert not sp.issparse(clf.coef_)
    pred_d_s = clf.decision_function(sp_data)

    assert_array_almost_equal(pred_d_d, pred_s_d)
    assert_array_almost_equal(pred_d_d, pred_s_s)
    assert_array_almost_equal(pred_d_d, pred_d_s)
# Example #2
# 0
def test_sparsify():
    """Verify decision_function is invariant under sparsify() and densify().

    A LogisticRegression is fitted on the iris data with labels taken from
    ``iris.target_names``. Decision values are then computed with dense and
    sparse coefficients, on dense and sparse input, and all of them must
    match the dense-coefficient / dense-input reference.
    """
    target = iris.target_names[iris.target]
    clf = LogisticRegression(random_state=0).fit(iris.data, target)

    # Variables are named pred_<coef>_<data>; "d" is dense, "s" is sparse.
    pred_d_d = clf.decision_function(iris.data)

    clf.sparsify()
    assert sp.issparse(clf.coef_)
    pred_s_d = clf.decision_function(iris.data)

    sp_data = sp.coo_matrix(iris.data)
    pred_s_s = clf.decision_function(sp_data)

    clf.densify()
    # Without this check a no-op densify() would go unnoticed, because the
    # sparse-coefficient prediction is already known to match.
    assert not sp.issparse(clf.coef_)
    pred_d_s = clf.decision_function(sp_data)

    assert_array_almost_equal(pred_d_d, pred_s_d)
    assert_array_almost_equal(pred_d_d, pred_s_s)
    assert_array_almost_equal(pred_d_d, pred_d_s)
def test_fit_credit_backupsklearn():
    """Compare h2o4gpu's LogisticRegression with scikit-learn's on credit data.

    Reads ``./open_data/creditcard.csv`` (relative to the working directory),
    uses the last column as the label and all other columns as features, then:

    1. smoke-tests ``h2o4gpu.LogisticRegression(glm_stop_early=False)``
       through fit / predict / score;
    2. fits ``h2o4gpu.LogisticRegression`` again with scikit-learn style
       arguments and exercises the rest of the estimator API;
    3. fits scikit-learn's own ``LogisticRegression`` with the same arguments
       and asserts that coefficients, intercept, iteration count and
       predictions agree with step 2.

    Intermediate results are printed so a failing run can be diagnosed from
    the captured output.
    """
    df = pd.read_csv("./open_data/creditcard.csv")
    # Last column is the label; everything before it is a feature.
    X = np.array(df.iloc[:, :-1], dtype='float32', order='C')
    y = np.array(df.iloc[:, -1], dtype='float32', order='C')
    Solver = h2o4gpu.LogisticRegression

    enet_h2o4gpu = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet_h2o4gpu.fit(X, y)
    print("h2o4gpu predict()")
    print(enet_h2o4gpu.predict(X))
    print("h2o4gpu score()")
    print(enet_h2o4gpu.score(X, y))

    enet = Solver(dual=True, max_iter=100, tol=1E-4, random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet.predict(X))
    print("h2o4gpu scikit wrapper predict_proba()")
    print(enet.predict_proba(X))
    print("h2o4gpu scikit wrapper predict_log_proba()")
    print(enet.predict_log_proba(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet.score(X, y))
    print("h2o4gpu scikit wrapper decision_function()")
    print(enet.decision_function(X))
    print("h2o4gpu scikit wrapper densify()")
    print(enet.densify())
    print("h2o4gpu scikit wrapper sparsify")
    print(enet.sparsify())

    # Import from the public package: the private ``sklearn.linear_model
    # .logistic`` module path was deprecated and later removed upstream.
    from sklearn.linear_model import LogisticRegression
    enet_sk = LogisticRegression(dual=True,
                                 max_iter=100,
                                 tol=1E-4,
                                 random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit predict_proba()")
    print(enet_sk.predict_proba(X))
    print("Scikit predict_log_proba()")
    print(enet_sk.predict_log_proba(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))
    print("Scikit decision_function()")
    print(enet_sk.decision_function(X))
    print("Scikit densify()")
    print(enet_sk.densify())
    print("Sciki sparsify")
    print(enet_sk.sparsify())

    # sparsify() was just called on enet_sk, so go through csr_matrix to get
    # a dense float32 array that np.allclose can compare.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()
    print(enet_sk.coef_)
    print(enet_sk_coef)
    print(enet.coef_)
    print(enet_sk.intercept_)
    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet.coef_, enet_sk_coef)
    assert np.allclose(enet.intercept_, enet_sk.intercept_)
    assert np.allclose(enet.n_iter_, enet_sk.n_iter_)
    print("Preds should match")
    assert np.allclose(enet.predict_proba(X), enet_sk.predict_proba(X))
    assert np.allclose(enet.predict(X), enet_sk.predict(X))
    assert np.allclose(enet.predict_log_proba(X), enet_sk.predict_log_proba(X))
def test_fit_credit_backupsklearn():
    """Check h2o4gpu's LogisticRegression against scikit-learn on credit data.

    Variant that passes ``intercept_scaling=0.99`` to both estimators.

    Reads ``./open_data/creditcard.csv`` (relative to the working directory),
    takes the last column as the label and the remaining columns as features,
    then:

    1. smoke-tests ``h2o4gpu.LogisticRegression(glm_stop_early=False)``
       through fit / predict / score;
    2. fits ``h2o4gpu.LogisticRegression`` with scikit-learn style arguments
       and exercises the rest of the estimator API;
    3. fits scikit-learn's own ``LogisticRegression`` with the same arguments
       and asserts that coefficients, intercept, iteration count and
       predictions agree with step 2.

    Intermediate results are printed to help diagnose a failing run.
    """
    df = pd.read_csv("./open_data/creditcard.csv")
    # Last column is the label; everything before it is a feature.
    X = np.array(df.iloc[:, :-1], dtype='float32', order='C')
    y = np.array(df.iloc[:, -1], dtype='float32', order='C')
    Solver = h2o4gpu.LogisticRegression

    enet_h2o4gpu = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet_h2o4gpu.fit(X, y)
    print("h2o4gpu predict()")
    print(enet_h2o4gpu.predict(X))
    print("h2o4gpu score()")
    print(enet_h2o4gpu.score(X, y))

    enet = Solver(dual=True, max_iter=100, tol=1E-4,
                  intercept_scaling=0.99, random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet.predict(X))
    print("h2o4gpu scikit wrapper predict_proba()")
    print(enet.predict_proba(X))
    print("h2o4gpu scikit wrapper predict_log_proba()")
    print(enet.predict_log_proba(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet.score(X, y))
    print("h2o4gpu scikit wrapper decision_function()")
    print(enet.decision_function(X))
    print("h2o4gpu scikit wrapper densify()")
    print(enet.densify())
    print("h2o4gpu scikit wrapper sparsify")
    print(enet.sparsify())

    # Use the public import path: the private ``sklearn.linear_model
    # .logistic`` module was deprecated and later removed upstream.
    from sklearn.linear_model import LogisticRegression
    enet_sk = LogisticRegression(dual=True, max_iter=100, tol=1E-4,
                                 intercept_scaling=0.99, random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit predict_proba()")
    print(enet_sk.predict_proba(X))
    print("Scikit predict_log_proba()")
    print(enet_sk.predict_log_proba(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))
    print("Scikit decision_function()")
    print(enet_sk.decision_function(X))
    print("Scikit densify()")
    print(enet_sk.densify())
    print("Sciki sparsify")
    print(enet_sk.sparsify())

    # enet_sk.sparsify() ran just above, so convert via csr_matrix to obtain
    # a dense float32 array suitable for np.allclose.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()
    print(enet_sk.coef_)
    print(enet_sk_coef)
    print(enet.coef_)
    print(enet_sk.intercept_)
    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet.coef_, enet_sk_coef)
    assert np.allclose(enet.intercept_, enet_sk.intercept_)
    assert np.allclose(enet.n_iter_, enet_sk.n_iter_)
    print("Preds should match")
    assert np.allclose(enet.predict_proba(X), enet_sk.predict_proba(X))
    assert np.allclose(enet.predict(X), enet_sk.predict(X))
    assert np.allclose(enet.predict_log_proba(X), enet_sk.predict_log_proba(X))