def test_multinomial_binary():
    """Test multinomial LR on a binary problem."""
    # Collapse the iris labels into two string classes: samples whose
    # original label is 0 become "setosa", all others "not-setosa".
    target = (iris.target > 0).astype(np.intp)
    target = np.array(["setosa", "not-setosa"])[target]

    clf = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    clf.fit(iris.data, target)

    # On a two-class problem a single coefficient row and a single
    # intercept are expected.
    assert_equal(clf.coef_.shape, (1, iris.data.shape[1]))
    assert_equal(clf.intercept_.shape, (1,))
    assert_array_equal(clf.predict(iris.data), target)

    mlr = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                             fit_intercept=False)
    mlr.fit(iris.data, target)
    # Evaluate the intercept-free model itself; the previous code fitted
    # ``mlr`` but then re-checked ``clf``, leaving ``mlr`` untested.
    pred = mlr.classes_[np.argmax(mlr.predict_log_proba(iris.data), axis=1)]
    assert_greater(np.mean(pred == target), .9)
# Example #2
# 0
def test_multinomial_binary():
    """Test multinomial LR on a binary problem."""
    # Build a two-class string target: original label 0 maps to "setosa",
    # every other label maps to "not-setosa".
    target = (iris.target > 0).astype(np.intp)
    target = np.array(["setosa", "not-setosa"])[target]

    clf = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    clf.fit(iris.data, target)

    # The binary case should expose one coefficient row and one intercept.
    assert_equal(clf.coef_.shape, (1, iris.data.shape[1]))
    assert_equal(clf.intercept_.shape, (1,))
    assert_array_equal(clf.predict(iris.data), target)

    mlr = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                             fit_intercept=False)
    mlr.fit(iris.data, target)
    # Score the model fitted without an intercept. The original computed
    # ``pred`` from ``clf``, so ``mlr`` was fitted but never checked.
    pred = mlr.classes_[np.argmax(mlr.predict_log_proba(iris.data), axis=1)]
    assert_greater(np.mean(pred == target), .9)
def test_multinomial_binary():
    # Test multinomial LR on a binary problem.
    # Original label 0 becomes "setosa"; every other label "not-setosa".
    target = (iris.target > 0).astype(np.intp)
    target = np.array(["setosa", "not-setosa"])[target]

    for solver in ['lbfgs', 'newton-cg', 'sag']:
        clf = LogisticRegression(solver=solver, multi_class='multinomial',
                                 random_state=42, max_iter=2000)
        clf.fit(iris.data, target)

        # The binary case should expose one coefficient row and one
        # intercept.
        assert_equal(clf.coef_.shape, (1, iris.data.shape[1]))
        assert_equal(clf.intercept_.shape, (1,))
        assert_array_equal(clf.predict(iris.data), target)

        # Same iteration budget as ``clf`` so the model under test is given
        # the same chance to converge for every solver.
        mlr = LogisticRegression(solver=solver, multi_class='multinomial',
                                 random_state=42, fit_intercept=False,
                                 max_iter=2000)
        mlr.fit(iris.data, target)
        # Evaluate the intercept-free model; the original re-used ``clf``
        # here, which left ``mlr`` fitted but untested.
        pred = mlr.classes_[np.argmax(mlr.predict_log_proba(iris.data),
                                      axis=1)]
        assert_greater(np.mean(pred == target), .9)
def test_multinomial_binary():
    # Test multinomial LR on a binary problem.
    # Two string classes are derived from the iris labels: label 0 maps to
    # "setosa", anything else to "not-setosa".
    target = (iris.target > 0).astype(np.intp)
    target = np.array(["setosa", "not-setosa"])[target]

    for solver in ['lbfgs', 'newton-cg', 'sag']:
        clf = LogisticRegression(solver=solver, multi_class='multinomial',
                                 random_state=42, max_iter=2000)
        clf.fit(iris.data, target)

        # One coefficient row / one intercept is expected with two classes.
        assert_equal(clf.coef_.shape, (1, iris.data.shape[1]))
        assert_equal(clf.intercept_.shape, (1,))
        assert_array_equal(clf.predict(iris.data), target)

        # ``max_iter`` mirrors ``clf`` so that the intercept-free fit is not
        # cut short relative to the model checked above.
        mlr = LogisticRegression(solver=solver, multi_class='multinomial',
                                 random_state=42, fit_intercept=False,
                                 max_iter=2000)
        mlr.fit(iris.data, target)
        # Predictions must come from ``mlr``; previously ``clf`` was scored
        # a second time and ``mlr`` was never exercised.
        pred = mlr.classes_[np.argmax(mlr.predict_log_proba(iris.data),
                                      axis=1)]
        assert_greater(np.mean(pred == target), .9)
def test_fit_credit_backupsklearn():
    """Compare h2o4gpu's LogisticRegression with scikit-learn's.

    Reads ``./open_data/creditcard.csv``, using every column but the last
    as features and the last column as the target (both float32, C order).
    Three models are fitted on that data:

    * ``h2o4gpu.LogisticRegression(glm_stop_early=False)`` -- only fitted,
      predicted and scored, with the results printed;
    * ``h2o4gpu.LogisticRegression`` with scikit-learn style arguments;
    * scikit-learn's ``LogisticRegression`` with those same arguments.

    The last two must agree on ``coef_``, ``intercept_``, ``n_iter_`` and
    on every prediction output.
    """
    df = pd.read_csv("./open_data/creditcard.csv")
    n_cols = df.shape[1]
    X = np.array(df.iloc[:, :n_cols - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, n_cols - 1], dtype='float32', order='C')
    Solver = h2o4gpu.LogisticRegression

    # Model 1: constructed with an h2o4gpu-specific keyword only.
    enet_h2o4gpu = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet_h2o4gpu.fit(X, y)
    print("h2o4gpu predict()")
    print(enet_h2o4gpu.predict(X))
    print("h2o4gpu score()")
    print(enet_h2o4gpu.score(X, y))

    # Model 2: h2o4gpu class driven with scikit-learn style arguments.
    enet = Solver(dual=True, max_iter=100, tol=1E-4, random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet.predict(X))
    print("h2o4gpu scikit wrapper predict_proba()")
    print(enet.predict_proba(X))
    print("h2o4gpu scikit wrapper predict_log_proba()")
    print(enet.predict_log_proba(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet.score(X, y))
    print("h2o4gpu scikit wrapper decision_function()")
    print(enet.decision_function(X))
    print("h2o4gpu scikit wrapper densify()")
    print(enet.densify())
    print("h2o4gpu scikit wrapper sparsify")
    print(enet.sparsify())

    # Model 3: reference scikit-learn estimator with identical arguments.
    # Import from the public package; the private module path
    # ``sklearn.linear_model.logistic`` is not available in newer releases.
    from sklearn.linear_model import LogisticRegression
    enet_sk = LogisticRegression(dual=True,
                                 max_iter=100,
                                 tol=1E-4,
                                 random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit predict_proba()")
    print(enet_sk.predict_proba(X))
    print("Scikit predict_log_proba()")
    print(enet_sk.predict_log_proba(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))
    print("Scikit decision_function()")
    print(enet_sk.decision_function(X))
    print("Scikit densify()")
    print(enet_sk.densify())
    print("Sciki sparsify")
    print(enet_sk.sparsify())

    # Convert the reference coefficients to a dense float32 array before
    # comparing -- presumably needed because sparsify() above leaves
    # ``coef_`` in sparse form.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()
    print(enet_sk.coef_)
    print(enet_sk_coef)
    print(enet.coef_)
    print(enet_sk.intercept_)
    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet.coef_, enet_sk_coef)
    assert np.allclose(enet.intercept_, enet_sk.intercept_)
    assert np.allclose(enet.n_iter_, enet_sk.n_iter_)
    print("Preds should match")
    assert np.allclose(enet.predict_proba(X), enet_sk.predict_proba(X))
    assert np.allclose(enet.predict(X), enet_sk.predict(X))
    assert np.allclose(enet.predict_log_proba(X), enet_sk.predict_log_proba(X))
def test_fit_credit_backupsklearn():
    """Check that h2o4gpu's LogisticRegression matches scikit-learn's.

    Loads ``./open_data/creditcard.csv`` (last column is the target, the
    remaining columns are the features; both are converted to C-ordered
    float32 arrays) and fits:

    * an ``h2o4gpu.LogisticRegression(glm_stop_early=False)`` model whose
      predictions and score are only printed;
    * an ``h2o4gpu.LogisticRegression`` built with scikit-learn style
      arguments, including ``intercept_scaling=0.99``;
    * scikit-learn's ``LogisticRegression`` with the same arguments.

    The test asserts that the last two agree on ``coef_``, ``intercept_``,
    ``n_iter_`` and on all prediction outputs.
    """
    df = pd.read_csv("./open_data/creditcard.csv")
    last_col = df.shape[1] - 1
    X = np.array(df.iloc[:, :last_col], dtype='float32', order='C')
    y = np.array(df.iloc[:, last_col], dtype='float32', order='C')
    Solver = h2o4gpu.LogisticRegression

    # First model: h2o4gpu-specific keyword only.
    enet_h2o4gpu = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet_h2o4gpu.fit(X, y)
    print("h2o4gpu predict()")
    print(enet_h2o4gpu.predict(X))
    print("h2o4gpu score()")
    print(enet_h2o4gpu.score(X, y))

    # Second model: h2o4gpu class called with scikit-learn style arguments.
    enet = Solver(dual=True,
                  max_iter=100,
                  tol=1E-4,
                  intercept_scaling=0.99,
                  random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet.predict(X))
    print("h2o4gpu scikit wrapper predict_proba()")
    print(enet.predict_proba(X))
    print("h2o4gpu scikit wrapper predict_log_proba()")
    print(enet.predict_log_proba(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet.score(X, y))
    print("h2o4gpu scikit wrapper decision_function()")
    print(enet.decision_function(X))
    print("h2o4gpu scikit wrapper densify()")
    print(enet.densify())
    print("h2o4gpu scikit wrapper sparsify")
    print(enet.sparsify())

    # Third model: the scikit-learn reference. Use the public import path;
    # the private ``sklearn.linear_model.logistic`` module no longer exists
    # in newer scikit-learn releases.
    from sklearn.linear_model import LogisticRegression
    enet_sk = LogisticRegression(dual=True,
                                 max_iter=100,
                                 tol=1E-4,
                                 intercept_scaling=0.99,
                                 random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit predict_proba()")
    print(enet_sk.predict_proba(X))
    print("Scikit predict_log_proba()")
    print(enet_sk.predict_log_proba(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))
    print("Scikit decision_function()")
    print(enet_sk.decision_function(X))
    print("Scikit densify()")
    print(enet_sk.densify())
    print("Sciki sparsify")
    print(enet_sk.sparsify())

    # Densify the reference coefficients as float32 for the comparison --
    # presumably required because sparsify() was just called on the model.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()
    print(enet_sk.coef_)
    print(enet_sk_coef)
    print(enet.coef_)
    print(enet_sk.intercept_)
    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet.coef_, enet_sk_coef)
    assert np.allclose(enet.intercept_, enet_sk.intercept_)
    assert np.allclose(enet.n_iter_, enet_sk.n_iter_)
    print("Preds should match")
    assert np.allclose(enet.predict_proba(X), enet_sk.predict_proba(X))
    assert np.allclose(enet.predict(X), enet_sk.predict(X))
    assert np.allclose(enet.predict_log_proba(X), enet_sk.predict_log_proba(X))