def test_overrided_gram_matrix():
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    Gram = X.T.dot(X)
    clf = ElasticNet(selection='cyclic',
                     tol=1e-8,
                     precompute=Gram,
                     fit_intercept=True)
    assert_warns_message(
        UserWarning, "Gram matrix was provided but X was centered"
        " to fit intercept, "
        "or X was normalized : recomputing Gram matrix.", clf.fit, X, y)
def test_warm_start_convergence_with_regularizer_decrement():
    X, y = load_boston(return_X_y=True)

    # Train a model to converge on a lightly regularized problem
    final_alpha = 1e-5
    low_reg_model = ElasticNet(alpha=final_alpha).fit(X, y)

    # Fitting a new model on a more regularized version of the same problem.
    # Fitting with high regularization is easier it should converge faster
    # in general.
    high_reg_model = ElasticNet(alpha=final_alpha * 10).fit(X, y)
    assert low_reg_model.n_iter_ > high_reg_model.n_iter_

    # Fit the solution to the original, less regularized version of the
    # problem but from the solution of the highly regularized variant of
    # the problem as a better starting point. This should also converge
    # faster than the original model that starts from zero.
    warm_low_reg_model = deepcopy(high_reg_model)
    warm_low_reg_model.set_params(warm_start=True, alpha=final_alpha)
    warm_low_reg_model.fit(X, y)
    assert low_reg_model.n_iter_ > warm_low_reg_model.n_iter_
def test_sparse_input_convergence_warning():
    X, y, _, _ = build_dataset(n_samples=1000, n_features=500)

    with pytest.warns(ConvergenceWarning):
        ElasticNet(max_iter=1, tol=0).fit(
            sparse.csr_matrix(X, dtype=np.float32), y)

    # check that the model converges w/o warnings
    with pytest.warns(None) as record:
        Lasso(max_iter=1000).fit(sparse.csr_matrix(X, dtype=np.float32), y)

    assert not record.list
def test_enet_float_precision():
    # Generate dataset
    X, y, X_test, y_test = build_dataset(n_samples=20, n_features=10)
    # Here we have a small number of iterations, and thus the
    # ElasticNet might not converge. This is to speed up tests

    for normalize in [True, False]:
        for fit_intercept in [True, False]:
            coef = {}
            intercept = {}
            for dtype in [np.float64, np.float32]:
                clf = ElasticNet(alpha=0.5, max_iter=100, precompute=False,
                                 fit_intercept=fit_intercept,
                                 normalize=normalize)

                X = dtype(X)
                y = dtype(y)
                ignore_warnings(clf.fit)(X, y)

                coef[dtype] = clf.coef_
                intercept[dtype] = clf.intercept_

                assert_equal(clf.coef_.dtype, dtype)

                # test precompute Gram array
                Gram = X.T.dot(X)
                clf_precompute = ElasticNet(alpha=0.5, max_iter=100,
                                            precompute=Gram,
                                            fit_intercept=fit_intercept,
                                            normalize=normalize)
                ignore_warnings(clf_precompute.fit)(X, y)
                assert_array_almost_equal(clf.coef_, clf_precompute.coef_)
                assert_array_almost_equal(clf.intercept_,
                                          clf_precompute.intercept_)

            assert_array_almost_equal(coef[np.float32], coef[np.float64],
                                      decimal=4)
            assert_array_almost_equal(intercept[np.float32],
                                      intercept[np.float64],
                                      decimal=4)
def test_same_multiple_output_sparse_dense():
    for normalize in [True, False]:
        l = ElasticNet(normalize=normalize)
        X = [[0, 1, 2, 3, 4],
             [0, 2, 5, 8, 11],
             [9, 10, 11, 12, 13],
             [10, 11, 12, 13, 14]]
        y = [[1, 2, 3, 4, 5],
             [1, 3, 6, 9, 12],
             [10, 11, 12, 13, 14],
             [11, 12, 13, 14, 15]]
        ignore_warnings(l.fit)(X, y)
        sample = np.array([1, 2, 3, 4, 5]).reshape(1, -1)
        predict_dense = l.predict(sample)

        l_sp = ElasticNet(normalize=normalize)
        X_sp = sp.coo_matrix(X)
        ignore_warnings(l_sp.fit)(X_sp, y)
        sample_sparse = sp.coo_matrix(sample)
        predict_sparse = l_sp.predict(sample_sparse)

        assert_array_almost_equal(predict_sparse, predict_dense)
Exemple #6
0
def test_enet_toy_explicit_sparse_input():
    """Test ElasticNet for various values of alpha and l1_ratio with sparse
    X"""
    f = ignore_warnings
    # training samples
    X = sp.lil_matrix((3, 1))
    X[0, 0] = -1
    # X[1, 0] = 0
    X[2, 0] = 1
    Y = [-1, 0, 1]  # just a straight line (the identity function)

    # test samples
    T = sp.lil_matrix((3, 1))
    T[0, 0] = 2
    T[1, 0] = 3
    T[2, 0] = 4

    # this should be the same as lasso
    clf = ElasticNet(alpha=0, l1_ratio=1.0)
    f(clf.fit)(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(pred, [2, 3, 4])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=1000)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
    assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
    assert_almost_equal(clf.dual_gap_, 0)

    clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.45454], 3)
    assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3)
    assert_almost_equal(clf.dual_gap_, 0)
def test_enet_multitarget():
    n_targets = 3
    X, y, _, _ = build_dataset(n_samples=10, n_features=8,
                               n_informative_features=10, n_targets=n_targets)
    estimator = ElasticNet(alpha=0.01, fit_intercept=True)
    estimator.fit(X, y)
    coef, intercept, dual_gap = (estimator.coef_, estimator.intercept_,
                                 estimator.dual_gap_)

    for k in range(n_targets):
        estimator.fit(X, y[:, k])
        assert_array_almost_equal(coef[k, :], estimator.coef_)
        assert_array_almost_equal(intercept[k], estimator.intercept_)
        assert_array_almost_equal(dual_gap[k], estimator.dual_gap_)
Exemple #8
0
def test_precompute_invalid_argument():
    X, y, _, _ = build_dataset()
    for clf in [
            ElasticNetCV(precompute="invalid"),
            LassoCV(precompute="invalid")
    ]:
        assert_raises_regex(
            ValueError, ".*should be.*True.*False.*auto.*"
            "array-like.*Got 'invalid'", clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet
    assert_raises_regex(ValueError, ".*should be.*True.*False.*array-like.*"
                        "Got 'auto'",
                        ElasticNet(precompute='auto').fit, X, y)
Exemple #9
0
def test_enet_multitarget():
    n_targets = 3
    X, y = make_sparse_data(n_targets=n_targets)

    estimator = ElasticNet(alpha=0.01, fit_intercept=True, precompute=None)
    # XXX: There is a bug when precompute is not None!
    estimator.fit(X, y)
    coef, intercept, dual_gap = (estimator.coef_, estimator.intercept_,
                                 estimator.dual_gap_)

    for k in range(n_targets):
        estimator.fit(X, y[:, k])
        assert_array_almost_equal(coef[k, :], estimator.coef_)
        assert_array_almost_equal(intercept[k], estimator.intercept_)
        assert_array_almost_equal(dual_gap[k], estimator.dual_gap_)
Exemple #10
0
def test_fit_simple_backupsklearn():
    df = pd.read_csv("./open_data/simple.txt", delim_whitespace=True)
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    Solver = h2o4gpu.ElasticNet

    enet = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet.fit(X, y)
    print("h2o4gpu predict()")
    print(enet.predict(X))
    print("h2o4gpu score()")
    print(enet.score(X, y))

    enet_wrapper = Solver(positive=True, random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet_wrapper.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet_wrapper.predict(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet_wrapper.score(X, y))

    from sklearn.linear_model.coordinate_descent import ElasticNet
    enet_sk = ElasticNet(positive=True, random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))

    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()

    print(enet_sk.coef_)

    print(enet_sk_coef)

    print(enet_wrapper.coef_)

    print(enet_sk.intercept_)
    print(enet_wrapper.intercept_)

    print(enet_sk.n_iter_)
    print(enet_wrapper.n_iter_)

    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet_wrapper.coef_, enet_sk_coef)
    assert np.allclose(enet_wrapper.intercept_, enet_sk.intercept_)
def test_check_input_false():
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    X = check_array(X, order='F', dtype='float64')
    y = check_array(X, order='F', dtype='float64')
    clf = ElasticNet(selection='cyclic', tol=1e-8)
    # Check that no error is raised if data is provided in the right format
    clf.fit(X, y, check_input=False)
    # With check_input=False, an exhaustive check is not made on y but its
    # dtype is still cast in _preprocess_data to X's dtype. So the test should
    # pass anyway
    X = check_array(X, order='F', dtype='float32')
    clf.fit(X, y, check_input=False)
    # With no input checking, providing X in C order should result in false
    # computation
    X = check_array(X, order='C', dtype='float64')
    assert_raises(ValueError, clf.fit, X, y, check_input=False)
def test_check_input_false():
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    X = check_array(X, order='F', dtype='float64')
    y = check_array(X, order='F', dtype='float64')
    clf = ElasticNet(selection='cyclic', tol=1e-8)
    # Check that no error is raised if data is provided in the right format
    clf.fit(X, y, check_input=False)
    X = check_array(X, order='F', dtype='float32')
    clf.fit(X, y, check_input=True)
    # Check that an error is raised if data is provided in the wrong dtype,
    # because of check bypassing
    assert_raises(ValueError, clf.fit, X, y, check_input=False)

    # With no input checking, providing X in C order should result in false
    # computation
    X = check_array(X, order='C', dtype='float64')
    assert_raises(ValueError, clf.fit, X, y, check_input=False)
Exemple #13
0
def test_random_descent():
    """Test that both random and cyclic selection give the same results.

    Ensure that the test models fully converge and check a wide
    range of conditions.
    """

    # This uses the coordinate descent algo using the gram trick.
    X, y, _, _ = build_dataset(n_samples=50, n_features=20)
    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(X, y)
    clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
    clf_random.fit(X, y)
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # This uses the descent algo without the gram trick
    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(X.T, y[:20])
    clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
    clf_random.fit(X.T, y[:20])
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # Sparse Case
    clf_cyclic = ElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(sparse.csr_matrix(X), y)
    clf_random = ElasticNet(selection='random', tol=1e-8, random_state=42)
    clf_random.fit(sparse.csr_matrix(X), y)
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # Multioutput case.
    new_y = np.hstack((y[:, np.newaxis], y[:, np.newaxis]))
    clf_cyclic = MultiTaskElasticNet(selection='cyclic', tol=1e-8)
    clf_cyclic.fit(X, new_y)
    clf_random = MultiTaskElasticNet(selection='random',
                                     tol=1e-8,
                                     random_state=42)
    clf_random.fit(X, new_y)
    assert_array_almost_equal(clf_cyclic.coef_, clf_random.coef_)
    assert_almost_equal(clf_cyclic.intercept_, clf_random.intercept_)

    # Raise error when selection is not in cyclic or random.
    clf_random = ElasticNet(selection='invalid')
    assert_raises(ValueError, clf_random.fit, X, y)
def test_warm_start_convergence():
    X, y, _, _ = build_dataset()
    model = ElasticNet(alpha=1e-3, tol=1e-3).fit(X, y)
    n_iter_reference = model.n_iter_

    # This dataset is not trivial enough for the model to converge in one pass.
    assert_greater(n_iter_reference, 2)

    # Check that n_iter_ is invariant to multiple calls to fit
    # when warm_start=False, all else being equal.
    model.fit(X, y)
    n_iter_cold_start = model.n_iter_
    assert_equal(n_iter_cold_start, n_iter_reference)

    # Fit the same model again, using a warm start: the optimizer just performs
    # a single pass before checking that it has already converged
    model.set_params(warm_start=True)
    model.fit(X, y)
    n_iter_warm_start = model.n_iter_
    assert_equal(n_iter_warm_start, 1)
                     Pipeline([('earth', Earth()),
                               ('logistic', LogisticRegression())]))],
                   'hard',
                   weights=[1.01, 1.01]), ['predict'],
  create_weird_classification_problem_1()),
 (GradientBoostingClassifier(max_depth=10,
                             n_estimators=10), ['predict_proba', 'predict'],
  create_weird_classification_problem_1()),
 (LogisticRegression(), ['predict_proba', 'predict'],
  create_weird_classification_problem_1()),
 (IsotonicRegression(out_of_bounds='clip'), ['predict'],
  create_isotonic_regression_problem_1()),
 (Earth(), ['predict', 'transform'], create_regression_problem_1()),
 (Earth(allow_missing=True), ['predict', 'transform'],
  create_regression_problem_with_missingness_1()),
 (ElasticNet(), ['predict'], create_regression_problem_1()),
 (ElasticNetCV(), ['predict'], create_regression_problem_1()),
 (LassoCV(), ['predict'], create_regression_problem_1()),
 (Ridge(), ['predict'], create_regression_problem_1()),
 (RidgeCV(), ['predict'], create_regression_problem_1()),
 (SGDRegressor(), ['predict'], create_regression_problem_1()),
 (Lasso(), ['predict'], create_regression_problem_1()),
 (Pipeline([('earth', Earth()), ('logistic', LogisticRegression())]),
  ['predict', 'predict_proba'], create_weird_classification_problem_1()),
 (FeatureUnion([('earth', Earth()), ('earth2', Earth(max_degree=2))],
               transformer_weights={
                   'earth': 1,
                   'earth2': 2
               }), ['transform'], create_weird_classification_problem_1()),
 (RandomForestRegressor(), ['predict'], create_regression_problem_1()),
 (CalibratedClassifierCV(LogisticRegression(),
Exemple #16
0
			'BaggingClassifier':BaggingClassifier(),
			'BaggingRegressor':BaggingRegressor(),
			'BayesianGaussianMixture':BayesianGaussianMixture(),
			'BayesianRidge':BayesianRidge(),
			'BernoulliNB':BernoulliNB(),
			'BernoulliRBM':BernoulliRBM(),
			'Binarizer':Binarizer(),
			'Birch':Birch(),
			'CCA':CCA(),
			'CalibratedClassifierCV':CalibratedClassifierCV(),
			'DBSCAN':DBSCAN(),
			'DPGMM':DPGMM(),
			'DecisionTreeClassifier':DecisionTreeClassifier(),
			'DecisionTreeRegressor':DecisionTreeRegressor(),
			'DictionaryLearning':DictionaryLearning(),
			'ElasticNet':ElasticNet(),
			'ElasticNetCV':ElasticNetCV(),
			'EmpiricalCovariance':EmpiricalCovariance(),
			'ExtraTreeClassifier':ExtraTreeClassifier(),
			'ExtraTreeRegressor':ExtraTreeRegressor(),
			'ExtraTreesClassifier':ExtraTreesClassifier(),
			'ExtraTreesRegressor':ExtraTreesRegressor(),
			'FactorAnalysis':FactorAnalysis(),
			'FastICA':FastICA(),
			'FeatureAgglomeration':FeatureAgglomeration(),
			'FunctionTransformer':FunctionTransformer(),
			'GMM':GMM(),
			'GaussianMixture':GaussianMixture(),
			'GaussianNB':GaussianNB(),
			'GaussianProcess':GaussianProcess(),
			'GaussianProcessClassifier':GaussianProcessClassifier(),
Exemple #17
0
print X_train[123, :]
'''
norm1 =  np.linalg.norm(y_train)    
if norm1 != 0:   
    y_train, y_test =  y_train/norm1, y_test/norm1
print norm1
'''

print y_train.shape

model = SVR(C=1.0, gamma=1.0)
model = LinearRegression()

lasso = Lasso(alpha=0.1).fit(X_train, y_train)
enet = ElasticNet(alpha=0.1, l1_ratio=0.7).fit(X_train, y_train)

y_pred = lasso.predict(X_test)

print "MSE", mean_squared_error(y_test, y_pred)
m = np.mean(y_test)
print "MSE (Mean)", mean_squared_error(y_test, m * np.ones(len(y_test)))

print "r^2 on test data", r2_score(y_test, y_pred)

plt.plot(enet.coef_, label='Elastic net coefficients')
plt.plot(lasso.coef_, label='Lasso coefficients')
plt.legend(loc='best')
plt.title("Lasso R^2: %f, Elastic Net R^2: %f" % (r2_score(
    y_test, lasso.predict(X_test)), r2_score(y_test, enet.predict(X_test))))
plt.show()
Exemple #18
0
boston = datasets.load_boston()
# print(type(boston), boston.keys())
# print(boston.feature_names, boston.DESCR, boston.filename)
X, y = boston.data, boston.target
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.2,
                                                    random_state=22)
print(X.shape, y.shape)
# plot the price
# plt.plot(y)
# plt.show()
model = SVR(gamma='scale')
model.fit(X_train, y_train)
preds = model.predict(X_test)
plt.plot(y_test, label='y_test')
plt.plot(preds, label='y_predict')
plt.legend()
plt.show()
print(model.score(X_test, y_test))
# compare with ElasticNet linear model
from sklearn.linear_model.coordinate_descent import ElasticNet
model = ElasticNet()
model.fit(X_train, y_train)
preds = model.predict(X_test)
plt.plot(y_test, label='y_test')
plt.plot(preds, label='y_predict')
plt.legend()
plt.show()
print(model.score(X_test, y_test))