Example #1
0
def test_ensemble_heterogeneous_estimators_name_validation(X, y, Ensemble):
    # raise an error when the name contains dunder
    if issubclass(Ensemble, ClassifierMixin):
        estimators = [('lr__', LogisticRegression())]
    else:
        estimators = [('lr__', LinearRegression())]
    ensemble = Ensemble(estimators=estimators)

    err_msg = r"Estimator names must not contain __: got \['lr__'\]"
    with pytest.raises(ValueError, match=err_msg):
        ensemble.fit(X, y)

    # raise an error when the name is not unique
    if issubclass(Ensemble, ClassifierMixin):
        estimators = [('lr', LogisticRegression()),
                      ('lr', LogisticRegression())]
    else:
        estimators = [('lr', LinearRegression()), ('lr', LinearRegression())]
    ensemble = Ensemble(estimators=estimators)

    err_msg = r"Names provided are not unique: \['lr', 'lr'\]"
    with pytest.raises(ValueError, match=err_msg):
        ensemble.fit(X, y)

    # raise an error when the name conflicts with the parameters
    if issubclass(Ensemble, ClassifierMixin):
        estimators = [('estimators', LogisticRegression())]
    else:
        estimators = [('estimators', LinearRegression())]
    ensemble = Ensemble(estimators=estimators)

    err_msg = "Estimator names conflict with constructor arguments"
    with pytest.raises(ValueError, match=err_msg):
        ensemble.fit(X, y)
def test_partial_dependence_easy_target(est, power):
    # If the target y only depends on one feature in an obvious way (linear or
    # quadratic) then the partial dependence for that feature should reflect
    # it.
    # We here fit a linear regression_data model (with polynomial features if
    # needed) and compute r_squared to check that the partial dependence
    # correctly reflects the target.

    rng = np.random.RandomState(0)
    n_samples = 200
    target_variable = 2
    X = rng.normal(size=(n_samples, 5))
    y = X[:, target_variable]**power

    est.fit(X, y)

    averaged_predictions, values = partial_dependence(
        est, features=[target_variable], X=X, grid_resolution=1000)

    new_X = values[0].reshape(-1, 1)
    new_y = averaged_predictions[0]
    # add polynomial features if needed
    new_X = PolynomialFeatures(degree=power).fit_transform(new_X)

    lr = LinearRegression().fit(new_X, new_y)
    r2 = r2_score(new_y, lr.predict(new_X))

    assert r2 > .99
def test_plot_partial_dependence_passing_numpy_axes(pyplot, clf_boston,
                                                    boston):
    grid_resolution = 25
    feature_names = boston.feature_names.tolist()
    disp1 = plot_partial_dependence(clf_boston, boston.data,
                                    ['CRIM', 'ZN'],
                                    grid_resolution=grid_resolution,
                                    feature_names=feature_names)
    assert disp1.axes_.shape == (1, 2)
    assert disp1.axes_[0, 0].get_ylabel() == "Partial dependence"
    assert disp1.axes_[0, 1].get_ylabel() == ""
    assert len(disp1.axes_[0, 0].get_lines()) == 1
    assert len(disp1.axes_[0, 1].get_lines()) == 1

    lr = LinearRegression()
    lr.fit(boston.data, boston.target)

    disp2 = plot_partial_dependence(lr, boston.data,
                                    ['CRIM', 'ZN'],
                                    grid_resolution=grid_resolution,
                                    feature_names=feature_names,
                                    ax=disp1.axes_)

    assert np.all(disp1.axes_ == disp2.axes_)
    assert len(disp2.axes_[0, 0].get_lines()) == 2
    assert len(disp2.axes_[0, 1].get_lines()) == 2
Example #4
0
def test_omp_reaches_least_squares():
    # Use small simple data; it's a sanity check but OMP can stop early
    rng = check_random_state(0)
    n_samples, n_features = (10, 8)
    n_targets = 3
    X = rng.randn(n_samples, n_features)
    Y = rng.randn(n_samples, n_targets)
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_features)
    lstsq = LinearRegression()
    omp.fit(X, Y)
    lstsq.fit(X, Y)
    assert_array_almost_equal(omp.coef_, lstsq.coef_)
Example #5
0
def test_linear_regression_sparse(random_state=0):
    # Test that linear regression also works with sparse data
    random_state = check_random_state(random_state)
    for i in range(10):
        n = 100
        X = sparse.eye(n, n)
        beta = random_state.rand(n)
        y = X * beta[:, np.newaxis]

        ols = LinearRegression()
        ols.fit(X, y.ravel())
        assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)

        assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)
Example #6
0
def test_transform_target_regressor_invertible():
    X, y = friedman
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.sqrt,
                                      inverse_func=np.log,
                                      check_inverse=True)
    assert_warns_message(
        UserWarning, "The provided functions or transformer"
        " are not strictly inverse of each other.", regr.fit, X, y)
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.sqrt,
                                      inverse_func=np.log)
    regr.set_params(check_inverse=False)
    assert_no_warnings(regr.fit, X, y)
Example #7
0
def test_transform_target_regressor_2d_transformer(X, y):
    # Check consistency with transformer accepting only 2D array and a 1D/2D y
    # array.
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    if y.ndim == 1:  # create a 2D array and squeeze results
        y_tran = regr.transformer_.transform(y.reshape(-1, 1)).squeeze()
    else:
        y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    if y.ndim == 1:  # create a 2D array and squeeze results
        lr.fit(X, transformer2.fit_transform(y.reshape(-1, 1)).squeeze())
    else:
        lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
def test_plot_partial_dependence_fig_deprecated(pyplot):
    # Make sure fig object is correctly used if not None
    X, y = make_regression(n_samples=50, random_state=0)
    clf = LinearRegression()
    clf.fit(X, y)

    fig = pyplot.figure()
    grid_resolution = 25

    msg = ("The fig parameter is deprecated in version 0.22 and will be "
           "removed in version 0.24")
    with pytest.warns(FutureWarning, match=msg):
        plot_partial_dependence(
            clf, X, [0, 1], target=0, grid_resolution=grid_resolution, fig=fig)

    assert pyplot.gcf() is fig
Example #9
0
def test_stacking_regressor_diabetes(cv, final_estimator, predict_params,
                                     passthrough):
    # prescale the data to avoid convergence warning without using a pipeline
    # for later assert
    X_train, X_test, y_train, _ = train_test_split(scale(X_diabetes),
                                                   y_diabetes,
                                                   random_state=42)
    estimators = [('lr', LinearRegression()), ('svr', LinearSVR())]
    reg = StackingRegressor(estimators=estimators,
                            final_estimator=final_estimator,
                            cv=cv,
                            passthrough=passthrough)
    reg.fit(X_train, y_train)
    result = reg.predict(X_test, **predict_params)
    expected_result_length = 2 if predict_params else 1
    if predict_params:
        assert len(result) == expected_result_length

    X_trans = reg.transform(X_test)
    expected_column_count = 12 if passthrough else 2
    assert X_trans.shape[1] == expected_column_count
    if passthrough:
        assert_allclose(X_test, X_trans[:, -10:])

    reg.set_params(lr='drop')
    reg.fit(X_train, y_train)
    reg.predict(X_test)

    X_trans = reg.transform(X_test)
    expected_column_count_drop = 11 if passthrough else 1
    assert X_trans.shape[1] == expected_column_count_drop
    if passthrough:
        assert_allclose(X_test, X_trans[:, -10:])
Example #10
0
def test_transform_target_regressor_functions_multioutput():
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1]**2 + 1)).T
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.log,
                                      inverse_func=np.exp)
    y_pred = regr.fit(X, y).predict(X)
    # check the transformer output
    y_tran = regr.transformer_.transform(y)
    assert_allclose(np.log(y), y_tran)
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran))
    assert y.shape == y_pred.shape
    assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
    # check the regressor output
    lr = LinearRegression().fit(X, regr.func(y))
    assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
Example #11
0
def test_transform_target_regressor_error():
    X, y = friedman
    # provide a transformer and functions at the same time
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=StandardScaler(),
                                      func=np.exp,
                                      inverse_func=np.log)
    with pytest.raises(ValueError,
                       match="'transformer' and functions"
                       " 'func'/'inverse_func' cannot both be set."):
        regr.fit(X, y)
    # fit with sample_weight with a regressor which does not support it
    sample_weight = np.ones((y.shape[0], ))
    regr = TransformedTargetRegressor(regressor=Lasso(),
                                      transformer=StandardScaler())
    with pytest.raises(TypeError,
                       match=r"fit\(\) got an unexpected "
                       "keyword argument 'sample_weight'"):
        regr.fit(X, y, sample_weight=sample_weight)
    # func is given but inverse_func is not
    regr = TransformedTargetRegressor(func=np.exp)
    with pytest.raises(ValueError,
                       match="When 'func' is provided, "
                       "'inverse_func' must also be provided"):
        regr.fit(X, y)
Example #12
0
def test_subsamples():
    X, y, w, c = gen_toy_problem_4d()
    theil_sen = TheilSenRegressor(n_subsamples=X.shape[0],
                                  random_state=0).fit(X, y)
    lstq = LinearRegression().fit(X, y)
    # Check for exact the same results as Least Squares
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 9)
Example #13
0
def test_transform_target_regressor_functions():
    X, y = friedman
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.log,
                                      inverse_func=np.exp)
    y_pred = regr.fit(X, y).predict(X)
    # check the transformer output
    y_tran = regr.transformer_.transform(y.reshape(-1, 1)).squeeze()
    assert_allclose(np.log(y), y_tran)
    assert_allclose(
        y,
        regr.transformer_.inverse_transform(y_tran.reshape(-1, 1)).squeeze())
    assert y.shape == y_pred.shape
    assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
    # check the regressor output
    lr = LinearRegression().fit(X, regr.func(y))
    assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
Example #14
0
def test_theil_sen_1d():
    X, y, w, c = gen_toy_problem_1d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert np.abs(lstq.coef_ - w) > 0.9
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
Example #15
0
def test_theil_sen_1d_no_intercept():
    X, y, w, c = gen_toy_problem_1d(intercept=False)
    # Check that Least Squares fails
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert np.abs(lstq.coef_ - w - c) > 0.5
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(fit_intercept=False,
                                  random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w + c, 1)
    assert_almost_equal(theil_sen.intercept_, 0.)
Example #16
0
def test_fit_intercept():
    # Test assertions on betas shape.
    X2 = np.array([[0.38349978, 0.61650022], [0.58853682, 0.41146318]])
    X3 = np.array([[0.27677969, 0.70693172, 0.01628859],
                   [0.08385139, 0.20692515, 0.70922346]])
    y = np.array([1, 1])

    lr2_without_intercept = LinearRegression(fit_intercept=False).fit(X2, y)
    lr2_with_intercept = LinearRegression().fit(X2, y)

    lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y)
    lr3_with_intercept = LinearRegression().fit(X3, y)

    assert (
        lr2_with_intercept.coef_.shape == lr2_without_intercept.coef_.shape)
    assert (
        lr3_with_intercept.coef_.shape == lr3_without_intercept.coef_.shape)
    assert (
        lr2_without_intercept.coef_.ndim == lr3_without_intercept.coef_.ndim)
Example #17
0
def test_ransac_stop_score():
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=5,
                                       stop_score=0,
                                       random_state=0)
    ransac_estimator.fit(X, y)

    assert ransac_estimator.n_trials_ == 1
Example #18
0
def test_theil_sen_2d():
    X, y, w, c = gen_toy_problem_2d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert norm(lstq.coef_ - w) > 1.0
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(max_subpopulation=1e3,
                                  random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
Example #19
0
def test_regressormixin_score_multioutput():
    from sklearn_lib.linear_model import LinearRegression
    # no warnings when y_type is continuous
    X = [[1], [2], [3]]
    y = [1, 2, 3]
    reg = LinearRegression().fit(X, y)
    assert_no_warnings(reg.score, X, y)
    # warn when y_type is continuous-multioutput
    y = [[1, 2], [2, 3], [3, 4]]
    reg = LinearRegression().fit(X, y)
    msg = ("The default value of multioutput (not exposed in "
           "score method) will change from 'variance_weighted' "
           "to 'uniform_average' in 0.23 to keep consistent "
           "with 'metrics.r2_score'. To specify the default "
           "value manually and avoid the warning, please "
           "either call 'metrics.r2_score' directly or make a "
           "custom scorer with 'metrics.make_scorer' (the "
           "built-in scorer 'r2' uses "
           "multioutput='uniform_average').")
    assert_warns_message(FutureWarning, msg, reg.score, X, y)
Example #20
0
def test_linear_regression_sample_weights():
    # TODO: loop over sparse data as well

    rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    for n_samples, n_features in ((6, 5), ):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for intercept in (True, False):

            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert reg.coef_.shape == (X.shape[1], )  # sanity checks
            assert reg.score(X, y) > 0.5

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept is False:
                X_aug = X
            else:
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)

            coefs2 = linalg.solve(
                X_aug.T.dot(W).dot(X_aug),
                X_aug.T.dot(W).dot(y))

            if intercept is False:
                assert_array_almost_equal(coefs1, coefs2)
            else:
                assert_array_almost_equal(coefs1, coefs2[1:])
                assert_almost_equal(inter1, coefs2[0])
Example #21
0
def test_classes_property():
    X = iris.data
    y = iris.target

    reg = make_pipeline(SelectKBest(k=1), LinearRegression())
    reg.fit(X, y)
    assert_raises(AttributeError, getattr, reg, "classes_")

    clf = make_pipeline(SelectKBest(k=1), LogisticRegression(random_state=0))
    assert_raises(AttributeError, getattr, clf, "classes_")
    clf.fit(X, y)
    assert_array_equal(clf.classes_, np.unique(y))
Example #22
0
def test_linear_regression_multiple_outcome(random_state=0):
    # Test multiple-outcome linear regressions
    X, y = make_regression(random_state=random_state)

    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    reg = LinearRegression()
    reg.fit((X), Y)
    assert reg.coef_.shape == (2, n_features)
    Y_pred = reg.predict(X)
    reg.fit(X, y)
    y_pred = reg.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
Example #23
0
def test_linear_regression():
    # Test LinearRegression on a simple dataset.
    # a simple dataset
    X = [[1], [2]]
    Y = [1, 2]

    reg = LinearRegression()
    reg.fit(X, Y)

    assert_array_almost_equal(reg.coef_, [1])
    assert_array_almost_equal(reg.intercept_, [0])
    assert_array_almost_equal(reg.predict(X), [1, 2])

    # test it also for degenerate input
    X = [[1]]
    Y = [0]

    reg = LinearRegression()
    reg.fit(X, Y)
    assert_array_almost_equal(reg.coef_, [0])
    assert_array_almost_equal(reg.intercept_, [0])
    assert_array_almost_equal(reg.predict(X), [0])
Example #24
0
def test_linear_regression_sparse_multiple_outcome(random_state=0):
    # Test multiple-outcome linear regressions with sparse data
    random_state = check_random_state(random_state)
    X, y = make_sparse_uncorrelated(random_state=random_state)
    X = sparse.coo_matrix(X)
    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    ols = LinearRegression()
    ols.fit(X, Y)
    assert ols.coef_.shape == (2, n_features)
    Y_pred = ols.predict(X)
    ols.fit(X, y.ravel())
    y_pred = ols.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
Example #25
0
def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
    # Test that linear regression agrees between sparse and dense
    rng = check_random_state(0)
    n_samples = 200
    n_features = 2
    X = rng.randn(n_samples, n_features)
    X[X < 0.1] = 0.
    Xcsr = sparse.csr_matrix(X)
    y = rng.rand(n_samples)
    params = dict(normalize=normalize, fit_intercept=fit_intercept)
    clf_dense = LinearRegression(**params)
    clf_sparse = LinearRegression(**params)
    clf_dense.fit(X, y)
    clf_sparse.fit(Xcsr, y)
    assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
    assert_allclose(clf_dense.coef_, clf_sparse.coef_)
Example #26
0
def test_ransac_predict():
    X = np.arange(100)[:, None]
    y = np.zeros((100, ))
    y[0] = 1
    y[1] = 100

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=0.5,
                                       random_state=0)
    ransac_estimator.fit(X, y)

    assert_array_equal(ransac_estimator.predict(X), np.zeros(100))
Example #27
0
def test_ransac_no_valid_model():
    def is_model_valid(estimator, X, y):
        return False

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       is_model_valid=is_model_valid,
                                       max_trials=5)

    msg = ("RANSAC could not find a valid consensus set")
    assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 5
Example #28
0
def test_ransac_is_model_valid():
    def is_model_valid(estimator, X, y):
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=5,
                                       is_model_valid=is_model_valid,
                                       random_state=0)

    assert_raises(ValueError, ransac_estimator.fit, X, y)
Example #29
0
def test_less_samples_than_features():
    random_state = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)
    # Check that Theil-Sen falls back to Least Squares if fit_intercept=False
    theil_sen = TheilSenRegressor(fit_intercept=False,
                                  random_state=0).fit(X, y)
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 12)
    # Check fit_intercept=True case. This will not be equal to the Least
    # Squares solution since the intercept is calculated differently.
    theil_sen = TheilSenRegressor(fit_intercept=True, random_state=0).fit(X, y)
    y_pred = theil_sen.predict(X)
    assert_array_almost_equal(y_pred, y, 12)
Example #30
0
def test_ransac_default_residual_threshold():
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       random_state=0)

    # Estimate parameters of corrupted data
    ransac_estimator.fit(X, y)

    # Ground truth / reference inlier mask
    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(
        np.bool_)
    ref_inlier_mask[outliers] = False

    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)