예제 #1
0
def test_partial_dependence_easy_target(est, power):
    # If the target y only depends on one feature in an obvious way (linear or
    # quadratic) then the partial dependence for that feature should reflect
    # it.
    # We here fit a linear regression_data model (with polynomial features if
    # needed) and compute r_squared to check that the partial dependence
    # correctly reflects the target.

    rng = np.random.RandomState(0)
    n_samples = 200
    target_variable = 2
    X = rng.normal(size=(n_samples, 5))
    y = X[:, target_variable]**power

    est.fit(X, y)

    averaged_predictions, values = partial_dependence(
        est, features=[target_variable], X=X, grid_resolution=1000)

    new_X = values[0].reshape(-1, 1)
    new_y = averaged_predictions[0]
    # add polynomial features if needed
    new_X = PolynomialFeatures(degree=power).fit_transform(new_X)

    lr = LinearRegression().fit(new_X, new_y)
    r2 = r2_score(new_y, lr.predict(new_X))

    assert r2 > .99
예제 #2
0
def test_huber_equals_lr_for_high_epsilon():
    # Test that Ridge matches LinearRegression for large epsilon
    X, y = make_regression_with_outliers()
    lr = LinearRegression()
    lr.fit(X, y)
    huber = HuberRegressor(epsilon=1e3, alpha=0.0)
    huber.fit(X, y)
    assert_almost_equal(huber.coef_, lr.coef_, 3)
    assert_almost_equal(huber.intercept_, lr.intercept_, 2)
예제 #3
0
def test_transform_target_regressor_invertible():
    X, y = friedman
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.sqrt, inverse_func=np.log,
                                      check_inverse=True)
    assert_warns_message(UserWarning, "The provided functions or transformer"
                         " are not strictly inverse of each other.",
                         regr.fit, X, y)
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.sqrt, inverse_func=np.log)
    regr.set_params(check_inverse=False)
    assert_no_warnings(regr.fit, X, y)
예제 #4
0
def test_omp_reaches_least_squares():
    # Use small simple data; it's a sanity check but OMP can stop early
    rng = check_random_state(0)
    n_samples, n_features = (10, 8)
    n_targets = 3
    X = rng.randn(n_samples, n_features)
    Y = rng.randn(n_samples, n_targets)
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_features)
    lstsq = LinearRegression()
    omp.fit(X, Y)
    lstsq.fit(X, Y)
    assert_array_almost_equal(omp.coef_, lstsq.coef_)
예제 #5
0
def test_transform_target_regressor_2d_transformer(X, y):
    # Check consistency with transformer accepting only 2D array and a 1D/2D y
    # array.
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    if y.ndim == 1:  # create a 2D array and squeeze results
        y_tran = regr.transformer_.transform(y.reshape(-1, 1)).squeeze()
    else:
        y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(
        y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    if y.ndim == 1:  # create a 2D array and squeeze results
        lr.fit(X, transformer2.fit_transform(y.reshape(-1, 1)).squeeze())
    else:
        lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
예제 #6
0
def test_transform_target_regressor_functions_multioutput():
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.log, inverse_func=np.exp)
    y_pred = regr.fit(X, y).predict(X)
    # check the transformer output
    y_tran = regr.transformer_.transform(y)
    assert_allclose(np.log(y), y_tran)
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran))
    assert y.shape == y_pred.shape
    assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
    # check the regressor output
    lr = LinearRegression().fit(X, regr.func(y))
    assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
예제 #7
0
def test_transform_target_regressor_functions():
    X, y = friedman
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      func=np.log, inverse_func=np.exp)
    y_pred = regr.fit(X, y).predict(X)
    # check the transformer output
    y_tran = regr.transformer_.transform(y.reshape(-1, 1)).squeeze()
    assert_allclose(np.log(y), y_tran)
    assert_allclose(y, regr.transformer_.inverse_transform(
        y_tran.reshape(-1, 1)).squeeze())
    assert y.shape == y_pred.shape
    assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
    # check the regressor output
    lr = LinearRegression().fit(X, regr.func(y))
    assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())
예제 #8
0
def test_plot_partial_dependence_fig(pyplot):
    # Make sure fig object is correctly used if not None
    (X, y), _ = regression_data
    clf = LinearRegression()
    clf.fit(X, y)

    fig = pyplot.figure()
    grid_resolution = 25
    plot_partial_dependence(clf,
                            X, [0, 1],
                            target=0,
                            grid_resolution=grid_resolution,
                            fig=fig)

    assert pyplot.gcf() is fig
예제 #9
0
def test_subsamples():
    X, y, w, c = gen_toy_problem_4d()
    theil_sen = TheilSenRegressor(n_subsamples=X.shape[0],
                                  random_state=0).fit(X, y)
    lstq = LinearRegression().fit(X, y)
    # Check for exact the same results as Least Squares
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 9)
예제 #10
0
def test_theil_sen_1d():
    X, y, w, c = gen_toy_problem_1d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert np.abs(lstq.coef_ - w) > 0.9
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
예제 #11
0
def test_theil_sen_1d_no_intercept():
    X, y, w, c = gen_toy_problem_1d(intercept=False)
    # Check that Least Squares fails
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert np.abs(lstq.coef_ - w - c) > 0.5
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(fit_intercept=False,
                                  random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w + c, 1)
    assert_almost_equal(theil_sen.intercept_, 0.)
예제 #12
0
def test_ransac_stop_score():
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=5,
                                       stop_score=0,
                                       random_state=0)
    ransac_estimator.fit(X, y)

    assert ransac_estimator.n_trials_ == 1
예제 #13
0
def test_theil_sen_2d():
    X, y, w, c = gen_toy_problem_2d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert norm(lstq.coef_ - w) > 1.0
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(max_subpopulation=1e3,
                                  random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
예제 #14
0
def test_regressormixin_score_multioutput():
    from mrex.linear_model import LinearRegression
    # no warnings when y_type is continuous
    X = [[1], [2], [3]]
    y = [1, 2, 3]
    reg = LinearRegression().fit(X, y)
    assert_no_warnings(reg.score, X, y)
    # warn when y_type is continuous-multioutput
    y = [[1, 2], [2, 3], [3, 4]]
    reg = LinearRegression().fit(X, y)
    msg = ("The default value of multioutput (not exposed in "
           "score method) will change from 'variance_weighted' "
           "to 'uniform_average' in 0.23 to keep consistent "
           "with 'metrics.r2_score'. To specify the default "
           "value manually and avoid the warning, please "
           "either call 'metrics.r2_score' directly or make a "
           "custom scorer with 'metrics.make_scorer' (the "
           "built-in scorer 'r2' uses "
           "multioutput='uniform_average').")
    assert_warns_message(FutureWarning, msg, reg.score, X, y)
예제 #15
0
def test_ovr_multilabel():
    # Toy dataset where features correspond directly to labels.
    X = np.array([[0, 4, 5], [0, 5, 0], [3, 3, 3], [4, 0, 6], [6, 0, 0]])
    y = np.array([[0, 1, 1], [0, 1, 0], [1, 1, 1], [1, 0, 1], [1, 0, 0]])

    for base_clf in (MultinomialNB(), LinearSVC(random_state=0),
                     LinearRegression(), Ridge(), ElasticNet(),
                     Lasso(alpha=0.5)):
        clf = OneVsRestClassifier(base_clf).fit(X, y)
        y_pred = clf.predict([[0, 4, 4]])[0]
        assert_array_equal(y_pred, [0, 1, 1])
        assert clf.multilabel_
예제 #16
0
def test_transform_target_regressor_1d_transformer(X, y):
    # All transformer in scikit-learn expect 2D data. FunctionTransformer with
    # validate=False lift this constraint without checking that the input is a
    # 2D vector. We check the consistency of the data shape using a 1D and 2D y
    # array.
    transformer = FunctionTransformer(func=lambda x: x + 1,
                                      inverse_func=lambda x: x - 1)
    regr = TransformedTargetRegressor(regressor=LinearRegression(),
                                      transformer=transformer)
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_shifted_by_one(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(
        y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)
예제 #17
0
def test_classes_property():
    iris = load_iris()
    X = iris.data
    y = iris.target

    reg = make_pipeline(SelectKBest(k=1), LinearRegression())
    reg.fit(X, y)
    assert_raises(AttributeError, getattr, reg, "classes_")

    clf = make_pipeline(SelectKBest(k=1), LogisticRegression(random_state=0))
    assert_raises(AttributeError, getattr, clf, "classes_")
    clf.fit(X, y)
    assert_array_equal(clf.classes_, np.unique(y))
예제 #18
0
def test_ransac_no_valid_model():
    def is_model_valid(estimator, X, y):
        return False

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       is_model_valid=is_model_valid,
                                       max_trials=5)

    msg = ("RANSAC could not find a valid consensus set")
    assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 5
예제 #19
0
def test_ransac_predict():
    X = np.arange(100)[:, None]
    y = np.zeros((100, ))
    y[0] = 1
    y[1] = 100

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=0.5,
                                       random_state=0)
    ransac_estimator.fit(X, y)

    assert_array_equal(ransac_estimator.predict(X), np.zeros(100))
예제 #20
0
def test_ransac_is_model_valid():
    def is_model_valid(estimator, X, y):
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=5,
                                       is_model_valid=is_model_valid,
                                       random_state=0)

    assert_raises(ValueError, ransac_estimator.fit, X, y)
예제 #21
0
def test_ransac_resid_thresh_no_inliers():
    # When residual_threshold=0.0 there are no inliers and a
    # ValueError with a message should be raised
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=0.0,
                                       random_state=0,
                                       max_trials=5)

    msg = ("RANSAC could not find a valid consensus set")
    assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
    assert ransac_estimator.n_skips_no_inliers_ == 5
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 0
예제 #22
0
def test_less_samples_than_features():
    random_state = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)
    # Check that Theil-Sen falls back to Least Squares if fit_intercept=False
    theil_sen = TheilSenRegressor(fit_intercept=False,
                                  random_state=0).fit(X, y)
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 12)
    # Check fit_intercept=True case. This will not be equal to the Least
    # Squares solution since the intercept is calculated differently.
    theil_sen = TheilSenRegressor(fit_intercept=True, random_state=0).fit(X, y)
    y_pred = theil_sen.predict(X)
    assert_array_almost_equal(y_pred, y, 12)
예제 #23
0
def test_plot_partial_dependence_multioutput(pyplot):
    # Test partial dependence plot function on multi-output input.
    (X, y), _ = multioutput_regression_data
    clf = LinearRegression()
    clf.fit(X, y)

    grid_resolution = 25
    plot_partial_dependence(clf,
                            X, [0, 1],
                            target=0,
                            grid_resolution=grid_resolution)
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 2
    assert all(ax.has_data for ax in axs)

    plot_partial_dependence(clf,
                            X, [0, 1],
                            target=1,
                            grid_resolution=grid_resolution)
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 2
    assert all(ax.has_data for ax in axs)
예제 #24
0
def test_ransac_score():
    X = np.arange(100)[:, None]
    y = np.zeros((100, ))
    y[0] = 1
    y[1] = 100

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=0.5,
                                       random_state=0)
    ransac_estimator.fit(X, y)

    assert ransac_estimator.score(X[2:], y[2:]) == 1
    assert ransac_estimator.score(X[:2], y[:2]) < 1
예제 #25
0
def test_ransac_default_residual_threshold():
    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       random_state=0)

    # Estimate parameters of corrupted data
    ransac_estimator.fit(X, y)

    # Ground truth / reference inlier mask
    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(
        np.bool_)
    ref_inlier_mask[outliers] = False

    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
예제 #26
0
def test_ransac_none_estimator():

    base_estimator = LinearRegression()

    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=5,
                                       random_state=0)
    ransac_none_estimator = RANSACRegressor(None, 2, 5, random_state=0)

    ransac_estimator.fit(X, y)
    ransac_none_estimator.fit(X, y)

    assert_array_almost_equal(ransac_estimator.predict(X),
                              ransac_none_estimator.predict(X))
예제 #27
0
def test_ransac_sparse_csc():
    X_sparse = sparse.csc_matrix(X)

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       min_samples=2,
                                       residual_threshold=5,
                                       random_state=0)
    ransac_estimator.fit(X_sparse, y)

    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(
        np.bool_)
    ref_inlier_mask[outliers] = False

    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
예제 #28
0
def test_ransac_exceed_max_skips():
    def is_data_valid(X, y):
        return False

    base_estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(base_estimator,
                                       is_data_valid=is_data_valid,
                                       max_trials=5,
                                       max_skips=3)

    msg = ("RANSAC skipped more iterations than `max_skips`")
    assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 4
    assert ransac_estimator.n_skips_invalid_model_ == 0
예제 #29
0
def test_ransac_residual_loss():
    loss_multi1 = lambda y_true, y_pred: np.sum(np.abs(y_true - y_pred),
                                                axis=1)
    loss_multi2 = lambda y_true, y_pred: np.sum((y_true - y_pred)**2, axis=1)

    loss_mono = lambda y_true, y_pred: np.abs(y_true - y_pred)
    yyy = np.column_stack([y, y, y])

    base_estimator = LinearRegression()
    ransac_estimator0 = RANSACRegressor(base_estimator,
                                        min_samples=2,
                                        residual_threshold=5,
                                        random_state=0)
    ransac_estimator1 = RANSACRegressor(base_estimator,
                                        min_samples=2,
                                        residual_threshold=5,
                                        random_state=0,
                                        loss=loss_multi1)
    ransac_estimator2 = RANSACRegressor(base_estimator,
                                        min_samples=2,
                                        residual_threshold=5,
                                        random_state=0,
                                        loss=loss_multi2)

    # multi-dimensional
    ransac_estimator0.fit(X, yyy)
    ransac_estimator1.fit(X, yyy)
    ransac_estimator2.fit(X, yyy)
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator1.predict(X))
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator2.predict(X))

    # one-dimensional
    ransac_estimator0.fit(X, y)
    ransac_estimator2.loss = loss_mono
    ransac_estimator2.fit(X, y)
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator2.predict(X))
    ransac_estimator3 = RANSACRegressor(base_estimator,
                                        min_samples=2,
                                        residual_threshold=5,
                                        random_state=0,
                                        loss="squared_loss")
    ransac_estimator3.fit(X, y)
    assert_array_almost_equal(ransac_estimator0.predict(X),
                              ransac_estimator2.predict(X))
예제 #30
0
def test_ransac_min_n_samples():
    base_estimator = LinearRegression()
    ransac_estimator1 = RANSACRegressor(base_estimator,
                                        min_samples=2,
                                        residual_threshold=5,
                                        random_state=0)
    ransac_estimator2 = RANSACRegressor(base_estimator,
                                        min_samples=2. / X.shape[0],
                                        residual_threshold=5,
                                        random_state=0)
    ransac_estimator3 = RANSACRegressor(base_estimator,
                                        min_samples=-1,
                                        residual_threshold=5,
                                        random_state=0)
    ransac_estimator4 = RANSACRegressor(base_estimator,
                                        min_samples=5.2,
                                        residual_threshold=5,
                                        random_state=0)
    ransac_estimator5 = RANSACRegressor(base_estimator,
                                        min_samples=2.0,
                                        residual_threshold=5,
                                        random_state=0)
    ransac_estimator6 = RANSACRegressor(base_estimator,
                                        residual_threshold=5,
                                        random_state=0)
    ransac_estimator7 = RANSACRegressor(base_estimator,
                                        min_samples=X.shape[0] + 1,
                                        residual_threshold=5,
                                        random_state=0)

    ransac_estimator1.fit(X, y)
    ransac_estimator2.fit(X, y)
    ransac_estimator5.fit(X, y)
    ransac_estimator6.fit(X, y)

    assert_array_almost_equal(ransac_estimator1.predict(X),
                              ransac_estimator2.predict(X))
    assert_array_almost_equal(ransac_estimator1.predict(X),
                              ransac_estimator5.predict(X))
    assert_array_almost_equal(ransac_estimator1.predict(X),
                              ransac_estimator6.predict(X))

    assert_raises(ValueError, ransac_estimator3.fit, X, y)
    assert_raises(ValueError, ransac_estimator4.fit, X, y)
    assert_raises(ValueError, ransac_estimator7.fit, X, y)