def test_spline_transformer_manual_knot_input():
    """Check that explicit knots are accepted as a list and as an ndarray.

    Two transformers are fitted on the same data, one with the knots given as
    a nested Python list and one with the same knots converted to a NumPy
    array. The knot vectors of the fitted per-feature B-splines must agree.
    """
    X = np.arange(20).reshape(10, 2)

    knots_as_list = [[0.5, 1], [1.5, 2], [5, 10]]
    from_list = SplineTransformer(degree=3, knots=knots_as_list).fit(X)

    knots_as_array = np.asarray(knots_as_list)
    from_array = SplineTransformer(degree=3, knots=knots_as_array).fit(X)

    n_features = X.shape[1]
    for feature_idx in range(n_features):
        # `.t` is the knot vector of the fitted B-spline for this feature.
        assert_allclose(
            from_list.bsplines_[feature_idx].t,
            from_array.bsplines_[feature_idx].t,
        )
Beispiel #2
0
def test_spline_transformer_extrapolation(bias, intercept, degree):
    """Test that B-spline extrapolation works correctly.

    A straight line ``y = x`` is fitted on ``[-1, 1]`` with a spline + OLS
    pipeline and then evaluated at -10 and 5, i.e. outside the training
    range, for each extrapolation mode.

    Parameters
    ----------
    bias : bool
        Forwarded to ``SplineTransformer(include_bias=...)``.
    intercept : bool
        Forwarded to ``LinearRegression(fit_intercept=...)``.
    degree : int
        Forwarded to ``SplineTransformer(degree=...)``.

    Notes
    -----
    The arguments are presumably supplied by a ``pytest.mark.parametrize``
    decorator that is not visible here -- confirm against the full file.
    """
    # we use a straight line for that
    X = np.linspace(-1, 1, 100)[:, None]
    y = X.squeeze()
    X_outside = [[-10], [5]]

    # Expected predictions outside the training range, per extrapolation
    # mode: "constant" is expected to yield the boundary values, "linear"
    # is expected to continue the straight line.
    expected_by_mode = (
        ("constant", [-1, 1]),
        ("linear", [-10, 5]),
    )
    for extrapolation, expected in expected_by_mode:
        # Steps are (name, estimator) tuples, the documented Pipeline format.
        pipe = Pipeline(
            steps=[
                (
                    "spline",
                    SplineTransformer(
                        n_knots=4,
                        degree=degree,
                        include_bias=bias,
                        extrapolation=extrapolation,
                    ),
                ),
                ("ols", LinearRegression(fit_intercept=intercept)),
            ]
        )
        pipe.fit(X, y)
        assert_allclose(pipe.predict(X_outside), expected)

    # 'error': transforming values on either side of the training range
    # must raise.
    splt = SplineTransformer(
        n_knots=4, degree=degree, include_bias=bias, extrapolation="error"
    )
    splt.fit(X)
    for out_of_range in ([[-10]], [[5]]):
        with pytest.raises(ValueError):
            splt.transform(out_of_range)
Beispiel #3
0
def test_spline_transformer_periodic_linear_regression(bias, intercept):
    """Check that periodic B-splines + OLS reproduce a periodic target.

    The model is fitted on ``[0, 1]`` and evaluated on ``[-1, 2]``; the
    predictions must match the target there and must repeat from one
    100-sample stretch of the evaluation grid to the next.
    """

    def target(x):
        # The constant offset "+ 3" keeps the target away from 0, which
        # would otherwise make the relative tolerance in assert_allclose
        # meaningless.
        return np.sin(2 * np.pi * x) - np.sin(8 * np.pi * x) + 3

    X_train = np.linspace(0, 1, 101)[:, None]

    spline = SplineTransformer(
        n_knots=20,
        degree=3,
        include_bias=bias,
        extrapolation="periodic",
    )
    ols = LinearRegression(fit_intercept=intercept)
    model = Pipeline(steps=[("spline", spline), ("ols", ols)])
    model.fit(X_train, target(X_train[:, 0]))

    # A wider grid than the training range exercises periodic extrapolation.
    X_wide = np.linspace(-1, 2, 301)[:, None]
    y_pred = model.predict(X_wide)

    assert_allclose(y_pred, target(X_wide[:, 0]), atol=0.01, rtol=0.01)
    # Consecutive stretches of 100 grid points must give equal predictions.
    assert_allclose(y_pred[0:100], y_pred[100:200], rtol=1e-3)
Beispiel #4
0
def test_spline_transformer_periodic_spline_backport():
    """Compare periodic SplineTransformer output with a periodic BSpline.

    The transformer output for knots at -1, 0 and 1 must equal the values of
    a ``BSpline`` built directly with ``extrapolate="periodic"``.
    """
    degree = 2
    X = np.linspace(-2, 3.5, 10)[:, None]

    # Reference: periodic extrapolation as implemented by BSpline itself.
    coef = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0]])
    reference_spline = BSpline(np.arange(-3, 4), coef, degree, "periodic")
    expected = reference_spline(X[:, 0])

    # Candidate: periodic extrapolation as implemented in SplineTransformer.
    splt = SplineTransformer(
        degree=degree,
        extrapolation="periodic",
        knots=[[-1.0], [0.0], [1.0]],
    )
    actual = splt.fit_transform(X)

    assert_allclose(actual, expected)
Beispiel #5
0
def test_spline_transformer_integer_knots(extrapolation):
    """Check that integer-valued knot positions are accepted.

    Only the absence of an exception is checked; the transformed output is
    discarded.
    """
    X = np.arange(20).reshape(10, 2)
    integer_knots = [[0, 1], [1, 2], [5, 5], [11, 10], [12, 11]]
    splt = SplineTransformer(
        degree=3, knots=integer_knots, extrapolation=extrapolation
    )
    splt.fit_transform(X)
def test_spline_transformer_unity_decomposition(degree, n_knots, knots):
    """Check that the B-spline basis sums to one on every row.

    With ``include_bias=True`` the basis functions must add up to 1 for
    every sample that lies within the boundaries seen during ``fit``.
    """
    grid = np.linspace(0, 1, 100)[:, None]
    # Every other grid point is used for training; the boundaries 0 and 1
    # are added explicitly so that they are part of the training data for
    # sure.
    X_train = np.r_[[[0]], grid[::2, :], [[1]]]
    X_test = grid[1::2, :]

    splt = SplineTransformer(
        n_knots=n_knots, degree=degree, knots=knots, include_bias=True
    )
    splt.fit(X_train)

    for X_eval in (X_train, X_test):
        row_sums = np.sum(splt.transform(X_eval), axis=1)
        assert_allclose(row_sums, 1)
Beispiel #7
0
def test_spline_transformer_get_base_knot_positions(
    knots, n_knots, sample_weight, expected_knots
):
    """Check the base knot positions found with and without `sample_weight`.

    The private helper ``SplineTransformer._get_base_knot_positions`` is
    called on a fixed two-feature data set and its result is compared with
    ``expected_knots``.
    """
    X = np.array([[0, 2], [0, 2], [2, 2], [3, 3], [4, 6], [5, 8], [6, 14]])
    computed_knots = SplineTransformer._get_base_knot_positions(
        X=X,
        knots=knots,
        n_knots=n_knots,
        sample_weight=sample_weight,
    )
    assert_allclose(computed_knots, expected_knots)
Beispiel #8
0
def test_spline_transformer_kbindiscretizer():
    """Check that degree-0 B-splines match a quantile KBinsDiscretizer.

    A degree-0 spline basis with quantile knots is compared with the dense
    one-hot encoding produced by ``KBinsDiscretizer`` using the same number
    of quantile bins.
    """
    n_bins = 5
    n_knots = n_bins + 1  # n_bins intervals are delimited by n_bins + 1 knots

    rng = np.random.RandomState(97531)
    X = rng.randn(200).reshape(200, 1)

    spline_transformer = SplineTransformer(
        n_knots=n_knots, degree=0, knots="quantile", include_bias=True
    )
    spline_features = spline_transformer.fit_transform(X)

    discretizer = KBinsDiscretizer(
        n_bins=n_bins, encode="onehot-dense", strategy="quantile"
    )
    binned_features = discretizer.fit_transform(X)

    # The two encodings should be exactly equal; an approximate comparison
    # with a very tight tolerance is used nonetheless.
    assert_allclose(spline_features, binned_features, rtol=1e-13)
Beispiel #9
0
def periodic_spline_transformer(period, n_splines=None, degree=3):
    """Build a periodic ``SplineTransformer`` with evenly spaced knots.

    Parameters
    ----------
    period : number
        Upper end of the knot range; knots are spread evenly over
        ``[0, period]``. Also used as ``n_splines`` when that is ``None``.
    n_splines : int, default=None
        Requested number of splines; defaults to ``period``.
    degree : int, default=3
        Forwarded to ``SplineTransformer(degree=...)``.

    Returns
    -------
    SplineTransformer
        Unfitted transformer with ``extrapolation="periodic"`` and
        ``include_bias=True``.
    """
    n_splines = period if n_splines is None else n_splines
    # periodic and include_bias is True
    n_knots = n_splines + 1
    # One column of knot positions, i.e. shape (n_knots, 1).
    knot_positions = np.linspace(0, period, n_knots).reshape(n_knots, 1)
    return SplineTransformer(
        degree=degree,
        n_knots=n_knots,
        knots=knot_positions,
        extrapolation="periodic",
        include_bias=True,
    )
Beispiel #10
0
def test_spline_transformer_periodic_splines_smoothness(degree):
    """Check that periodic splines are smooth across the first / last knot.

    The transformed features are differentiated numerically on a fine grid.
    Each numerical difference up to order `degree` must stay small, while the
    difference of order `degree` + 1 must show spikes.
    """
    grid = np.linspace(-2, 10, 10_000)[:, None]

    splt = SplineTransformer(
        degree=degree,
        extrapolation="periodic",
        knots=[[0.0], [1.0], [3.0], [4.0], [5.0], [8.0]],
    )
    basis = splt.fit_transform(grid)

    step = (grid.max() - grid.min()) / len(grid)
    tol = 10 * step

    # Splines of degree `degree` are expected to be (`degree` - 1) times
    # continuously differentiable, i.e. the d-th derivative is continuous
    # for d = 0, ..., `degree` - 1. A derivative is taken to be continuous
    # when the differences between neighbouring grid values stay below `tol`
    # in absolute value. The 0-th derivative is the function itself, so its
    # continuity is checked as well.
    derivative = basis
    for _ in range(degree):
        # Continuity check of the current derivative ...
        increments = np.diff(derivative, axis=0)
        assert np.abs(increments).max() < tol
        # ... then move on to the next numerical derivative.
        derivative = increments / step

    # Degree `degree` splines are not `degree` times continuously
    # differentiable at the knots, so the next difference must have spikes
    # there.
    spikes = np.diff(derivative, axis=0)
    assert np.abs(spikes).max() > 1
Beispiel #11
0
def test_spline_transformer_feature_names(get_names):
    """Check the output feature names generated by SplineTransformer.

    `get_names` is the name of the method used to retrieve the feature
    names; it is looked up on the fitted transformer with ``getattr``.
    """
    X = np.arange(20).reshape(10, 2)

    # Default input names, bias spline included: 5 names per feature.
    with_bias = SplineTransformer(n_knots=3, degree=3, include_bias=True)
    with_bias.fit(X)
    expected_default_names = [
        "x0_sp_0",
        "x0_sp_1",
        "x0_sp_2",
        "x0_sp_3",
        "x0_sp_4",
        "x1_sp_0",
        "x1_sp_1",
        "x1_sp_2",
        "x1_sp_3",
        "x1_sp_4",
    ]
    assert_array_equal(getattr(with_bias, get_names)(), expected_default_names)

    # Custom input names, bias spline excluded: 4 names per feature.
    without_bias = SplineTransformer(n_knots=3, degree=3, include_bias=False)
    without_bias.fit(X)
    expected_custom_names = [
        "a_sp_0",
        "a_sp_1",
        "a_sp_2",
        "a_sp_3",
        "b_sp_0",
        "b_sp_1",
        "b_sp_2",
        "b_sp_3",
    ]
    assert_array_equal(
        getattr(without_bias, get_names)(["a", "b"]), expected_custom_names
    )
Beispiel #12
0
def test_spline_transformer_periodicity_of_extrapolation(knots, n_knots, degree):
    """Test that the SplineTransformer is periodic for multiple features.

    ``X_2`` equals ``X_1`` shifted, per feature, by the width of the range
    covered by ``X_1`` (2 for the first feature, 5 for the second). A
    transformer fitted on ``X_1`` with periodic extrapolation must therefore
    produce the same output for both arrays.

    Parameters
    ----------
    knots, n_knots, degree
        Forwarded unchanged to ``SplineTransformer``; presumably supplied by
        a ``pytest.mark.parametrize`` decorator outside this view.
    """
    # Tuple start/stop values make np.linspace interpolate each feature
    # (column) independently, giving arrays of shape (10, 2).
    X_1 = np.linspace((-1, 0), (1, 5), 10)
    X_2 = np.linspace((1, 5), (3, 10), 10)

    splt = SplineTransformer(
        knots=knots, n_knots=n_knots, degree=degree, extrapolation="periodic"
    )
    splt.fit(X_1)

    assert_allclose(splt.transform(X_1), splt.transform(X_2))
Beispiel #13
0
def test_spline_transformer_linear_regression(bias, intercept):
    """Check that B-splines + OLS fit a sinusoidal curve pretty well."""
    X = np.linspace(0, 10, 100)[:, None]
    # The offset "+ 2" keeps the target away from 0, which would otherwise
    # make the relative tolerance in assert_allclose meaningless.
    y = np.sin(X[:, 0]) + 2

    spline = SplineTransformer(
        n_knots=15,
        degree=3,
        include_bias=bias,
        extrapolation="constant",
    )
    ols = LinearRegression(fit_intercept=intercept)
    model = Pipeline(steps=[("spline", spline), ("ols", ols)])
    model.fit(X, y)

    in_sample_pred = model.predict(X)
    assert_allclose(in_sample_pred, y, rtol=1e-3)
Beispiel #14
0
def test_spline_transformer_periodic_splines_periodicity():
    """Check that shifting the knots only permutes the periodic splines.

    Two periodic transformers whose knot sequences are shifted against each
    other must yield the same features up to a reordering of the columns.
    """
    X = np.linspace(0, 10, 101)[:, None]

    base_knots = [[0.0], [1.0], [3.0], [4.0], [5.0], [8.0]]
    shifted_knots = [[1.0], [3.0], [4.0], [5.0], [8.0], [9.0]]

    base_transformer = SplineTransformer(
        degree=3, extrapolation="periodic", knots=base_knots
    )
    shifted_transformer = SplineTransformer(
        degree=3, extrapolation="periodic", knots=shifted_knots
    )

    base_features = base_transformer.fit_transform(X)
    shifted_features = shifted_transformer.fit_transform(X)

    # Column order of the shifted transformer that lines up with the base one.
    column_order = [4, 0, 1, 2, 3]
    assert_allclose(base_features, shifted_features[:, column_order])
Beispiel #15
0
def test_spline_transformer_input_validation(params, err_msg):
    """Check that invalid SplineTransformer parameters raise a ValueError.

    `params` is expanded into constructor keyword arguments and `err_msg` is
    the pattern the raised error message has to match.
    """
    tiny_X = [[1], [2]]

    # Construction stays inside the context manager, so an error raised by
    # either the constructor or `fit` is accepted.
    with pytest.raises(ValueError, match=err_msg):
        transformer = SplineTransformer(**params)
        transformer.fit(tiny_X)
# Plot the ground truth, the training points, several polynomial fits and one
# B-spline fit on the same axes.
# NOTE(review): `ax`, `f`, `lw`, `x_plot`, `X_plot`, `x_train`, `X_train` and
# `y_train` are defined outside this view -- confirm against the full script.

# Fix the sequence of colors used by the subsequent plotting calls.
ax.set_prop_cycle(
    color=["black", "teal", "yellowgreen", "gold", "darkorange", "tomato"])
ax.plot(x_plot, f(x_plot), linewidth=lw, label="ground truth")

# plot training points
ax.scatter(x_train, y_train, label="training points")

# polynomial features: one ridge-regularized fit per polynomial degree
for degree in [3, 4, 5]:
    model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=1e-3))
    model.fit(X_train, y_train)
    y_plot = model.predict(X_plot)
    ax.plot(x_plot, y_plot, label=f"degree {degree}")

# B-spline with 4 + 3 - 1 = 6 basis functions
# (n_knots + degree - 1), using the same ridge penalty as the polynomials.
model = make_pipeline(SplineTransformer(n_knots=4, degree=3),
                      Ridge(alpha=1e-3))
model.fit(X_train, y_train)

y_plot = model.predict(X_plot)
ax.plot(x_plot, y_plot, label="B-spline")
ax.legend(loc="lower center")
# Fixed y-limits for the displayed range.
ax.set_ylim(-20, 10)
plt.show()

# %%
# This shows nicely that higher-degree polynomials can fit the data better. At
# the same time, powers that are too high can show unwanted oscillatory
# behaviour and are particularly dangerous for extrapolation beyond the range
# of the fitted data. This is where B-splines have an advantage: they usually
# fit the data as well as polynomials and behave smoothly. They also have good
Beispiel #17
0
# :class:`~preprocessing.SplineTransformer`. Splines are piecewise polynomials,
# parametrized by their polynomial degree and the positions of the knots. The
# :class:`~preprocessing.SplineTransformer` implements a B-spline basis.
#
# .. figure:: ../linear_model/images/sphx_glr_plot_polynomial_interpolation_001.png
#   :target: ../linear_model/plot_polynomial_interpolation.html
#   :align: center
#
# The following code shows splines in action, for more information, please
# refer to the :ref:`User Guide <spline_transformer>`.

import numpy as np
from sklearn.preprocessing import SplineTransformer

# Toy input: a single feature with the values 0..4, shaped as a (5, 1) column.
X = np.arange(5).reshape(5, 1)
# Spline basis of polynomial degree 2 built from 3 knots.
spline = SplineTransformer(degree=2, n_knots=3)
# Fit and transform in one call; the result is not assigned here.
spline.fit_transform(X)


##############################################################################
# Quantile Regressor
# --------------------------------------------------------------------------
# Quantile regression estimates the median or other quantiles of :math:`y`
# conditional on :math:`X`, while ordinary least squares (OLS) estimates the
# conditional mean.
#
# As a linear model, the new :class:`~linear_model.QuantileRegressor` gives
# linear predictions :math:`\hat{y}(w, X) = Xw` for the :math:`q`-th quantile,
# :math:`q \in (0, 1)`. The weights or coefficients :math:`w` are then found by
# the following minimization problem:
#
Beispiel #18
0
# Plot the ground truth, the training points, several polynomial fits and one
# B-spline fit on the same axes.
# NOTE(review): `ax`, `f`, `lw`, `x_plot`, `X_plot`, `x_train`, `X_train` and
# `y_train` are defined outside this view -- confirm against the full script.

# Fix the sequence of colors used by the subsequent plotting calls.
ax.set_prop_cycle(
    color=["black", "teal", "yellowgreen", "gold", "darkorange", "tomato"])
ax.plot(x_plot, f(x_plot), linewidth=lw, label="ground truth")

# plot training points
ax.scatter(x_train, y_train, label="training points")

# polynomial features: one ridge-regularized fit per polynomial degree
for degree in [3, 4, 5]:
    model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=1e-3))
    model.fit(X_train, y_train)
    y_plot = model.predict(X_plot)
    ax.plot(x_plot, y_plot, label=f"degree {degree}")

# B-spline with 4 + 3 - 1 = 6 basis functions
# (n_knots + degree - 1), using the same ridge penalty as the polynomials.
model = make_pipeline(SplineTransformer(n_knots=4, degree=3),
                      Ridge(alpha=1e-3))
model.fit(X_train, y_train)

y_plot = model.predict(X_plot)
ax.plot(x_plot, y_plot, label="B-spline")
ax.legend(loc='lower center')
# Fixed y-limits for the displayed range.
ax.set_ylim(-20, 10)
plt.show()

# %%
# This shows nicely that higher-degree polynomials can fit the data better. At
# the same time, powers that are too high can show unwanted oscillatory
# behaviour and are particularly dangerous for extrapolation beyond the range
# of the fitted data. This is where B-splines have an advantage: they usually
# fit the data as well as polynomials and behave smoothly. They also have good