def test_spline_transformer_manual_knot_input():
    """Test that array-like knot positions in SplineTransformer are accepted."""
    X = np.arange(20).reshape(10, 2)
    knots = [[0.5, 1], [1.5, 2], [5, 10]]
    st1 = SplineTransformer(degree=3, knots=knots).fit(X)
    knots = np.asarray(knots)
    st2 = SplineTransformer(degree=3, knots=knots).fit(X)
    for i in range(X.shape[1]):
        assert_allclose(st1.bsplines_[i].t, st2.bsplines_[i].t)
def test_spline_transformer_extrapolation(bias, intercept, degree):
    """Test that B-spline extrapolation works correctly."""
    # Use a straight line so that the extrapolated predictions are easy to check.
    X = np.linspace(-1, 1, 100)[:, None]
    y = X.squeeze()

    # 'constant'
    pipe = Pipeline(
        [
            (
                "spline",
                SplineTransformer(
                    n_knots=4,
                    degree=degree,
                    include_bias=bias,
                    extrapolation="constant",
                ),
            ),
            ("ols", LinearRegression(fit_intercept=intercept)),
        ]
    )
    pipe.fit(X, y)
    assert_allclose(pipe.predict([[-10], [5]]), [-1, 1])

    # 'linear'
    pipe = Pipeline(
        [
            (
                "spline",
                SplineTransformer(
                    n_knots=4,
                    degree=degree,
                    include_bias=bias,
                    extrapolation="linear",
                ),
            ),
            ("ols", LinearRegression(fit_intercept=intercept)),
        ]
    )
    pipe.fit(X, y)
    assert_allclose(pipe.predict([[-10], [5]]), [-10, 5])

    # 'error'
    splt = SplineTransformer(
        n_knots=4, degree=degree, include_bias=bias, extrapolation="error"
    )
    splt.fit(X)
    with pytest.raises(ValueError):
        splt.transform([[-10]])
    with pytest.raises(ValueError):
        splt.transform([[5]])
def test_spline_transformer_periodic_linear_regression(bias, intercept):
    """Test that B-splines fit a periodic curve pretty well."""

    # "+ 3" to avoid the value 0 in assert_allclose
    def f(x):
        return np.sin(2 * np.pi * x) - np.sin(8 * np.pi * x) + 3

    X = np.linspace(0, 1, 101)[:, None]
    pipe = Pipeline(
        steps=[
            (
                "spline",
                SplineTransformer(
                    n_knots=20,
                    degree=3,
                    include_bias=bias,
                    extrapolation="periodic",
                ),
            ),
            ("ols", LinearRegression(fit_intercept=intercept)),
        ]
    )
    pipe.fit(X, f(X[:, 0]))

    # Generate larger array to check periodic extrapolation
    X_ = np.linspace(-1, 2, 301)[:, None]
    predictions = pipe.predict(X_)
    assert_allclose(predictions, f(X_[:, 0]), atol=0.01, rtol=0.01)
    assert_allclose(predictions[0:100], predictions[100:200], rtol=1e-3)
def test_spline_transformer_periodic_spline_backport():
    """Test that the backport of extrapolate="periodic" works correctly."""
    X = np.linspace(-2, 3.5, 10)[:, None]
    degree = 2

    # Use periodic extrapolation backport in SplineTransformer
    transformer = SplineTransformer(
        degree=degree, extrapolation="periodic", knots=[[-1.0], [0.0], [1.0]]
    )
    Xt = transformer.fit_transform(X)

    # Use periodic extrapolation in BSpline
    coef = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0]])
    spl = BSpline(np.arange(-3, 4), coef, degree, "periodic")
    Xspl = spl(X[:, 0])
    assert_allclose(Xt, Xspl)
def test_spline_transformer_integer_knots(extrapolation):
    """Test that SplineTransformer accepts integer value knot positions."""
    X = np.arange(20).reshape(10, 2)
    knots = [[0, 1], [1, 2], [5, 5], [11, 10], [12, 11]]
    _ = SplineTransformer(
        degree=3, knots=knots, extrapolation=extrapolation
    ).fit_transform(X)
def test_spline_transformer_unity_decomposition(degree, n_knots, knots):
    """Test that B-splines are indeed a decomposition of unity.

    Spline basis functions must sum up to 1 per row, if we stay within the
    boundaries.
    """
    X = np.linspace(0, 1, 100)[:, None]
    # make the boundaries 0 and 1 part of X_train, for sure.
    X_train = np.r_[[[0]], X[::2, :], [[1]]]
    X_test = X[1::2, :]
    splt = SplineTransformer(
        n_knots=n_knots, degree=degree, knots=knots, include_bias=True
    )
    splt.fit(X_train)
    for X in [X_train, X_test]:
        assert_allclose(np.sum(splt.transform(X), axis=1), 1)
def test_spline_transformer_get_base_knot_positions(
    knots, n_knots, sample_weight, expected_knots
):
    # Check the behaviour to find the positions of the knots with and without
    # `sample_weight`.
    X = np.array([[0, 2], [0, 2], [2, 2], [3, 3], [4, 6], [5, 8], [6, 14]])
    base_knots = SplineTransformer._get_base_knot_positions(
        X=X, knots=knots, n_knots=n_knots, sample_weight=sample_weight
    )
    assert_allclose(base_knots, expected_knots)
def test_spline_transformer_kbindiscretizer():
    """Test that a B-spline of degree=0 is equivalent to KBinsDiscretizer."""
    rng = np.random.RandomState(97531)
    X = rng.randn(200).reshape(200, 1)
    n_bins = 5
    n_knots = n_bins + 1

    splt = SplineTransformer(
        n_knots=n_knots, degree=0, knots="quantile", include_bias=True
    )
    splines = splt.fit_transform(X)

    kbd = KBinsDiscretizer(n_bins=n_bins, encode="onehot-dense", strategy="quantile")
    kbins = kbd.fit_transform(X)

    # Though they should be exactly equal, we test approximately with high
    # accuracy.
    assert_allclose(splines, kbins, rtol=1e-13)
def periodic_spline_transformer(period, n_splines=None, degree=3):
    if n_splines is None:
        n_splines = period
    n_knots = n_splines + 1  # periodic and include_bias is True
    return SplineTransformer(
        degree=degree,
        n_knots=n_knots,
        knots=np.linspace(0, period, n_knots).reshape(n_knots, 1),
        extrapolation="periodic",
        include_bias=True,
    )
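# A minimal usage sketch of the helper above (illustrative only, not from the
# original source): encode a hypothetical hour-of-day feature. With period=24,
# the periodic basis maps hour 23 and hour 0 onto nearby values, which a plain
# polynomial expansion would not do.
hours = np.arange(24).reshape(-1, 1)
hour_splines = periodic_spline_transformer(period=24, n_splines=12).fit_transform(hours)
# hour_splines has shape (24, 12): one smooth, periodic basis column per spline.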
def test_spline_transformer_periodic_splines_smoothness(degree):
    """Test that spline transformation is smooth at first / last knot."""
    X = np.linspace(-2, 10, 10_000)[:, None]

    transformer = SplineTransformer(
        degree=degree,
        extrapolation="periodic",
        knots=[[0.0], [1.0], [3.0], [4.0], [5.0], [8.0]],
    )
    Xt = transformer.fit_transform(X)

    delta = (X.max() - X.min()) / len(X)
    tol = 10 * delta

    dXt = Xt
    # We expect splines of degree `degree` to be (`degree`-1) times
    # continuously differentiable. I.e. for d = 0, ..., `degree` - 1 the d-th
    # derivative should be continuous. This is the case if the (d+1)-th
    # numerical derivative is reasonably small (smaller than `tol` in absolute
    # value). We thus compute d-th numeric derivatives for d = 1, ..., `degree`
    # and compare them to `tol`.
    #
    # Note that the 0-th derivative is the function itself, such that we are
    # also checking its continuity.
    for d in range(1, degree + 1):
        # Check continuity of the (d-1)-th derivative
        diff = np.diff(dXt, axis=0)
        assert np.abs(diff).max() < tol
        # Compute d-th numeric derivative
        dXt = diff / delta

    # As degree `degree` splines are not `degree` times continuously
    # differentiable at the knots, the `degree + 1`-th numeric derivative
    # should have spikes at the knots.
    diff = np.diff(dXt, axis=0)
    assert np.abs(diff).max() > 1
def test_spline_transformer_feature_names(get_names):
    """Test that SplineTransformer generates correct feature names."""
    X = np.arange(20).reshape(10, 2)
    splt = SplineTransformer(n_knots=3, degree=3, include_bias=True).fit(X)
    feature_names = getattr(splt, get_names)()
    assert_array_equal(
        feature_names,
        [
            "x0_sp_0",
            "x0_sp_1",
            "x0_sp_2",
            "x0_sp_3",
            "x0_sp_4",
            "x1_sp_0",
            "x1_sp_1",
            "x1_sp_2",
            "x1_sp_3",
            "x1_sp_4",
        ],
    )

    splt = SplineTransformer(n_knots=3, degree=3, include_bias=False).fit(X)
    feature_names = getattr(splt, get_names)(["a", "b"])
    assert_array_equal(
        feature_names,
        [
            "a_sp_0",
            "a_sp_1",
            "a_sp_2",
            "a_sp_3",
            "b_sp_0",
            "b_sp_1",
            "b_sp_2",
            "b_sp_3",
        ],
    )
def test_spline_transformer_periodicity_of_extrapolation(knots, n_knots, degree):
    """Test that the SplineTransformer is periodic for multiple features."""
    X_1 = np.linspace((-1, 0), (1, 5), 10)
    X_2 = np.linspace((1, 5), (3, 10), 10)

    splt = SplineTransformer(
        knots=knots, n_knots=n_knots, degree=degree, extrapolation="periodic"
    )
    splt.fit(X_1)

    assert_allclose(splt.transform(X_1), splt.transform(X_2))
def test_spline_transformer_linear_regression(bias, intercept):
    """Test that B-splines fit a sinusoidal curve pretty well."""
    X = np.linspace(0, 10, 100)[:, None]
    y = np.sin(X[:, 0]) + 2  # +2 to avoid the value 0 in assert_allclose
    pipe = Pipeline(
        steps=[
            (
                "spline",
                SplineTransformer(
                    n_knots=15,
                    degree=3,
                    include_bias=bias,
                    extrapolation="constant",
                ),
            ),
            ("ols", LinearRegression(fit_intercept=intercept)),
        ]
    )
    pipe.fit(X, y)
    assert_allclose(pipe.predict(X), y, rtol=1e-3)
def test_spline_transformer_periodic_splines_periodicity():
    """
    Test if shifted knots result in the same transformation up to permutation.
    """
    X = np.linspace(0, 10, 101)[:, None]

    transformer_1 = SplineTransformer(
        degree=3,
        extrapolation="periodic",
        knots=[[0.0], [1.0], [3.0], [4.0], [5.0], [8.0]],
    )
    transformer_2 = SplineTransformer(
        degree=3,
        extrapolation="periodic",
        knots=[[1.0], [3.0], [4.0], [5.0], [8.0], [9.0]],
    )

    Xt_1 = transformer_1.fit_transform(X)
    Xt_2 = transformer_2.fit_transform(X)

    assert_allclose(Xt_1, Xt_2[:, [4, 0, 1, 2, 3]])
def test_spline_transformer_input_validation(params, err_msg):
    """Test that we raise errors for invalid input in SplineTransformer."""
    X = [[1], [2]]
    with pytest.raises(ValueError, match=err_msg):
        SplineTransformer(**params).fit(X)
ax.set_prop_cycle(
    color=["black", "teal", "yellowgreen", "gold", "darkorange", "tomato"]
)
ax.plot(x_plot, f(x_plot), linewidth=lw, label="ground truth")

# plot training points
ax.scatter(x_train, y_train, label="training points")

# polynomial features
for degree in [3, 4, 5]:
    model = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=1e-3))
    model.fit(X_train, y_train)
    y_plot = model.predict(X_plot)
    ax.plot(x_plot, y_plot, label=f"degree {degree}")

# B-spline with 4 + 3 - 1 = 6 basis functions
model = make_pipeline(SplineTransformer(n_knots=4, degree=3), Ridge(alpha=1e-3))
model.fit(X_train, y_train)
y_plot = model.predict(X_plot)
ax.plot(x_plot, y_plot, label="B-spline")

ax.legend(loc="lower center")
ax.set_ylim(-20, 10)
plt.show()

# %%
# This shows nicely that higher degree polynomials can fit the data better. But
# at the same time, too high powers can show unwanted oscillatory behaviour
# and are particularly dangerous for extrapolation beyond the range of fitted
# data. This is an advantage of B-splines. They usually fit the data as well as
# polynomials and show very nice and smooth behaviour. They have also good
# :class:`~preprocessing.SplineTransformer`. Splines are piecewise polynomials,
# parametrized by their polynomial degree and the positions of the knots. The
# :class:`~preprocessing.SplineTransformer` implements a B-spline basis.
#
# .. figure:: ../linear_model/images/sphx_glr_plot_polynomial_interpolation_001.png
#   :target: ../linear_model/plot_polynomial_interpolation.html
#   :align: center
#
# The following code shows splines in action; for more information, please
# refer to the :ref:`User Guide <spline_transformer>`.
import numpy as np
from sklearn.preprocessing import SplineTransformer

X = np.arange(5).reshape(5, 1)
spline = SplineTransformer(degree=2, n_knots=3)
spline.fit_transform(X)

##############################################################################
# Quantile Regressor
# --------------------------------------------------------------------------
# Quantile regression estimates the median or other quantiles of :math:`y`
# conditional on :math:`X`, while ordinary least squares (OLS) estimates the
# conditional mean.
#
# As a linear model, the new :class:`~linear_model.QuantileRegressor` gives
# linear predictions :math:`\hat{y}(w, X) = Xw` for the :math:`q`-th quantile,
# :math:`q \in (0, 1)`. The weights or coefficients :math:`w` are then found by
# the following minimization problem:
#