import numpy as np
import pytest
from numpy.testing import assert_allclose
from pytest import approx

from sklearn.datasets import make_regression
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import QuantileRegressor


def test_linprog_failure():
    """Test that a ConvergenceWarning is raised when linprog fails."""
    X = np.linspace(0, 10, num=10).reshape(-1, 1)
    y = np.linspace(0, 10, num=10)
    reg = QuantileRegressor(
        alpha=0, solver="interior-point", solver_options={"maxiter": 1}
    )
    msg = "Linear programming for QuantileRegressor did not succeed."
    with pytest.warns(ConvergenceWarning, match=msg):
        reg.fit(X, y)
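

# For contrast, a minimal sketch (ours, not part of the test suite) of a run
# that does converge: with a modern solver and the default iteration limit,
# roughly a `quantile` fraction of the training targets falls at or below the
# predictions. The "highs" solver assumes SciPy >= 1.6.
def demo_successful_fit():
    rng = np.random.RandomState(0)
    X = rng.uniform(0, 10, size=(100, 1))
    y = X.ravel() + rng.normal(scale=1.0, size=100)
    reg = QuantileRegressor(quantile=0.5, alpha=0, solver="highs").fit(X, y)
    # About half of the targets should lie at or below the median prediction.
    assert abs(np.mean(y <= reg.predict(X)) - 0.5) < 0.1

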
def test_quantile_sample_weight():
    # test that with unequal sample weights we still estimate the weighted fraction
    n = 1000
    X, y = make_regression(n_samples=n, n_features=5, random_state=0, noise=10.0)
    weight = np.ones(n)
    # when we increase the weight of the upper observations,
    # the estimate of the quantile should go up
    weight[y > y.mean()] = 100
    quant = QuantileRegressor(
        quantile=0.5, alpha=1e-8, solver_options={"lstsq": False}
    )
    quant.fit(X, y, sample_weight=weight)
    fraction_below = np.mean(y < quant.predict(X))
    assert fraction_below > 0.5
    weighted_fraction_below = np.average(y < quant.predict(X), weights=weight)
    assert weighted_fraction_below == approx(0.5, abs=3e-2)
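

# For reference, QuantileRegressor minimizes the (optionally weighted) pinball
# loss plus an L1 penalty on the coefficients. A minimal NumPy sketch of the
# unpenalized loss; the helper name `pinball_loss` is ours, not scikit-learn API.
def pinball_loss(y_true, y_pred, quantile=0.5, sample_weight=None):
    diff = y_true - y_pred
    # Underestimation is charged `quantile` per unit, overestimation `1 - quantile`.
    loss = np.where(diff >= 0, quantile * diff, (quantile - 1) * diff)
    return np.average(loss, weights=sample_weight)

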
def get_calibration(train_approximation_distances, train_target_distances,
                    val_approximation_distances, val_target_distances, quantile):
    """Fit a quantile regressor on the training distances and return the
    fraction of validation targets at or below the predicted quantile,
    together with the predictions themselves."""
    qr = QuantileRegressor(quantile=quantile, alpha=0, solver="highs")
    qr.fit(train_approximation_distances.reshape(-1, 1), train_target_distances)
    predicted_target_distances = qr.predict(
        val_approximation_distances.reshape(-1, 1)
    ).squeeze()
    # Empirical coverage: how often the validation target is <= the prediction.
    num_leq = np.count_nonzero(val_target_distances <= predicted_target_distances)
    return num_leq / len(val_target_distances), predicted_target_distances
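

# Usage sketch on synthetic 1-D distances (data and shapes are our assumption):
# on held-out data, the coverage returned by `get_calibration` should land
# close to the requested quantile.
def demo_get_calibration():
    rng = np.random.RandomState(0)
    train_approx = rng.uniform(0, 10, size=500)
    train_target = train_approx + rng.normal(scale=1.0, size=500)
    val_approx = rng.uniform(0, 10, size=200)
    val_target = val_approx + rng.normal(scale=1.0, size=200)
    coverage, _ = get_calibration(
        train_approx, train_target, val_approx, val_target, quantile=0.9
    )
    # Up to sampling noise, roughly 90% of the validation targets fall below
    # the predicted 0.9-quantile.
    assert abs(coverage - 0.9) < 0.07

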
@pytest.mark.parametrize("quantile", [0.2, 0.5, 0.8])
def test_equivariance(quantile):
    """Test equivariance of quantile regression.

    See Koenker (2005) Quantile Regression, Chapter 2.2.3.
    """
    rng = np.random.RandomState(42)
    n_samples, n_features = 100, 5
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_features,
        noise=0,
        random_state=rng,
        shuffle=False,
    )
    # make y asymmetric
    y += rng.exponential(scale=100, size=y.shape)
    params = dict(alpha=0, solver_options={"lstsq": True, "tol": 1e-10})
    model1 = QuantileRegressor(quantile=quantile, **params).fit(X, y)

    # coef(q; a*y, X) = a * coef(q; y, X)
    a = 2.5
    model2 = QuantileRegressor(quantile=quantile, **params).fit(X, a * y)
    assert model2.intercept_ == approx(a * model1.intercept_, rel=1e-5)
    assert_allclose(model2.coef_, a * model1.coef_, rtol=1e-5)

    # coef(1-q; -a*y, X) = -a * coef(q; y, X)
    model2 = QuantileRegressor(quantile=1 - quantile, **params).fit(X, -a * y)
    assert model2.intercept_ == approx(-a * model1.intercept_, rel=1e-5)
    assert_allclose(model2.coef_, -a * model1.coef_, rtol=1e-5)

    # coef(q; y + X @ g, X) = coef(q; y, X) + g
    g_intercept, g_coef = rng.randn(), rng.randn(n_features)
    model2 = QuantileRegressor(quantile=quantile, **params)
    model2.fit(X, y + X @ g_coef + g_intercept)
    assert model2.intercept_ == approx(model1.intercept_ + g_intercept)
    assert_allclose(model2.coef_, model1.coef_ + g_coef, rtol=1e-6)

    # coef(q; y, X @ A) = A^-1 @ coef(q; y, X)
    A = rng.randn(n_features, n_features)
    model2 = QuantileRegressor(quantile=quantile, **params)
    model2.fit(X @ A, y)
    assert model2.intercept_ == approx(model1.intercept_, rel=1e-5)
    assert_allclose(model2.coef_, np.linalg.solve(A, model1.coef_), rtol=1e-5)
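

# The four properties exercised above, in the notation of Koenker (2005),
# Chapter 2.2.3, where beta(q; y, X) denotes the fitted coefficient vector
# (our transcription):
#
#   scale:       beta(q; a*y, X)       =  a * beta(q; y, X),        a > 0
#   sign flip:   beta(1-q; -a*y, X)    = -a * beta(q; y, X),        a > 0
#   regression:  beta(q; y + X @ g, X) =  beta(q; y, X) + g
#   reparam.:    beta(q; y, X @ A)     =  inv(A) @ beta(q; y, X),   A nonsingular

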
# %%
# Fitting a `QuantileRegressor`
# -----------------------------
#
# In this section, we want to estimate the conditional median as well as
# a low and high quantile fixed at 5% and 95%, respectively. Thus, we will get
# three linear models, one for each quantile.
#
# We will use the quantiles at 5% and 95% to find the outliers in the training
# sample beyond the central 90% interval.
from sklearn.linear_model import QuantileRegressor

quantiles = [0.05, 0.5, 0.95]
predictions = {}
out_bounds_predictions = np.zeros_like(y_true_mean, dtype=np.bool_)
for quantile in quantiles:
    qr = QuantileRegressor(quantile=quantile, alpha=0)
    y_pred = qr.fit(X, y_normal).predict(X)
    predictions[quantile] = y_pred

    if quantile == min(quantiles):
        out_bounds_predictions = np.logical_or(
            out_bounds_predictions, y_pred >= y_normal
        )
    elif quantile == max(quantiles):
        out_bounds_predictions = np.logical_or(
            out_bounds_predictions, y_pred <= y_normal
        )

# %%
# Now, we can plot the three linear models and distinguish the samples that
# are within the central 90% interval from the samples that are outside it.
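
# %%
# A minimal plotting sketch (styling choices are ours; `X`, `y_true_mean`,
# `y_normal`, `predictions`, and `out_bounds_predictions` come from above):
import matplotlib.pyplot as plt

plt.plot(X, y_true_mean, color="black", linestyle="dashed", label="True mean")
for quantile, y_pred in predictions.items():
    plt.plot(X, y_pred, label=f"Quantile: {quantile}")
plt.scatter(
    X[out_bounds_predictions],
    y_normal[out_bounds_predictions],
    color="black",
    marker="+",
    label="Outside interval",
)
plt.scatter(
    X[~out_bounds_predictions],
    y_normal[~out_bounds_predictions],
    color="black",
    alpha=0.5,
    label="Inside interval",
)
plt.legend()
plt.xlabel("x")
plt.ylabel("y")
plt.show()
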
# The `X_y_data` fixture is defined elsewhere in the real test module; this is
# a minimal stand-in so the snippet runs on its own.
@pytest.fixture
def X_y_data():
    X, y = make_regression(n_samples=10, n_features=1, random_state=0, noise=1)
    return X, y


def test_warning_new_default(X_y_data):
    """Check that we warn about the new default solver."""
    X, y = X_y_data
    model = QuantileRegressor()
    with pytest.warns(FutureWarning, match="The default solver will change"):
        model.fit(X, y)
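

# In user code, the FutureWarning is avoided by selecting a solver explicitly
# rather than relying on the changing default; "highs" assumes SciPy >= 1.6.
def demo_explicit_solver(X, y):
    return QuantileRegressor(solver="highs").fit(X, y)  # no FutureWarning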