def test_asymmetric_error(quantile):
    """Check quantile regression on asymmetrically distributed targets.

    Targets are drawn from an exponential distribution whose scale is set so
    that its quantile at level ``quantile`` equals ``X @ coef + intercept``.
    The unpenalized fit is compared with the generating parameters, and an
    L1-penalized refit is compared with a Nelder-Mead minimization of the
    same penalized pinball objective.
    """
    n_samples = 1000
    rng = np.random.RandomState(42)
    # Keep the draw order (normal first, then integers) so the generated
    # data stay the same for the fixed seed.
    positive_feature = np.abs(rng.randn(n_samples)[:, None])
    negated_binary_feature = -rng.randint(2, size=(n_samples, 1))
    X = np.concatenate((positive_feature, negated_binary_feature), axis=1)

    intercept = 1.23
    coef = np.array([0.5, -2])
    linear_predictor = X @ coef + intercept
    # Take care that the linear predictor is strictly positive everywhere.
    assert np.min(linear_predictor) > 0

    # For an exponential distribution with rate lambda, e.g. exp(-lambda * x),
    # the quantile at level q is:
    #   quantile(q) = - log(1 - q) / lambda
    # and therefore:
    #   scale = 1/lambda = -quantile(q) / log(1 - q)
    y = rng.exponential(
        scale=-linear_predictor / np.log(1 - quantile), size=n_samples
    )

    # This test can be made to pass with any solver but in the interest
    # of sparing continuous integration resources, the test is performed
    # with the fastest solver only.
    model = QuantileRegressor(quantile=quantile, alpha=0, solver="highs")
    model.fit(X, y)

    assert model.intercept_ == approx(intercept, rel=0.2)
    assert_allclose(model.coef_, coef, rtol=0.6)
    assert_allclose(np.mean(model.predict(X) > y), quantile, atol=1e-2)

    # Refit with an L1 penalty and compare to Nelder-Mead optimization.
    l1_strength = 0.01
    model.set_params(alpha=l1_strength).fit(X, y)
    fitted_params = np.r_[model.intercept_, model.coef_]

    def penalized_objective(params):
        """Return the mean pinball loss plus the L1 penalty.

        ``params[0]`` is the intercept (not penalized); ``params[1:]`` are
        the feature coefficients.
        """
        prediction = X @ params[1:] + params[0]
        pinball = mean_pinball_loss(y, prediction, alpha=quantile)
        return pinball + l1_strength * np.sum(np.abs(params[1:]))

    reference = minimize(
        fun=penalized_objective,
        x0=[1, 0, -1],
        method="Nelder-Mead",
        tol=1e-12,
        options={"maxiter": 2000},
    )

    assert penalized_objective(fitted_params) == approx(
        penalized_objective(reference.x)
    )
    assert_allclose(model.intercept_, reference.x[0])
    assert_allclose(model.coef_, reference.x[1:])
    assert_allclose(np.mean(model.predict(X) > y), quantile, atol=1e-2)
def test_quantile_sample_weight():
    """Check that unequal sample weights yield the weighted quantile fraction.

    Observations above the mean of ``y`` get weight 100 and all others get
    weight 1. The unweighted fraction of targets below the prediction must
    then exceed 0.5, while the weighted fraction must stay close to 0.5.
    """
    n_obs = 1000
    X, y = make_regression(
        n_samples=n_obs, n_features=5, random_state=0, noise=10.0
    )
    # When we increase the weight of the upper observations, the estimate
    # of the quantile should go up.
    sample_weights = np.where(y > y.mean(), 100.0, 1.0)

    regressor = QuantileRegressor(
        quantile=0.5, alpha=1e-8, solver_options={"lstsq": False}
    )
    regressor.fit(X, y, sample_weight=sample_weights)

    is_below = y < regressor.predict(X)
    assert np.mean(is_below) > 0.5
    assert np.average(is_below, weights=sample_weights) == approx(0.5, abs=3e-2)
def test_quantile_estimates_calibration(q):
    """Check that about a fraction ``q`` of targets lies below the prediction."""
    X, y = make_regression(
        n_samples=1000, n_features=20, random_state=0, noise=1.0
    )
    regressor = QuantileRegressor(
        quantile=q, alpha=0, solver_options={"lstsq": False}
    )
    regressor.fit(X, y)

    share_below = np.mean(y < regressor.predict(X))
    assert share_below == approx(q, abs=1e-2)
def test_quantile_equals_huber_for_low_epsilon(fit_intercept):
    """Check that HuberRegressor with epsilon just above 1 matches QuantileRegressor.

    Both estimators are fitted on the same data with the same ``alpha`` and
    ``fit_intercept``; their coefficients (and intercepts, when fitted) must
    agree within an absolute tolerance of 0.1.
    """
    X, y = make_regression(
        n_samples=100, n_features=20, random_state=0, noise=1.0
    )
    shared_params = {"alpha": 1e-4, "fit_intercept": fit_intercept}
    huber_model = HuberRegressor(epsilon=1 + 1e-4, **shared_params).fit(X, y)
    quantile_model = QuantileRegressor(**shared_params).fit(X, y)

    assert_allclose(huber_model.coef_, quantile_model.coef_, atol=1e-1)
    if not fit_intercept:
        return

    assert huber_model.intercept_ == approx(quantile_model.intercept_, abs=1e-1)
    # Check that we still predict the expected fraction of targets.
    share_below = np.mean(y < quantile_model.predict(X))
    assert share_below == approx(0.5, abs=1e-1)
def test_sparse_input(sparse_format, solver, fit_intercept):
    """Check that fitting on sparse X gives the same result as on dense X.

    ``sparse_format`` is called on the dense design matrix to build the
    sparse input. The dense model uses the default solver, the sparse model
    uses ``solver``.
    """
    X_dense, y = make_regression(
        n_samples=100, n_features=20, random_state=1, noise=1.0
    )
    X_converted = sparse_format(X_dense)
    shared_params = {"alpha": 1e-4, "fit_intercept": fit_intercept}

    dense_model = QuantileRegressor(**shared_params).fit(X_dense, y)
    sparse_model = QuantileRegressor(solver=solver, **shared_params).fit(
        X_converted, y
    )

    assert_allclose(sparse_model.coef_, dense_model.coef_, rtol=1e-2)
    if fit_intercept:
        assert sparse_model.intercept_ == approx(dense_model.intercept_)
        # Check that we still predict the expected fraction of targets.
        share_below = np.mean(y < sparse_model.predict(X_converted))
        assert 0.45 <= share_below <= 0.55
# Example no. 6
# 0
def get_calibration(train_approximation_distances, train_target_distances,
                    val_approximation_distances, val_target_distances,
                    quantile):
    """Fit a quantile regressor on training distances and measure its coverage.

    A ``QuantileRegressor`` (``alpha=0``, ``solver='highs'``) is fitted to
    predict target distances from approximation distances, then evaluated on
    the validation pairs.

    Parameters
    ----------
    train_approximation_distances : ndarray
        Training inputs; reshaped to a single feature column before fitting.
    train_target_distances : array-like
        Training targets passed to ``fit``.
    val_approximation_distances : ndarray
        Validation inputs; reshaped to a single feature column before
        predicting.
    val_target_distances : sized iterable
        Validation targets compared element-wise with the predictions.
    quantile : float
        Quantile level passed to ``QuantileRegressor``.

    Returns
    -------
    coverage : float
        Number of validation targets that are less than or equal to their
        prediction, divided by ``len(val_target_distances)``.
    predicted_target_distances : ndarray
        One-dimensional array of predictions for the validation inputs.
    """
    qr = QuantileRegressor(quantile=quantile, alpha=0, solver='highs')
    qr.fit(train_approximation_distances.reshape(-1, 1),
           train_target_distances)

    # ``ravel`` (rather than ``squeeze``) keeps the predictions one-dimensional
    # even for a single validation sample; ``squeeze`` would collapse that
    # case to a 0-d array, which cannot be iterated over below.
    predicted_target_distances = qr.predict(
        val_approximation_distances.reshape(-1, 1)).ravel()

    # Count the validation targets covered by (i.e. not above) the prediction.
    num_leq = sum(
        1
        for predicted_target, val_target in zip(predicted_target_distances,
                                                val_target_distances)
        if val_target <= predicted_target
    )

    return num_leq / len(val_target_distances), predicted_target_distances