Example #1
# Imports assumed for these excerpts: in the scikit-learn versions these
# tests target, the loss classes live in the private module
# sklearn.ensemble._gb_losses (sklearn.ensemble.gradient_boosting in older
# releases), so the exact import path depends on the installed version.
import numpy as np
import pytest
from numpy.testing import assert_almost_equal
from pytest import approx

from sklearn.ensemble._gb_losses import (
    BinomialDeviance, ExponentialLoss, HuberLossFunction,
    LeastAbsoluteError, LeastSquaresError, MultinomialDeviance,
    QuantileLossFunction)
from sklearn.metrics import mean_pinball_loss


def test_init_raw_predictions_shapes():
    # Make sure get_init_raw_predictions returns float64 arrays with shape
    # (n_samples, K) where K is 1 for binary classification and regression, and
    # K = n_classes for multiclass classification
    rng = np.random.RandomState(0)

    n_samples = 100
    X = rng.normal(size=(n_samples, 5))
    y = rng.normal(size=n_samples)
    for loss in (LeastSquaresError(n_classes=1),
                 LeastAbsoluteError(n_classes=1),
                 QuantileLossFunction(n_classes=1),
                 HuberLossFunction(n_classes=1)):
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        assert raw_predictions.shape == (n_samples, 1)
        assert raw_predictions.dtype == np.float64

    y = rng.randint(0, 2, size=n_samples)
    for loss in (BinomialDeviance(n_classes=2), ExponentialLoss(n_classes=2)):
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        assert raw_predictions.shape == (n_samples, 1)
        assert raw_predictions.dtype == np.float64

    for n_classes in range(3, 5):
        y = rng.randint(0, n_classes, size=n_samples)
        loss = MultinomialDeviance(n_classes=n_classes)
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        assert raw_predictions.shape == (n_samples, n_classes)
        assert raw_predictions.dtype == np.float64
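
The shape contract asserted above comes from how each loss wraps its init
estimator: the regression losses fit a baseline model and reshape its 1-D
predictions into an (n_samples, 1) float64 column. A minimal sketch of that
step, assuming the mean-predicting DummyRegressor that LeastSquaresError
uses as its init estimator in these scikit-learn versions:

import numpy as np
from sklearn.dummy import DummyRegressor

def init_raw_predictions_sketch(X, y):
    # Baseline model: predicts the mean of y for every sample (assumed to
    # mirror LeastSquaresError.init_estimator()).
    init_estimator = DummyRegressor(strategy='mean').fit(X, y)
    predictions = init_estimator.predict(X)
    # Reshape the 1-D predictions into the (n_samples, 1) float64 layout
    # the test above asserts.
    return predictions.reshape(-1, 1).astype(np.float64)
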
Example #2
# Parametrization restored so the test collects and runs under pytest; the
# exact values are an assumption (any seed, and any alpha in (0, 1),
# exercise the same code paths).
@pytest.mark.parametrize('seed', range(5))
@pytest.mark.parametrize('alpha', [0.4, 0.5, 0.6])
def test_lad_equals_quantiles(seed, alpha):
    # Make sure quantile loss with alpha=0.5 equals half the LAD loss, and
    # that the quantile loss matches mean_pinball_loss for any alpha
    lad = LeastAbsoluteError()
    ql = QuantileLossFunction(alpha=alpha)

    n_samples = 50
    rng = np.random.RandomState(seed)
    raw_predictions = rng.normal(size=n_samples)
    y_true = rng.normal(size=n_samples)

    lad_loss = lad(y_true, raw_predictions)
    ql_loss = ql(y_true, raw_predictions)
    if alpha == 0.5:
        assert lad_loss == approx(2 * ql_loss)

    weights = np.linspace(0, 1, n_samples)**2
    lad_weighted_loss = lad(y_true, raw_predictions, sample_weight=weights)
    ql_weighted_loss = ql(y_true, raw_predictions, sample_weight=weights)
    if alpha == 0.5:
        assert lad_weighted_loss == approx(2 * ql_weighted_loss)
    pbl_weighted_loss = mean_pinball_loss(y_true,
                                          raw_predictions,
                                          sample_weight=weights,
                                          alpha=alpha)
    assert pbl_weighted_loss == approx(ql_weighted_loss)
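
The factor of two checked above falls out of the pinball loss definition:
pinball_alpha(y, p) = alpha * max(y - p, 0) + (1 - alpha) * max(p - y, 0),
which at alpha = 0.5 reduces to 0.5 * |y - p|, i.e. half the absolute error.
A quick standalone check of that identity with plain NumPy (independent of
the sklearn loss objects):

import numpy as np

def pinball(y_true, y_pred, alpha):
    # Pinball (quantile) loss: under- and over-predictions are weighted
    # asymmetrically by alpha and 1 - alpha.
    diff = y_true - y_pred
    return np.mean(alpha * np.maximum(diff, 0)
                   + (1 - alpha) * np.maximum(-diff, 0))

rng = np.random.RandomState(0)
y_true = rng.normal(size=50)
y_pred = rng.normal(size=50)
# At alpha = 0.5, twice the pinball loss equals the mean absolute error.
assert np.isclose(2 * pinball(y_true, y_pred, alpha=0.5),
                  np.mean(np.abs(y_true - y_pred)))
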
Example #3
def test_quantile_loss_function():
    # Non-regression test for the QuantileLossFunction object: there was a
    # sign problem when evaluating the function for negative values of
    # 'y_true - y_pred'
    x = np.asarray([-1.0, 0.0, 1.0])
    y_found = QuantileLossFunction(n_classes=1, alpha=0.9)(x, np.zeros_like(x))
    y_expected = np.asarray([0.1, 0.0, 0.9]).mean()
    np.testing.assert_allclose(y_found, y_expected)
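
Working the expected value out by hand: with alpha = 0.9 and predictions
fixed at zero, the residuals y - y_pred are [-1, 0, 1]. The negative
residual is weighted by 1 - alpha, contributing 0.1; the zero residual
contributes nothing; the positive residual is weighted by alpha,
contributing 0.9. The loss is the mean of [0.1, 0.0, 0.9].
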
Example #4
# Parametrization restored so the test runs under pytest; the seed values
# are an assumption.
@pytest.mark.parametrize('seed', range(5))
def test_lad_equals_quantile_50(seed):
    # Make sure quantile loss with alpha=0.5 equals half the LAD loss
    lad = LeastAbsoluteError(n_classes=1)
    ql = QuantileLossFunction(n_classes=1, alpha=0.5)

    n_samples = 50
    rng = np.random.RandomState(seed)
    raw_predictions = rng.normal(size=n_samples)
    y_true = rng.normal(size=n_samples)

    lad_loss = lad(y_true, raw_predictions)
    ql_loss = ql(y_true, raw_predictions)
    assert_almost_equal(lad_loss, 2 * ql_loss)

    weights = np.linspace(0, 1, n_samples)**2
    lad_weighted_loss = lad(y_true, raw_predictions, sample_weight=weights)
    ql_weighted_loss = ql(y_true, raw_predictions, sample_weight=weights)
    assert_almost_equal(lad_weighted_loss, 2 * ql_weighted_loss)
Example #5
def test_init_raw_predictions_values():
    # Make sure get_init_raw_predictions() returns the expected values
    # for each loss.
    rng = np.random.RandomState(0)

    n_samples = 100
    X = rng.normal(size=(n_samples, 5))
    y = rng.normal(size=n_samples)

    # Least squares loss
    loss = LeastSquaresError(n_classes=1)
    init_estimator = loss.init_estimator().fit(X, y)
    raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
    # Make sure baseline prediction is the mean of all targets
    assert_almost_equal(raw_predictions, y.mean())

    # Least absolute and huber loss
    for Loss in (LeastAbsoluteError, HuberLossFunction):
        loss = Loss(n_classes=1)
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        # Make sure baseline prediction is the median of all targets
        assert_almost_equal(raw_predictions, np.median(y))

    # Quantile loss
    for alpha in (.1, .5, .9):
        loss = QuantileLossFunction(n_classes=1, alpha=alpha)
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        # Make sure baseline prediction is the alpha-quantile of all targets
        assert_almost_equal(raw_predictions, np.percentile(y, alpha * 100))

    y = rng.randint(0, 2, size=n_samples)

    # Binomial deviance
    loss = BinomialDeviance(n_classes=2)
    init_estimator = loss.init_estimator().fit(X, y)
    # Make sure baseline prediction is equal to link_function(p), where p
    # is the proba of the positive class. We want predict_proba() to return p,
    # and by definition
    # p = inverse_link_function(raw_prediction) = sigmoid(raw_prediction)
    # So we want raw_prediction = link_function(p) = log(p / (1 - p))
    raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
    p = y.mean()
    assert_almost_equal(raw_predictions, np.log(p / (1 - p)))

    # Exponential loss
    loss = ExponentialLoss(n_classes=2)
    init_estimator = loss.init_estimator().fit(X, y)
    raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
    p = y.mean()
    assert_almost_equal(raw_predictions, .5 * np.log(p / (1 - p)))

    # Multinomial deviance loss
    for n_classes in range(3, 5):
        y = rng.randint(0, n_classes, size=n_samples)
        loss = MultinomialDeviance(n_classes=n_classes)
        init_estimator = loss.init_estimator().fit(X, y)
        raw_predictions = loss.get_init_raw_predictions(y, init_estimator)
        for k in range(n_classes):
            p = (y == k).mean()
            assert_almost_equal(raw_predictions[:, k], np.log(p))
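
The final assertion encodes the fact that the multinomial baseline raw
prediction for class k is the log of that class's empirical prior. A
minimal sketch of how such a baseline is produced, assuming a
prior-predicting DummyClassifier like the init estimator these scikit-learn
versions use, with clipping to keep log() finite:

import numpy as np
from sklearn.dummy import DummyClassifier

def multinomial_init_raw_predictions_sketch(X, y):
    # Baseline model: predict_proba returns each class's empirical
    # frequency for every sample.
    init_estimator = DummyClassifier(strategy='prior').fit(X, y)
    probas = init_estimator.predict_proba(X)
    # Clip exact zeros/ones before the log, then return the
    # (n_samples, n_classes) float64 array of log-priors.
    eps = np.finfo(np.float32).eps
    probas = np.clip(probas, eps, 1 - eps)
    return np.log(probas).astype(np.float64)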