Exemple #1
0
def test_tweedie_distribution_power():
    """Check validation of the ``power`` parameter of TweedieDistribution.

    The test covers three things:

    * ``power=0.5`` is rejected with a ``ValueError``;
    * a complex ``power`` is rejected with a ``TypeError``, both at
      construction time and when assigned through the ``power`` attribute;
    * the private ``_lower_bound`` is a ``DistributionBoundary`` that is not
      inclusive for the default distribution and becomes an inclusive bound
      at ``0.0`` once ``power`` is set to 1.
    """
    msg = "distribution is only defined for power<=0 and power>=1"
    with pytest.raises(ValueError, match=msg):
        TweedieDistribution(power=0.5)

    with pytest.raises(TypeError, match="must be a real number"):
        TweedieDistribution(power=1j)

    # Build the instance outside of the ``raises`` block so that only the
    # attribute assignment can satisfy the expected TypeError; an error
    # raised by the constructor must not make this check pass by accident.
    dist = TweedieDistribution()
    with pytest.raises(TypeError, match="must be a real number"):
        dist.power = 1j

    dist = TweedieDistribution()
    assert isinstance(dist._lower_bound, DistributionBoundary)

    assert dist._lower_bound.inclusive is False
    # Changing the power is expected to update the lower bound as well.
    dist.power = 1
    assert dist._lower_bound.value == 0.0
    assert dist._lower_bound.inclusive is True
Exemple #2
0
def test_tweedie_regression_family(regression_data):
    """Check that ``family`` and ``power`` of TweedieRegressor stay in sync.

    The ``family`` attribute must always be a ``TweedieDistribution``, its
    ``power`` must mirror the estimator's ``power``, and assigning anything
    that is not a ``TweedieDistribution`` must raise a ``TypeError``.
    """
    initial_power = 2.0
    est = TweedieRegressor(power=initial_power)
    assert isinstance(est.family, TweedieDistribution)
    assert est.family.power == initial_power
    assert est.power == initial_power

    # Replace the family and verify that the estimator's power follows it.
    replacement_power = 0
    est.family = TweedieDistribution(power=replacement_power)
    assert isinstance(est.family, TweedieDistribution)
    assert est.family.power == replacement_power
    assert est.power == replacement_power

    # Anything other than a TweedieDistribution is refused.
    with pytest.raises(
        TypeError,
        match="TweedieRegressor.family must be of type TweedieDistribution!",
    ):
        est.family = None
Exemple #3
0
def test_invalid_distribution_bound():
    """Check that ``in_y_range`` rejects a lower bound of the wrong type."""
    dist = TweedieDistribution()
    # Deliberately overwrite the private bound with a plain integer.
    dist._lower_bound = 0
    expected_message = "must be of type DistributionBoundary"
    with pytest.raises(TypeError, match=expected_message):
        dist.in_y_range([-1, 0, 1])
Exemple #4
0
from sklearn._loss.glm_distribution import (
    TweedieDistribution,
    NormalDistribution,
    PoissonDistribution,
    GammaDistribution,
    InverseGaussianDistribution,
    DistributionBoundary,
)


@pytest.mark.parametrize(
    "family, expected",
    [
        # Expected to accept -1, 0 and 1.
        (NormalDistribution(), [True, True, True]),
        # Expected to accept 0 and 1 but not -1.
        (PoissonDistribution(), [False, True, True]),
        (TweedieDistribution(power=1.5), [False, True, True]),
        # Expected to accept only 1.
        (GammaDistribution(), [False, False, True]),
        (InverseGaussianDistribution(), [False, False, True]),
        (TweedieDistribution(power=4.5), [False, False, True]),
    ],
)
def test_family_bounds(family, expected):
    """Check ``in_y_range`` of each distribution on the values -1, 0 and 1."""
    probe_values = [-1, 0, 1]
    in_range = family.in_y_range(probe_values)
    assert_array_equal(in_range, expected)


def test_invalid_distribution_bound():
    dist = TweedieDistribution()
    dist._lower_bound = 0
    with pytest.raises(TypeError,
    [
        (PoissonRegressor(), True),
        (GammaRegressor(), True),
        (TweedieRegressor(power=1.5), True),
        (TweedieRegressor(power=0), False),
    ],
)
def test_tags(estimator, value):
    """Check the ``requires_positive_y`` tag reported by the estimator."""
    tags = estimator._get_tags()
    assert tags["requires_positive_y"] is value


# TODO(1.3): remove
@pytest.mark.parametrize(
    "est, family",
    [
        (PoissonRegressor(), "poisson"),
        (GammaRegressor(), "gamma"),
        (TweedieRegressor(), TweedieDistribution()),
        (TweedieRegressor(power=2), TweedieDistribution(power=2)),
        (TweedieRegressor(power=3), TweedieDistribution(power=3)),
    ],
)
def test_family_deprecation(est, family):
    """Check that reading the deprecated ``family`` property still works.

    Accessing ``est.family`` must emit a ``FutureWarning``. The returned
    value is compared directly when the expectation is a string, and by
    class and ``power`` otherwise.
    """
    expects_distribution = not isinstance(family, str)
    with pytest.warns(FutureWarning, match="`family` was deprecated"):
        if expects_distribution:
            assert est.family.__class__ == family.__class__
            assert est.family.power == family.power
        else:
            assert est.family == family
Exemple #6
0
        X, y, sample_weight=sample_weight_1
    )

    glm2 = GeneralizedLinearRegressor(**glm_params).fit(X2, y2, sample_weight=None)
    assert_allclose(glm1.coef_, glm2.coef_)


@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize(
    "family",
    [
        NormalDistribution(),
        PoissonDistribution(),
        GammaDistribution(),
        InverseGaussianDistribution(),
        TweedieDistribution(power=1.5),
        TweedieDistribution(power=4.5),
    ],
)
def test_glm_log_regression(fit_intercept, family):
    """Test GLM regression with log link on a simple dataset."""
    coef = [0.2, -0.1]
    X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T
    y = np.exp(np.dot(X, coef))
    glm = GeneralizedLinearRegressor(
        alpha=0, family=family, link="log", fit_intercept=fit_intercept, tol=1e-7
    )
    if fit_intercept:
        res = glm.fit(X[:, 1:], y)
        assert_allclose(res.coef_, coef[1:], rtol=1e-6)
        assert_allclose(res.intercept_, coef[0], rtol=1e-6)