Example #1
import pytest

from sklego.dummy import RandomRegressor


def test_values_uniform(random_xy_dataset_regr):
    X, y = random_xy_dataset_regr
    mod = RandomRegressor(strategy="uniform")
    predictions = mod.fit(X, y).predict(X)
    assert (predictions >= y.min()).all()
    assert (predictions <= y.max()).all()
    assert mod.min_ == pytest.approx(y.min(), abs=0.0001)
    assert mod.max_ == pytest.approx(y.max(), abs=0.0001)
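`random_xy_dataset_regr` is a pytest fixture defined in the project's tests/conftest.py and is not shown in these excerpts. A minimal sketch of such a fixture, assuming it only needs to return a small random regression dataset (the shapes and seed below are illustrative, not the project's actual values):

import numpy as np
import pytest


@pytest.fixture
def random_xy_dataset_regr():
    # Hypothetical stand-in: a small random regression dataset (X, y).
    np.random.seed(42)
    X = np.random.normal(0, 2, (100, 4))
    y = np.random.normal(0, 2, (100,))
    return X, y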
Example #2
from sklego.dummy import RandomRegressor


def test_estimator_checks(test_fn):
    # Tests that are skipped:
    # 'check_methods_subset_invariance': since we add noise, the method is not invariant on a subset
    # 'check_regressors_train': the score is not always greater than 0.5 due to randomness
    regr_normal = RandomRegressor(strategy="normal")
    test_fn(RandomRegressor.__name__ + '_normal', regr_normal)

    regr_uniform = RandomRegressor(strategy="uniform")
    test_fn(RandomRegressor.__name__ + '_uniform', regr_uniform)
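`test_fn` is another fixture defined elsewhere in the test suite. A rough, hypothetical sketch, assuming it simply wraps sklearn's check_estimator (the real fixture would additionally skip the checks listed in the comments above):

import pytest
from sklearn.utils.estimator_checks import check_estimator


@pytest.fixture
def test_fn():
    # Hypothetical stand-in: run sklearn's standard estimator checks for a
    # named estimator; `name` is only a label for the run.
    def run(name, estimator):
        check_estimator(estimator)
    return run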
Example #3
import pytest
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklego.dummy import RandomRegressor
from sklego.linear_model import DeadZoneRegressor
from sklego.mixture import GMMClassifier, BayesianGMMClassifier, GMMOutlierDetector, BayesianGMMOutlierDetector
from tests.conftest import id_func


@pytest.mark.parametrize("estimator", [
    RandomRegressor(strategy="uniform"),
    RandomRegressor(strategy="normal"),
    DeadZoneRegressor(effect="linear", n_iter=100),
    DeadZoneRegressor(effect="quadratic", n_iter=100),
],
                         ids=id_func)
def test_shape_regression(estimator, random_xy_dataset_regr):
    X, y = random_xy_dataset_regr
    assert estimator.fit(X, y).predict(X).shape[0] == y.shape[0]
    pipe = Pipeline(steps=[('scaler', StandardScaler()), ('clf', estimator)])
    assert pipe.fit(X, y).predict(X).shape[0] == y.shape[0]


@pytest.mark.parametrize("estimator", [
    GMMClassifier(),
    BayesianGMMClassifier(),
    GMMOutlierDetector(threshold=0.999, method="quantile"),
    GMMOutlierDetector(threshold=2, method="stddev"),
    BayesianGMMOutlierDetector(threshold=0.999, method="quantile"),
    BayesianGMMOutlierDetector(threshold=2, method="stddev")
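The `id_func` helper imported from tests.conftest in this example only exists to give the parametrized estimators readable pytest ids; a hypothetical one-liner with that effect (not necessarily the project's implementation):

def id_func(param):
    # Hypothetical sketch: use the estimator's repr as the pytest test id.
    return repr(param)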
Example #4
import pytest
from sklego.dummy import RandomRegressor

def test_bad_values():
    with pytest.raises(ValueError):
        RandomRegressor(strategy="foobar")
Example #5
import numpy as np
import pytest
from sklego.dummy import RandomRegressor

def test_values_normal(random_xy_dataset_regr):
    X, y = random_xy_dataset_regr
    mod = RandomRegressor(strategy="normal").fit(X, y)
    assert mod.mu_ == pytest.approx(np.mean(y), abs=0.001)
    assert mod.sigma_ == pytest.approx(np.std(y), abs=0.001)
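These assertions pin down what fit learns for the normal strategy: the mean and standard deviation of the target. A minimal sketch of that behaviour, assuming (this is not sklego's actual implementation) that predict then samples from the learned distribution:

import numpy as np

class NormalStrategySketch:
    """Hypothetical sketch of the 'normal' strategy, for illustration only."""

    def fit(self, X, y):
        # Learn the location and scale of the target distribution.
        self.mu_ = np.mean(y)
        self.sigma_ = np.std(y)
        return self

    def predict(self, X):
        # Sample one prediction per row of X from N(mu_, sigma_).
        return np.random.normal(self.mu_, self.sigma_, size=X.shape[0])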
Example #6
import numpy as np
import pytest

from sklego.dummy import RandomRegressor


def test_bad_values():
    np.random.seed(42)
    X = np.random.normal(0, 1, (10, 2))
    y = np.random.normal(0, 1, (10, 1))
    with pytest.raises(ValueError):
        RandomRegressor(strategy="foobar").fit(X, y)
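Unlike Example #4, the ValueError here is only raised once fit is called, which matches scikit-learn's convention of validating hyperparameters at fit time rather than in __init__. A hypothetical sketch of such a check (the helper name is illustrative, not sklego's):

def _check_strategy(strategy):
    # Hypothetical fit-time validation of the strategy hyperparameter.
    allowed = ("uniform", "normal")
    if strategy not in allowed:
        raise ValueError(f"strategy {strategy!r} is not in {allowed}")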
Example #7
from collections import defaultdict

import pytest
from sklearn.linear_model import LinearRegression
from sklearn.utils import estimator_checks

from sklego.dummy import RandomRegressor
from sklego.transformers import EstimatorTransformer, RandomAdder
from tests.conftest import id_func


@pytest.mark.parametrize("estimator", [
    RandomAdder(),
    EstimatorTransformer(LinearRegression()),
    RandomRegressor(),
],
                         ids=id_func)
def test_check_estimator(estimator, monkeypatch):
    """Uses the sklearn `check_estimator` method to verify our custom estimators"""

    # Not all estimators CAN adhere to the defined sklearn API. An example of this is the RandomAdder, since sklearn
    # expects methods to be invariant to whether they are applied to the full dataset or a subset.
    # These tests can be monkey-patched out using the skips dictionary.
    skips = defaultdict(
        list,
        {
            RandomAdder: [
                # Since we add noise, the method is not invariant on a subset
                'check_methods_subset_invariance',
                # The TransformerSelectorMixin needs to compute a hash, which it can't do on a 'NotAnArray'
                'check_transformer_data_not_an_array',
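As a hedged illustration of the monkey-patching pattern described in the comments (not the original test body), each skipped check could be replaced with a no-op in sklearn.utils.estimator_checks before check_estimator runs; whether check_estimator picks up the patched functions depends on the sklearn version:

from sklearn.utils import estimator_checks


def _skip_checks(monkeypatch, check_names):
    # Hypothetical helper: replace the named checks with no-ops so that
    # sklearn's check_estimator silently passes over them.
    for name in check_names:
        monkeypatch.setattr(estimator_checks, name, lambda *args, **kwargs: None)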