Example #1
0
def test_y_mean_attribute_regressor():
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]
    # when strategy = 'mean'
    est = DummyRegressor(strategy='mean')
    est.fit(X, y)

    assert est.constant_ == np.mean(y)
Example #2
0
def test_dummy_regressor_on_3D_array():
    X = np.array([[['foo']], [['bar']], [['baz']]])
    y = np.array([2, 2, 2])
    y_expected = np.array([2, 2, 2])
    cls = DummyRegressor()
    cls.fit(X, y)
    y_pred = cls.predict(X)
    assert_array_equal(y_pred, y_expected)
Example #3
0
def test_median_strategy_regressor():

    random_state = np.random.RandomState(seed=1)

    X = [[0]] * 5  # ignored
    y = random_state.randn(5)

    reg = DummyRegressor(strategy="median")
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [np.median(y)] * len(X))
Example #4
0
def test_weights_regressor():
    """Check weighted average regression prediction on boston dataset."""
    reg1 = DummyRegressor(strategy='mean')
    reg2 = DummyRegressor(strategy='median')
    reg3 = DummyRegressor(strategy='quantile', quantile=.2)
    ereg = VotingRegressor([('mean', reg1), ('median', reg2),
                            ('quantile', reg3)],
                           weights=[1, 2, 10])

    X_r_train, X_r_test, y_r_train, y_r_test = \
        train_test_split(X_r, y_r, test_size=.25)

    reg1_pred = reg1.fit(X_r_train, y_r_train).predict(X_r_test)
    reg2_pred = reg2.fit(X_r_train, y_r_train).predict(X_r_test)
    reg3_pred = reg3.fit(X_r_train, y_r_train).predict(X_r_test)
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    avg = np.average(np.asarray([reg1_pred, reg2_pred, reg3_pred]),
                     axis=0,
                     weights=[1, 2, 10])
    assert_almost_equal(ereg_pred, avg, decimal=2)

    ereg_weights_none = VotingRegressor([('mean', reg1), ('median', reg2),
                                         ('quantile', reg3)],
                                        weights=None)
    ereg_weights_equal = VotingRegressor([('mean', reg1), ('median', reg2),
                                          ('quantile', reg3)],
                                         weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
Example #5
0
def test_dummy_regressor_return_std():
    X = [[0]] * 3  # ignored
    y = np.array([2, 2, 2])
    y_std_expected = np.array([0, 0, 0])
    cls = DummyRegressor()
    cls.fit(X, y)
    y_pred_list = cls.predict(X, return_std=True)
    # there should be two elements when return_std is True
    assert len(y_pred_list) == 2
    # the second element should be all zeros
    assert_array_equal(y_pred_list[1], y_std_expected)
Example #6
0
def test_regressor_prediction_independent_of_X(strategy):
    y = [0, 2, 1, 1]
    X1 = [[0]] * 4
    reg1 = DummyRegressor(strategy=strategy, constant=0, quantile=0.7)
    reg1.fit(X1, y)
    predictions1 = reg1.predict(X1)

    X2 = [[1]] * 4
    reg2 = DummyRegressor(strategy=strategy, constant=0, quantile=0.7)
    reg2.fit(X2, y)
    predictions2 = reg2.predict(X2)

    assert_array_equal(predictions1, predictions2)
Example #7
0
def test_constant_strategy_regressor():

    random_state = np.random.RandomState(seed=1)

    X = [[0]] * 5  # ignored
    y = random_state.randn(5)

    reg = DummyRegressor(strategy="constant", constant=[43])
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [43] * len(X))

    reg = DummyRegressor(strategy="constant", constant=43)
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [43] * len(X))
Example #8
0
def test_constant_size_multioutput_regressor():
    random_state = np.random.RandomState(seed=1)
    X = random_state.randn(10, 10)
    y = random_state.randn(10, 5)

    est = DummyRegressor(strategy='constant', constant=[1, 2, 3, 4])
    assert_raises(ValueError, est.fit, X, y)
Example #9
0
def test_dummy_regressor_sample_weight(n_samples=10):
    random_state = np.random.RandomState(seed=1)

    X = [[0]] * n_samples
    y = random_state.rand(n_samples)
    sample_weight = random_state.rand(n_samples)

    est = DummyRegressor(strategy="mean").fit(X, y, sample_weight)
    assert est.constant_ == np.average(y, weights=sample_weight)

    est = DummyRegressor(strategy="median").fit(X, y, sample_weight)
    assert est.constant_ == _weighted_percentile(y, sample_weight, 50.)

    est = DummyRegressor(strategy="quantile", quantile=.95).fit(X, y,
                                                                sample_weight)
    assert est.constant_ == _weighted_percentile(y, sample_weight, 95.)
Example #10
0
class NoWeightRegressor(BaseEstimator, RegressorMixin):
    def fit(self, X, y):
        self.reg = DummyRegressor()
        return self.reg.fit(X, y)

    def predict(self, X):
        return np.ones(X.shape[0])
Example #11
0
def test_notfitted():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='soft')
    ereg = VotingRegressor([('dr', DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call \'fit\'"
           " with appropriate arguments before using this estimator.")
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict_proba, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.transform, X)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor', ereg.predict,
                         X_r)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
                         ereg.transform, X_r)
def test_multidimensional_X():
    """
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    """
    rng = np.random.RandomState(0)

    X = rng.randn(50, 3, 3)
    yc = rng.choice([0, 1], 50)
    yr = rng.randn(50)

    boost = AdaBoostClassifier(DummyClassifier(strategy='most_frequent'))
    boost.fit(X, yc)
    boost.predict(X)
    boost.predict_proba(X)

    boost = AdaBoostRegressor(DummyRegressor())
    boost.fit(X, yr)
    boost.predict(X)
Example #13
0
def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({
        "max_samples": [0.5, 1.0],
        "max_features": [0.5, 1.0],
        "bootstrap": [True, False],
        "bootstrap_features": [True, False]
    })

    for base_estimator in [
            None,
            DummyRegressor(),
            DecisionTreeRegressor(),
            KNeighborsRegressor(),
            SVR()
    ]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)
Example #14
0
def test_mean_strategy_multioutput_regressor():

    random_state = np.random.RandomState(seed=1)

    X_learn = random_state.randn(10, 10)
    y_learn = random_state.randn(10, 5)

    mean = np.mean(y_learn, axis=0).reshape((1, -1))

    X_test = random_state.randn(20, 10)
    y_test = random_state.randn(20, 5)

    # Correctness oracle
    est = DummyRegressor()
    est.fit(X_learn, y_learn)
    y_pred_learn = est.predict(X_learn)
    y_pred_test = est.predict(X_test)

    _check_equality_regressor(mean, y_learn, y_pred_learn, y_test, y_pred_test)
    _check_behavior_2d(est)
Example #15
0
def test_constant_strategy_multioutput_regressor():

    random_state = np.random.RandomState(seed=1)

    X_learn = random_state.randn(10, 10)
    y_learn = random_state.randn(10, 5)

    # test with 2d array
    constants = random_state.randn(5)

    X_test = random_state.randn(20, 10)
    y_test = random_state.randn(20, 5)

    # Correctness oracle
    est = DummyRegressor(strategy="constant", constant=constants)
    est.fit(X_learn, y_learn)
    y_pred_learn = est.predict(X_learn)
    y_pred_test = est.predict(X_test)

    _check_equality_regressor(
        constants, y_learn, y_pred_learn, y_test, y_pred_test)
    _check_behavior_2d_for_constant(est)
Example #16
0
def test_quantile_invalid():

    X = [[0]] * 5  # ignored
    y = [0] * 5  # ignored

    est = DummyRegressor(strategy="quantile")
    assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile=None)
    assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile=[0])
    assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile=-0.1)
    assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile=1.1)
    assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile='abc')
    assert_raises(TypeError, est.fit, X, y)
Example #17
0
 def fit(self, X, y):
     self.reg = DummyRegressor()
     return self.reg.fit(X, y)
Example #18
0
def test_regressor_score_with_None(y, y_test):
    reg = DummyRegressor()
    reg.fit(None, y)
    assert reg.score(None, y_test) == 1.0
Example #19
0
def test_regressor_exceptions():
    reg = DummyRegressor()
    assert_raises(NotFittedError, reg.predict, [])
Example #20
0
def test_quantile_strategy_regressor():

    random_state = np.random.RandomState(seed=1)

    X = [[0]] * 5  # ignored
    y = random_state.randn(5)

    reg = DummyRegressor(strategy="quantile", quantile=0.5)
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [np.median(y)] * len(X))

    reg = DummyRegressor(strategy="quantile", quantile=0)
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [np.min(y)] * len(X))

    reg = DummyRegressor(strategy="quantile", quantile=1)
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [np.max(y)] * len(X))

    reg = DummyRegressor(strategy="quantile", quantile=0.3)
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [np.percentile(y, q=30)] * len(X))
Example #21
0
def test_constants_not_specified_regressor():
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]

    est = DummyRegressor(strategy='constant')
    assert_raises(TypeError, est.fit, X, y)
Example #22
0
def test_unknown_strategey_regressor():
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]

    est = DummyRegressor(strategy='gona')
    assert_raises(ValueError, est.fit, X, y)
Example #23
0
def test_quantile_strategy_multioutput_regressor():

    random_state = np.random.RandomState(seed=1)

    X_learn = random_state.randn(10, 10)
    y_learn = random_state.randn(10, 5)

    median = np.median(y_learn, axis=0).reshape((1, -1))
    quantile_values = np.percentile(y_learn, axis=0, q=80).reshape((1, -1))

    X_test = random_state.randn(20, 10)
    y_test = random_state.randn(20, 5)

    # Correctness oracle
    est = DummyRegressor(strategy="quantile", quantile=0.5)
    est.fit(X_learn, y_learn)
    y_pred_learn = est.predict(X_learn)
    y_pred_test = est.predict(X_test)

    _check_equality_regressor(
        median, y_learn, y_pred_learn, y_test, y_pred_test)
    _check_behavior_2d(est)

    # Correctness oracle
    est = DummyRegressor(strategy="quantile", quantile=0.8)
    est.fit(X_learn, y_learn)
    y_pred_learn = est.predict(X_learn)
    y_pred_test = est.predict(X_test)

    _check_equality_regressor(
        quantile_values, y_learn, y_pred_learn, y_test, y_pred_test)
    _check_behavior_2d(est)
Example #24
0
    if imputation_order == 'roman':
        assert np.all(ordered_idx[:d-1] == np.arange(1, d))
    elif imputation_order == 'arabic':
        assert np.all(ordered_idx[:d-1] == np.arange(d-1, 0, -1))
    elif imputation_order == 'random':
        ordered_idx_round_1 = ordered_idx[:d-1]
        ordered_idx_round_2 = ordered_idx[d-1:]
        assert ordered_idx_round_1 != ordered_idx_round_2
    elif 'ending' in imputation_order:
        assert len(ordered_idx) == max_iter * (d - 1)


@pytest.mark.parametrize(
    "estimator",
    [None, DummyRegressor(), BayesianRidge(), ARDRegression(), RidgeCV()]
)
def test_iterative_imputer_estimators(estimator):
    rng = np.random.RandomState(0)

    n = 100
    d = 10
    X = _sparse_random_matrix(n, d, density=0.10, random_state=rng).toarray()

    imputer = IterativeImputer(missing_values=0,
                               max_iter=1,
                               estimator=estimator,
                               random_state=rng)
    imputer.fit_transform(X)

    # check that types are correct for estimators
Example #25
0
def test_quantile_strategy_empty_train():
    est = DummyRegressor(strategy="quantile", quantile=0.4)
    assert_raises(ValueError, est.fit, [], [])
Example #26
0
def test_set_params_nested_pipeline():
    estimator = Pipeline([('a', Pipeline([('b', DummyRegressor())]))])
    estimator.set_params(a__b__alpha=0.001, a__b=Lasso())
    estimator.set_params(a__steps=[('b', LogisticRegression())], a__b__C=5)
Example #27
0
    reg_drop = StackingRegressor(estimators=estimators,
                                 final_estimator=rf,
                                 cv=5)

    reg.fit(X_train, y_train)
    reg_drop.fit(X_train, y_train)
    assert_allclose(reg.predict(X_test), reg_drop.predict(X_test))
    assert_allclose(reg.transform(X_test), reg_drop.transform(X_test))


@pytest.mark.parametrize(
    "cv", [3, KFold(n_splits=3, shuffle=True, random_state=42)])
@pytest.mark.parametrize("final_estimator, predict_params",
                         [(None, {}),
                          (RandomForestRegressor(random_state=42), {}),
                          (DummyRegressor(), {
                              'return_std': True
                          })])
@pytest.mark.parametrize("passthrough", [False, True])
def test_stacking_regressor_diabetes(cv, final_estimator, predict_params,
                                     passthrough):
    # prescale the data to avoid convergence warning without using a pipeline
    # for later assert
    X_train, X_test, y_train, _ = train_test_split(scale(X_diabetes),
                                                   y_diabetes,
                                                   random_state=42)
    estimators = [('lr', LinearRegression()), ('svr', LinearSVR())]
    reg = StackingRegressor(estimators=estimators,
                            final_estimator=final_estimator,
                            cv=cv,
                            passthrough=passthrough)