"""Regression-estimator tests consolidated from several scikit-learn test
modules (dummy, voting, stacking, bagging, AdaBoost, iterative imputer).

The import block below is reconstructed to make the snippets self-contained
and assumes a recent scikit-learn (~0.22+). Module-level fixtures (X, X_r,
y_r, boston, X_diabetes, y_diabetes) and helpers (_check_equality_regressor,
_check_behavior_2d, _check_behavior_2d_for_constant) are defined in the
upstream test modules.
"""

import numpy as np
import pytest
from numpy.testing import (assert_allclose, assert_almost_equal,
                           assert_array_equal)

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.ensemble import (AdaBoostClassifier, AdaBoostRegressor,
                              BaggingRegressor, RandomForestRegressor,
                              StackingRegressor, VotingClassifier,
                              VotingRegressor)
from sklearn.exceptions import NotFittedError
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer
from sklearn.linear_model import (ARDRegression, BayesianRidge, Lasso,
                                  LinearRegression, LogisticRegression,
                                  RidgeCV)
from sklearn.model_selection import KFold, ParameterGrid, train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import scale
from sklearn.random_projection import _sparse_random_matrix
from sklearn.svm import SVR, LinearSVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_raise_message, assert_raises
from sklearn.utils.stats import _weighted_percentile


def test_y_mean_attribute_regressor():
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]

    # when strategy = 'mean'
    est = DummyRegressor(strategy='mean')
    est.fit(X, y)

    assert est.constant_ == np.mean(y)


def test_dummy_regressor_on_3D_array():
    X = np.array([[['foo']], [['bar']], [['baz']]])
    y = np.array([2, 2, 2])
    y_expected = np.array([2, 2, 2])
    cls = DummyRegressor()
    cls.fit(X, y)
    y_pred = cls.predict(X)
    assert_array_equal(y_pred, y_expected)


def test_median_strategy_regressor():
    random_state = np.random.RandomState(seed=1)

    X = [[0]] * 5  # ignored
    y = random_state.randn(5)

    reg = DummyRegressor(strategy="median")
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [np.median(y)] * len(X))


def test_weights_regressor():
    """Check weighted average regression prediction on boston dataset."""
    reg1 = DummyRegressor(strategy='mean')
    reg2 = DummyRegressor(strategy='median')
    reg3 = DummyRegressor(strategy='quantile', quantile=.2)
    ereg = VotingRegressor([('mean', reg1), ('median', reg2),
                            ('quantile', reg3)], weights=[1, 2, 10])
    X_r_train, X_r_test, y_r_train, y_r_test = \
        train_test_split(X_r, y_r, test_size=.25)
    reg1_pred = reg1.fit(X_r_train, y_r_train).predict(X_r_test)
    reg2_pred = reg2.fit(X_r_train, y_r_train).predict(X_r_test)
    reg3_pred = reg3.fit(X_r_train, y_r_train).predict(X_r_test)
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)

    avg = np.average(np.asarray([reg1_pred, reg2_pred, reg3_pred]),
                     axis=0, weights=[1, 2, 10])
    assert_almost_equal(ereg_pred, avg, decimal=2)

    ereg_weights_none = VotingRegressor([('mean', reg1), ('median', reg2),
                                         ('quantile', reg3)], weights=None)
    ereg_weights_equal = VotingRegressor([('mean', reg1), ('median', reg2),
                                          ('quantile', reg3)],
                                         weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)

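
# A minimal sketch (an assumption based on the weighted-average contract
# verified above, not the library's actual implementation) of what the
# assertion checks: VotingRegressor's prediction should equal np.average of
# the base regressors' predictions with the given weights.
def manual_voting_predict(fitted_regressors, X, weights):
    """Hypothetical helper: weighted mean of base-regressor predictions."""
    preds = np.asarray([est.predict(X) for est in fitted_regressors])
    return np.average(preds, axis=0, weights=weights)
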

def test_dummy_regressor_return_std():
    X = [[0]] * 3  # ignored
    y = np.array([2, 2, 2])
    y_std_expected = np.array([0, 0, 0])
    cls = DummyRegressor()
    cls.fit(X, y)
    y_pred_list = cls.predict(X, return_std=True)
    # there should be two elements when return_std is True
    assert len(y_pred_list) == 2
    # the second element should be all zeros
    assert_array_equal(y_pred_list[1], y_std_expected)

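
# Usage sketch (not in the original suite) for the API exercised above:
# with return_std=True, predict returns a (mean, std) tuple, and the std
# is exactly zero for a constant target.
def _demo_return_std():
    reg = DummyRegressor().fit([[0]] * 3, np.array([2, 2, 2]))
    y_mean, y_std = reg.predict([[0]] * 3, return_std=True)
    assert_array_equal(y_std, np.zeros(3))
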

# Decorator reconstructed: the test takes a `strategy` argument and is
# meant to run once per DummyRegressor strategy.
@pytest.mark.parametrize("strategy", ["mean", "median", "quantile",
                                      "constant"])
def test_regressor_prediction_independent_of_X(strategy):
    y = [0, 2, 1, 1]
    X1 = [[0]] * 4
    reg1 = DummyRegressor(strategy=strategy, constant=0, quantile=0.7)
    reg1.fit(X1, y)
    predictions1 = reg1.predict(X1)

    X2 = [[1]] * 4
    reg2 = DummyRegressor(strategy=strategy, constant=0, quantile=0.7)
    reg2.fit(X2, y)
    predictions2 = reg2.predict(X2)

    assert_array_equal(predictions1, predictions2)


def test_constant_strategy_regressor():
    random_state = np.random.RandomState(seed=1)

    X = [[0]] * 5  # ignored
    y = random_state.randn(5)

    reg = DummyRegressor(strategy="constant", constant=[43])
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [43] * len(X))

    reg = DummyRegressor(strategy="constant", constant=43)
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [43] * len(X))


def test_constant_size_multioutput_regressor():
    random_state = np.random.RandomState(seed=1)
    X = random_state.randn(10, 10)
    y = random_state.randn(10, 5)

    est = DummyRegressor(strategy='constant', constant=[1, 2, 3, 4])
    assert_raises(ValueError, est.fit, X, y)


def test_dummy_regressor_sample_weight(n_samples=10):
    random_state = np.random.RandomState(seed=1)

    X = [[0]] * n_samples
    y = random_state.rand(n_samples)
    sample_weight = random_state.rand(n_samples)

    est = DummyRegressor(strategy="mean").fit(X, y, sample_weight)
    assert est.constant_ == np.average(y, weights=sample_weight)

    est = DummyRegressor(strategy="median").fit(X, y, sample_weight)
    assert est.constant_ == _weighted_percentile(y, sample_weight, 50.)

    est = DummyRegressor(strategy="quantile",
                         quantile=.95).fit(X, y, sample_weight)
    assert est.constant_ == _weighted_percentile(y, sample_weight, 95.)

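
# Illustrative re-implementation (an assumption, mirroring the contract of
# sklearn.utils.stats._weighted_percentile used above): the weighted
# percentile is the first value, in sorted order, whose cumulative weight
# reaches the requested fraction of the total weight.
def _naive_weighted_percentile(y, sample_weight, percentile):
    order = np.argsort(y)
    y_sorted = np.asarray(y)[order]
    weight_cdf = np.cumsum(np.asarray(sample_weight)[order])
    threshold = percentile / 100.0 * weight_cdf[-1]
    return y_sorted[np.searchsorted(weight_cdf, threshold)]
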

class NoWeightRegressor(BaseEstimator, RegressorMixin):
    """Stub regressor whose fit() signature deliberately omits
    sample_weight, so ensemble wrappers can detect it as weight-unaware."""

    def fit(self, X, y):
        self.reg = DummyRegressor()
        return self.reg.fit(X, y)

    def predict(self, X):
        return np.ones(X.shape[0])

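
# Hypothetical check built on NoWeightRegressor (not in the original file):
# VotingRegressor inspects fit() signatures and should refuse to forward
# sample weights to an estimator that cannot accept them. The exact error
# type (ValueError) is an assumption.
def _demo_no_weight_rejection():
    X_d, y_d = [[0]] * 4, [0., 1., 2., 3.]
    ereg = VotingRegressor([('nw', NoWeightRegressor())])
    with pytest.raises(ValueError):
        ereg.fit(X_d, y_d, sample_weight=np.ones(4))
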

def test_notfitted():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='soft')
    ereg = VotingRegressor([('dr', DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call \'fit\'"
           " with appropriate arguments before using this estimator.")
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict_proba, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.transform, X)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
                         ereg.predict, X_r)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
                         ereg.transform, X_r)


def test_multidimensional_X():
    """Check that the AdaBoost estimators can work with an n-dimensional
    data matrix."""
    rng = np.random.RandomState(0)

    X = rng.randn(50, 3, 3)
    yc = rng.choice([0, 1], 50)
    yr = rng.randn(50)

    boost = AdaBoostClassifier(DummyClassifier(strategy='most_frequent'))
    boost.fit(X, yc)
    boost.predict(X)
    boost.predict_proba(X)

    boost = AdaBoostRegressor(DummyRegressor())
    boost.fit(X, yr)
    boost.predict(X)


def test_regression():
    # Check regression for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [0.5, 1.0],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyRegressor(),
                           DecisionTreeRegressor(),
                           KNeighborsRegressor(),
                           SVR()]:
        for params in grid:
            BaggingRegressor(base_estimator=base_estimator,
                             random_state=rng,
                             **params).fit(X_train, y_train).predict(X_test)


def test_mean_strategy_multioutput_regressor():
    random_state = np.random.RandomState(seed=1)

    X_learn = random_state.randn(10, 10)
    y_learn = random_state.randn(10, 5)

    mean = np.mean(y_learn, axis=0).reshape((1, -1))

    X_test = random_state.randn(20, 10)
    y_test = random_state.randn(20, 5)

    # Correctness oracle
    est = DummyRegressor()
    est.fit(X_learn, y_learn)
    y_pred_learn = est.predict(X_learn)
    y_pred_test = est.predict(X_test)

    _check_equality_regressor(mean, y_learn, y_pred_learn,
                              y_test, y_pred_test)
    _check_behavior_2d(est)

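
# Equivalence sketch (not in the original suite) for the multioutput case
# above: the default 'mean' strategy stores one constant per output column,
# so every predicted row repeats np.mean(y, axis=0).
def _demo_multioutput_mean():
    rng = np.random.RandomState(0)
    y_mo = rng.randn(10, 5)
    est = DummyRegressor().fit(rng.randn(10, 3), y_mo)
    assert_array_equal(est.predict(rng.randn(2, 3)),
                       np.tile(y_mo.mean(axis=0), (2, 1)))
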

def test_constant_strategy_multioutput_regressor():
    random_state = np.random.RandomState(seed=1)

    X_learn = random_state.randn(10, 10)
    y_learn = random_state.randn(10, 5)

    # test with 2d array
    constants = random_state.randn(5)

    X_test = random_state.randn(20, 10)
    y_test = random_state.randn(20, 5)

    # Correctness oracle
    est = DummyRegressor(strategy="constant", constant=constants)
    est.fit(X_learn, y_learn)
    y_pred_learn = est.predict(X_learn)
    y_pred_test = est.predict(X_test)

    _check_equality_regressor(constants, y_learn, y_pred_learn,
                              y_test, y_pred_test)
    _check_behavior_2d_for_constant(est)


def test_quantile_invalid():
    X = [[0]] * 5  # ignored
    y = [0] * 5  # ignored

    est = DummyRegressor(strategy="quantile")
    assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile=None)
    assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile=[0])
    assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile=-0.1)
    assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile=1.1)
    assert_raises(ValueError, est.fit, X, y)

    est = DummyRegressor(strategy="quantile", quantile='abc')
    assert_raises(TypeError, est.fit, X, y)


# Parametrization reconstructed: constant targets for which the mean
# prediction matches y_test exactly, so the R^2 score is 1.0.
@pytest.mark.parametrize("y,y_test", [
    ([1], [1]),
    ([1, 1, 1], [1, 1, 1]),
])
def test_regressor_score_with_None(y, y_test):
    reg = DummyRegressor()
    reg.fit(None, y)
    assert reg.score(None, y_test) == 1.0


def test_regressor_exceptions():
    reg = DummyRegressor()
    assert_raises(NotFittedError, reg.predict, [])


def test_quantile_strategy_regressor():
    random_state = np.random.RandomState(seed=1)

    X = [[0]] * 5  # ignored
    y = random_state.randn(5)

    reg = DummyRegressor(strategy="quantile", quantile=0.5)
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [np.median(y)] * len(X))

    reg = DummyRegressor(strategy="quantile", quantile=0)
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [np.min(y)] * len(X))

    reg = DummyRegressor(strategy="quantile", quantile=1)
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [np.max(y)] * len(X))

    reg = DummyRegressor(strategy="quantile", quantile=0.3)
    reg.fit(X, y)
    assert_array_equal(reg.predict(X), [np.percentile(y, q=30)] * len(X))

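
# Equivalence sketch (not in the original suite): the quantile strategy with
# quantile=q predicts np.percentile(y, 100 * q) for every sample, which is
# exactly what the assertions above verify at q in {0, 0.3, 0.5, 1}.
def _demo_quantile_percentile():
    rng = np.random.RandomState(0)
    y_demo = rng.randn(5)
    reg = DummyRegressor(strategy="quantile", quantile=0.3)
    reg.fit([[0]] * 5, y_demo)
    assert reg.predict([[0]])[0] == np.percentile(y_demo, 30)
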

def test_constants_not_specified_regressor():
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]

    est = DummyRegressor(strategy='constant')
    assert_raises(TypeError, est.fit, X, y)


def test_unknown_strategy_regressor():
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]

    est = DummyRegressor(strategy='gona')
    assert_raises(ValueError, est.fit, X, y)


def test_quantile_strategy_multioutput_regressor():
    random_state = np.random.RandomState(seed=1)

    X_learn = random_state.randn(10, 10)
    y_learn = random_state.randn(10, 5)

    median = np.median(y_learn, axis=0).reshape((1, -1))
    quantile_values = np.percentile(y_learn, axis=0, q=80).reshape((1, -1))

    X_test = random_state.randn(20, 10)
    y_test = random_state.randn(20, 5)

    # Correctness oracle
    est = DummyRegressor(strategy="quantile", quantile=0.5)
    est.fit(X_learn, y_learn)
    y_pred_learn = est.predict(X_learn)
    y_pred_test = est.predict(X_test)

    _check_equality_regressor(median, y_learn, y_pred_learn,
                              y_test, y_pred_test)
    _check_behavior_2d(est)

    # Correctness oracle
    est = DummyRegressor(strategy="quantile", quantile=0.8)
    est.fit(X_learn, y_learn)
    y_pred_learn = est.predict(X_learn)
    y_pred_test = est.predict(X_test)

    _check_equality_regressor(quantile_values, y_learn, y_pred_learn,
                              y_test, y_pred_test)
    _check_behavior_2d(est)


    # (fragment: tail of the imputation-order test; `ordered_idx`, `d`,
    # `max_iter` and `imputation_order` are defined earlier in the
    # upstream module)
    if imputation_order == 'roman':
        assert np.all(ordered_idx[:d - 1] == np.arange(1, d))
    elif imputation_order == 'arabic':
        assert np.all(ordered_idx[:d - 1] == np.arange(d - 1, 0, -1))
    elif imputation_order == 'random':
        ordered_idx_round_1 = ordered_idx[:d - 1]
        ordered_idx_round_2 = ordered_idx[d - 1:]
        assert ordered_idx_round_1 != ordered_idx_round_2
    elif 'ending' in imputation_order:
        assert len(ordered_idx) == max_iter * (d - 1)


@pytest.mark.parametrize(
    "estimator",
    [None, DummyRegressor(), BayesianRidge(), ARDRegression(), RidgeCV()]
)
def test_iterative_imputer_estimators(estimator):
    rng = np.random.RandomState(0)

    n = 100
    d = 10
    X = _sparse_random_matrix(n, d, density=0.10, random_state=rng).toarray()

    imputer = IterativeImputer(missing_values=0,
                               max_iter=1,
                               estimator=estimator,
                               random_state=rng)
    imputer.fit_transform(X)

    # check that types are correct for estimators


def test_quantile_strategy_empty_train():
    est = DummyRegressor(strategy="quantile", quantile=0.4)
    assert_raises(ValueError, est.fit, [], [])


def test_set_params_nested_pipeline():
    estimator = Pipeline([('a', Pipeline([('b', DummyRegressor())]))])
    estimator.set_params(a__b__alpha=0.001, a__b=Lasso())
    estimator.set_params(a__steps=[('b', LogisticRegression())], a__b__C=5)

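
# Sketch (not in the original suite) of the double-underscore routing the
# test above relies on: 'a__b__alpha' addresses parameter alpha of step 'b'
# nested inside step 'a', and get_params(deep=True) exposes the same
# flattened keys.
def _demo_nested_param_routing():
    nested = Pipeline([('a', Pipeline([('b', Lasso(alpha=0.001))]))])
    assert nested.get_params()['a__b__alpha'] == 0.001
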

    # (fragment: tail of a drop-estimator test; `estimators`, `rf`, `reg`,
    # X_train/X_test/y_train are defined earlier in the upstream module)
    reg_drop = StackingRegressor(estimators=estimators,
                                 final_estimator=rf,
                                 cv=5)
    reg.fit(X_train, y_train)
    reg_drop.fit(X_train, y_train)
    assert_allclose(reg.predict(X_test), reg_drop.predict(X_test))
    assert_allclose(reg.transform(X_test), reg_drop.transform(X_test))


@pytest.mark.parametrize(
    "cv", [3, KFold(n_splits=3, shuffle=True, random_state=42)]
)
@pytest.mark.parametrize(
    "final_estimator, predict_params",
    [(None, {}),
     (RandomForestRegressor(random_state=42), {}),
     (DummyRegressor(), {'return_std': True})]
)
@pytest.mark.parametrize("passthrough", [False, True])
def test_stacking_regressor_diabetes(cv, final_estimator, predict_params,
                                     passthrough):
    # prescale the data to avoid convergence warning without using a
    # pipeline for later assert
    X_train, X_test, y_train, _ = train_test_split(scale(X_diabetes),
                                                   y_diabetes,
                                                   random_state=42)
    estimators = [('lr', LinearRegression()), ('svr', LinearSVR())]
    reg = StackingRegressor(estimators=estimators,
                            final_estimator=final_estimator,
                            cv=cv,
                            passthrough=passthrough)