def test_folding_regressor(n_samples=100, n_features=3):
    """
    Check prediction quality of a 2-fold FoldingRegressor on random data
    and verify that train data is not reused during prediction
    (shuffled and vote-averaged predictions stay comparable in quality).
    """
    from sklearn.metrics import mean_squared_error

    features = numpy.random.normal(size=[n_samples, n_features])
    targets = numpy.random.normal(size=n_samples)

    folder = FoldingRegressor(SklearnRegressor(GradientBoostingRegressor()), n_folds=2)
    folder.fit(features, targets)
    baseline_preds = folder.predict(features)

    # sanity check that the model fitted at all: predictions are not
    # dramatically worse than a zero baseline (loose 0.5 fudge factor)
    assert mean_squared_error(targets, baseline_preds) > mean_squared_error(targets * 0., baseline_preds) * 0.5

    # predict on a shuffled copy and unshuffle the result
    permutation = numpy.random.permutation(n_samples)
    shuffled_preds = folder.predict(features[permutation])[numpy.argsort(permutation)]

    # shuffled k-folding should give predictions of comparable quality
    assert mean_squared_error(targets, baseline_preds) > mean_squared_error(targets, shuffled_preds) * 0.5

    # averaging fold predictions via an explicit vote function
    averaged_preds = folder.predict(features, vote_function=lambda x: numpy.mean(x, axis=0))

    # mean-vote predictions should not be worse than the default ones
    assert mean_squared_error(targets, baseline_preds) > mean_squared_error(targets, averaged_preds)
def test_folding_regressor_functions():
    """
    Exercise FoldingRegressor auxiliary functions: staged prediction,
    sample weights in fit, and both feature-importance accessors,
    for DataFrame and ndarray inputs alike.
    """
    data, labels, weights = generate_classification_data()

    for features in (data, numpy.array(data)):
        folder = FoldingRegressor(SklearnRegressor(GradientBoostingRegressor(n_estimators=5)), n_folds=2)
        folder.fit(features, labels, sample_weight=weights)

        final_preds = folder.predict(features)
        # exhaust staged predictions; the last stage must match predict()
        for staged in folder.staged_predict(features):
            pass
        assert numpy.allclose(staged, final_preds)

        # smoke-check: both importance accessors work without raising
        importances = folder.feature_importances_
        other_importances = folder.get_feature_importances()
def test_folding_regressor_with_check_model():
    """Run the generic regression checks against a 2-fold GB regressor."""
    base_regressor = SklearnRegressor(GradientBoostingRegressor(n_estimators=4))
    folding = FoldingRegressor(base_regressor, n_folds=2)
    check_regression(folding, True, True, True)