def test_sklearn_regression():
    """Run the generic regression checks on two sklearn wrappers.

    AdaBoost is exercised with sample weights; GradientBoosting is checked
    with ``supports_weight=False``.
    """
    # AdaBoost supports sample weights
    adaboost_wrapper = SklearnRegressor(clf=AdaBoostRegressor(n_estimators=50))
    check_regression(adaboost_wrapper)
    # GradientBoosting is checked without sample-weight support
    gb_wrapper = SklearnRegressor(clf=GradientBoostingRegressor(n_estimators=50))
    check_regression(gb_wrapper, supports_weight=False)
def test_own_regression_reports():
    """Check that ``test_on`` and ``test_on_lds`` produce identical metrics."""
    X, y, sample_weight = generate_regression_data()
    regressor = SklearnRegressor(RandomForestRegressor())
    regressor.fit(X, y, sample_weight=sample_weight)

    # metric computed through the direct test_on path
    direct_report = regressor.test_on(X, y, sample_weight=sample_weight)
    mse_direct = direct_report.compute_metric(mean_squared_error)

    # metric computed through a LabeledDataStorage — must agree exactly
    lds = LabeledDataStorage(X, y, sample_weight=sample_weight)
    mse_from_lds = regressor.test_on_lds(lds=lds).compute_metric(mean_squared_error)
    assert mse_direct == mse_from_lds, 'Something wrong with test_on'
def test_gridsearch_sklearn_regression():
    """Grid-search an AdaBoost regressor and verify the best parameters stick.

    Runs the grid both with and without sample weights, then checks that the
    fitted estimator's parameters (and selected features) match what the
    generator reports as best.
    """
    scorer = RegressionFoldingScorer(mean_squared_error)
    grid_param = OrderedDict({
        "n_estimators": [10, 20],
        "learning_rate": [0.1, 0.05],
        'features': [['column0', 'column1'], ['column0', 'column1', 'column2']],
    })
    generator = RegressionParameterOptimizer(grid_param, n_evaluations=4)
    grid = GridOptimalSearchCV(SklearnRegressor(clf=AdaBoostRegressor()), generator, scorer)

    _ = check_grid(grid, False, False, False, use_weights=True, classification=False)
    regressor = check_grid(grid, False, False, False, use_weights=False, classification=False)

    # Parameters of the best fitted estimator must match the optimizer's choice
    assert 2 <= len(regressor.features) <= 3, 'Features were not set'
    params = regressor.get_params()
    for key in grid_param:
        # top-level parameter if present, otherwise nested under the wrapped clf
        effective_key = key if key in params else 'clf__' + key
        assert params[effective_key] == grid.generator.best_params_[key]
def test_folding_regressor(n_samples=100, n_features=3):
    """
    Sanity checks on FoldingRegressor predictions: fit quality, invariance of
    predictions under shuffling of the input rows, and mean-vote predictions.
    NOTE(review): statements that consume the global numpy RNG are kept in the
    original order so results are unchanged.
    """
    from sklearn.metrics import mean_squared_error

    X = numpy.random.normal(size=[n_samples, n_features])
    y = numpy.random.normal(size=n_samples)

    kfolder = FoldingRegressor(SklearnRegressor(GradientBoostingRegressor()), n_folds=2)
    kfolder.fit(X, y)
    folded_preds = kfolder.predict(X)
    # sanity check that fitting happened
    assert mean_squared_error(y, folded_preds) > mean_squared_error(y * 0., folded_preds) * 0.5

    # predict on shuffled rows, then undo the shuffle on the predictions
    permutation = numpy.random.permutation(n_samples)
    unshuffled_preds = kfolder.predict(X[permutation])[numpy.argsort(permutation)]
    # compare against the shuffled kFolding predictions
    assert mean_squared_error(y, folded_preds) > mean_squared_error(y, unshuffled_preds) * 0.5

    # compare against mean-vote predictions
    mean_vote_preds = kfolder.predict(X, vote_function=lambda x: numpy.mean(x, axis=0))
    assert mean_squared_error(y, folded_preds) > mean_squared_error(y, mean_vote_preds)
def test_folding_regressor_functions():
    """Smoke-test the FoldingRegressor API: staged predictions and importances."""
    data, y, sample_weight = generate_classification_data()
    # exercise both DataFrame-like and plain ndarray inputs
    for X in [data, numpy.array(data)]:
        kfolder = FoldingRegressor(SklearnRegressor(GradientBoostingRegressor(n_estimators=5)),
                                   n_folds=2)
        kfolder.fit(X, y, sample_weight=sample_weight)
        final_preds = kfolder.predict(X)
        # walk all staged predictions; the last stage must equal predict()
        for staged_preds in kfolder.staged_predict(X):
            pass
        assert numpy.allclose(staged_preds, final_preds)
        # both the attribute and the method are accessed purely as smoke-tests
        importances_attr = kfolder.feature_importances_
        importances_call = kfolder.get_feature_importances()
def test_folding_regressor_with_check_model():
    """Run the generic regression checks on a two-fold FoldingRegressor."""
    base_regressor = SklearnRegressor(GradientBoostingRegressor(n_estimators=4))
    folding_regressor = FoldingRegressor(base_regressor, n_folds=2)
    check_regression(folding_regressor, True, True, True)
def test_sklearn_regression_basic():
    """Basic regression checks for the sklearn wrappers, without weight flags.

    NOTE(review): this function was previously also named
    ``test_sklearn_regression``, duplicating a test defined earlier in the
    file; the later definition silently shadowed the earlier one, so only one
    of the two was ever collected and run by pytest. Renamed so both execute.
    """
    check_regression(SklearnRegressor(clf=AdaBoostRegressor(n_estimators=50)))
    check_regression(
        SklearnRegressor(clf=GradientBoostingRegressor(n_estimators=50)))