Example #1
0
def test_sklearn_regression():
    """Run the shared regression checks on two wrapped sklearn estimators.

    The AdaBoost-based wrapper is checked with the default settings
    (weights supported); the gradient-boosting-based wrapper is checked
    with ``supports_weight=False``.
    """
    weighted_model = SklearnRegressor(clf=AdaBoostRegressor(n_estimators=50))
    check_regression(weighted_model)

    unweighted_model = SklearnRegressor(
        clf=GradientBoostingRegressor(n_estimators=50))
    check_regression(unweighted_model, supports_weight=False)
Example #2
0
def test_own_regression_reports():
    """
    Check that regressor.test_on and regressor.test_on_lds give reports
    with the same mean squared error for the same data.
    """
    X, y, sample_weight = generate_regression_data()

    model = SklearnRegressor(RandomForestRegressor())
    model.fit(X, y, sample_weight=sample_weight)

    # Report built directly from arrays.
    direct_report = model.test_on(X, y, sample_weight=sample_weight)
    direct_mse = direct_report.compute_metric(mean_squared_error)

    # Report built from the same data wrapped in a LabeledDataStorage.
    storage = LabeledDataStorage(X, y, sample_weight=sample_weight)
    storage_report = model.test_on_lds(lds=storage)
    storage_mse = storage_report.compute_metric(mean_squared_error)

    assert direct_mse == storage_mse, 'Something wrong with test_on'
Example #3
0
def test_gridsearch_sklearn_regression():
    """Grid-search an AdaBoost-based ``SklearnRegressor`` and verify the result.

    ``check_grid`` is run twice on the same grid object, first with
    ``use_weights=True`` and then with ``use_weights=False``. The regressor
    returned by the second run is inspected: its feature list must have two
    or three entries, and every searched parameter must equal the value
    stored in ``grid.generator.best_params_``.
    """
    scorer = RegressionFoldingScorer(mean_squared_error)

    # Built from a sequence of pairs: an OrderedDict created from a dict
    # literal only keeps the written order on interpreters whose plain dicts
    # are already ordered.
    grid_param = OrderedDict([
        ("n_estimators", [10, 20]),
        ("learning_rate", [0.1, 0.05]),
        ('features', [['column0', 'column1'],
                      ['column0', 'column1', 'column2']]),
    ])
    generator = RegressionParameterOptimizer(grid_param, n_evaluations=4)

    grid = GridOptimalSearchCV(SklearnRegressor(clf=AdaBoostRegressor()),
                               generator, scorer)

    # NOTE(review): the meaning of the three positional False flags is
    # defined by check_grid, which is not visible here.
    _ = check_grid(grid,
                   False,
                   False,
                   False,
                   use_weights=True,
                   classification=False)
    regressor = check_grid(grid,
                           False,
                           False,
                           False,
                           use_weights=False,
                           classification=False)

    # Check parameters of best fitted regressor
    assert 2 <= len(regressor.features) <= 3, 'Features were not set'
    params = regressor.get_params()
    best_params = grid.generator.best_params_
    for key in grid_param:
        # A parameter is exposed either under its own name or under the
        # 'clf__' prefix.
        param_name = key if key in params else 'clf__' + key
        assert params[param_name] == best_params[key]
Example #4
0
def test_folding_regressor(n_samples=100, n_features=3):
    """
    Compare FoldingRegressor predictions on random data in several modes:
    default prediction, prediction on shuffled rows, and prediction with a
    mean vote function.
    """
    from sklearn.metrics import mean_squared_error

    def average_votes(votes):
        # Vote function: mean over the first axis of the stacked votes.
        return numpy.mean(votes, axis=0)

    X = numpy.random.normal(size=[n_samples, n_features])
    y = numpy.random.normal(size=n_samples)

    base_model = SklearnRegressor(GradientBoostingRegressor())
    folding = FoldingRegressor(base_model, n_folds=2)
    folding.fit(X, y)
    default_preds = folding.predict(X)
    default_error = mean_squared_error(y, default_preds)

    # Compare the error against the targets with the error against zeros.
    zero_target_error = mean_squared_error(y * 0., default_preds)
    assert default_error > zero_target_error * 0.5

    # Predict on shuffled rows, then restore the original row order.
    order = numpy.random.permutation(n_samples)
    shuffled_preds = folding.predict(X[order])
    restored_preds = shuffled_preds[numpy.argsort(order)]

    # Compare default predictions with the ones made on shuffled data.
    restored_error = mean_squared_error(y, restored_preds)
    assert default_error > restored_error * 0.5

    # Compare default predictions with the mean-vote predictions.
    mean_vote_preds = folding.predict(X, vote_function=average_votes)
    mean_vote_error = mean_squared_error(y, mean_vote_preds)
    assert default_error > mean_vote_error
Example #5
0
def test_folding_regressor_functions():
    """Exercise FoldingRegressor prediction and importance accessors.

    Runs once on the generated data as-is and once on its numpy.array copy.
    """
    dataset, y, sample_weight = generate_classification_data()

    for X in (dataset, numpy.array(dataset)):
        base_model = SklearnRegressor(GradientBoostingRegressor(n_estimators=5))
        folding = FoldingRegressor(base_model, n_folds=2)
        folding.fit(X, y, sample_weight=sample_weight)
        final_preds = folding.predict(X)

        # Exhaust the staged predictions; only the last stage is compared.
        for last_stage in folding.staged_predict(X):
            pass
        assert numpy.allclose(last_stage, final_preds)

        # Both importance accessors are only called, not compared.
        attribute_importances = folding.feature_importances_
        method_importances = folding.get_feature_importances()
Example #6
0
def test_own_regression_reports():
    """
    testing that regressor.test_on agrees with regressor.test_on_lds
    """
    X, y, sample_weight = generate_regression_data()

    forest = SklearnRegressor(RandomForestRegressor())
    forest.fit(X, y, sample_weight=sample_weight)

    # Metric from the report produced from raw arrays.
    array_report = forest.test_on(X, y, sample_weight=sample_weight)
    mse_from_arrays = array_report.compute_metric(mean_squared_error)

    # Metric from the report produced from a LabeledDataStorage.
    labeled_data = LabeledDataStorage(X, y, sample_weight=sample_weight)
    lds_report = forest.test_on_lds(lds=labeled_data)
    mse_from_lds = lds_report.compute_metric(mean_squared_error)

    assert mse_from_arrays == mse_from_lds, 'Something wrong with test_on'
Example #7
0
def test_folding_regressor_with_check_model():
    """Run the shared regression checks on a two-fold FoldingRegressor."""
    boosted_model = SklearnRegressor(GradientBoostingRegressor(n_estimators=4))
    folded_model = FoldingRegressor(boosted_model, n_folds=2)
    # NOTE(review): the three positional True flags are interpreted by
    # check_regression, which is defined outside this snippet.
    check_regression(folded_model, True, True, True)
Example #8
0
def test_sklearn_regression():
    """Run the shared regression checks on AdaBoost and gradient boosting."""
    for estimator_cls in (AdaBoostRegressor, GradientBoostingRegressor):
        wrapped = SklearnRegressor(clf=estimator_cls(n_estimators=50))
        check_regression(wrapped)