Esempio n. 1
0
def test_regression_report():
    regressors = RegressorsFactory()
    regressors.add_regressor('gb', GradientBoostingRegressor(n_estimators=10))
    regressors.add_regressor('rf', RandomForestRegressor())
    regressors.add_regressor('ada', AdaBoostRegressor(n_estimators=10))

    X, y = generate_regression_sample(1000, 5)
    regressors.fit(X, y)

    X, y = generate_regression_sample(1000, 5)
    test_lds = LabeledDataStorage(X, y, sample_weight=None)
    regression_report = regressors.test_on_lds(test_lds)
    val = numpy.mean(X['column0'])
    _regression_mask_report(regression_report, "column0 > %f" % val, X)
    _regression_mask_report(regression_report, lambda x: numpy.array(x['column0']) < val, X)
    _regression_mask_report(regression_report, None, X)
Esempio n. 2
0
def test_factory():
    factory = RegressorsFactory()
    try:
        from rep.estimators.tmva import TMVARegressor
        factory.add_regressor(
            'tmva',
            TMVARegressor(
                factory_options="Silent=True:V=False:DrawProgressBar=False"))
    except ImportError:
        pass
    factory.add_regressor('rf', RandomForestRegressor(n_estimators=10))
    factory.add_regressor('ada', AdaBoostRegressor(n_estimators=20))

    X, y, sample_weight = generate_classification_data()
    assert factory == factory.fit(X,
                                  y,
                                  sample_weight=sample_weight,
                                  features=list(X.columns))
    values = factory.predict(X)

    for cl in factory.values():
        assert list(cl.features) == list(X.columns)

    for key, val in values.items():
        score = mean_squared_error(y, val)
        print(score)
        assert score < 0.2

    for key, iterator in factory.staged_predict(X).items():
        assert key != 'tmva', 'tmva does not support staged pp'
        for p in iterator:
            assert p.shape == (len(X), )

        # checking that last iteration coincides with previous
        assert numpy.all(p == values[key])

    # testing picklability
    dump_string = cPickle.dumps(factory)
    clf_loaded = cPickle.loads(dump_string)

    assert type(factory) == type(clf_loaded)

    probs1 = factory.predict(X)
    probs2 = clf_loaded.predict(X)
    for key, val in probs1.items():
        assert numpy.all(val == probs2[key]), 'something strange was loaded'

    report = RegressionReport({'rf': factory['rf']},
                              LabeledDataStorage(X, y, sample_weight))
    report.feature_importance_shuffling(mean_squared_mod).plot(new_plot=True,
                                                               figsize=(18, 3))
    report = factory.test_on_lds(LabeledDataStorage(X, y, sample_weight))
    report = factory.test_on(X, y, sample_weight=sample_weight)
    report.feature_importance()
    report.features_correlation_matrix()
    report.predictions_scatter()

    val = numpy.mean(X['column0'])
    report_mask(report, "column0 > %f" % val, X)
    report_mask(report, lambda x: numpy.array(x['column0']) < val, X)
    report_mask(report, None, X)
Esempio n. 3
0
def test_factory():
    factory = RegressorsFactory()
    try:
        from rep.estimators.tmva import TMVARegressor
        factory.add_regressor('tmva', TMVARegressor())
    except ImportError:
        pass
    factory.add_regressor('rf', RandomForestRegressor(n_estimators=10))
    factory.add_regressor('ada', AdaBoostRegressor(n_estimators=20))

    X, y, sample_weight = generate_classification_data()
    assert factory == factory.fit(X, y, sample_weight=sample_weight, features=list(X.columns))
    values = factory.predict(X)

    for cl in factory.values():
        assert list(cl.features) == list(X.columns)

    for key, val in values.items():
        score = mean_squared_error(y, val)
        print(score)
        assert score < 0.2

    for key, iterator in factory.staged_predict(X).items():
        assert key != 'tmva', 'tmva does not support staged pp'
        for p in iterator:
            assert p.shape == (len(X), )

        # checking that last iteration coincides with previous
        assert numpy.all(p == values[key])

    # testing picklability
    dump_string = cPickle.dumps(factory)
    clf_loaded = cPickle.loads(dump_string)

    assert type(factory) == type(clf_loaded)

    probs1 = factory.predict(X)
    probs2 = clf_loaded.predict(X)
    for key, val in probs1.items():
        assert numpy.all(val == probs2[key]), 'something strange was loaded'

    report = RegressionReport({'rf': factory['rf']}, LabeledDataStorage(X, y, sample_weight))
    report.feature_importance_shuffling(mean_squared_mod).plot(new_plot=True, figsize=(18, 3))
    report = factory.test_on_lds(LabeledDataStorage(X, y, sample_weight))
    report = factory.test_on(X, y, sample_weight=sample_weight)
    report.feature_importance()
    report.features_correlation_matrix()
    report.predictions_scatter()

    val = numpy.mean(X['column0'])
    report_mask(report, "column0 > %f" % val, X)
    report_mask(report, lambda x: numpy.array(x['column0']) < val, X)
    report_mask(report, None, X)
Esempio n. 4
0
def test_regression_report():
    regressors = RegressorsFactory()
    regressors.add_regressor('gb', GradientBoostingRegressor(n_estimators=10))
    regressors.add_regressor('rf', RandomForestRegressor())
    regressors.add_regressor('ada', AdaBoostRegressor(n_estimators=10))

    X, y = generate_regression_sample(1000, 5)
    regressors.fit(X, y)

    X, y = generate_regression_sample(1000, 5)
    test_lds = LabeledDataStorage(X, y, sample_weight=None)
    regression_report = regressors.test_on_lds(test_lds)
    val = numpy.mean(X['column0'])
    _regression_mask_report(regression_report, "column0 > %f" % val, X)
    _regression_mask_report(regression_report,
                            lambda x: numpy.array(x['column0']) < val, X)
    _regression_mask_report(regression_report, None, X)