Exemple #1
0
def test_calibrate_uncertainty():
    """Train with uncertainty calibration and check the predicted columns.

    The training frame is halved twice: the first half trains the model,
    and the remainder is halved again into data for the uncertainty model
    and data for calibrating it. With percentiles 25/50/75 requested, the
    result of ``predict_uncertainty`` must expose one
    ``percentile_<p>_delta`` column per percentile plus ``bucket_num``.
    """
    # Fix numpy's global seed before any data is loaded or split.
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()

    # First split: keep half for training, hold the other half out.
    train_df, held_out_df = train_test_split(train_df, test_size=0.5)
    # Second split: divide the held-out half between the uncertainty model
    # and its calibration step.
    uncertainty_df, calibration_df = train_test_split(held_out_df,
                                                      test_size=0.5)

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'},
    )
    predictor.train(
        train_df,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=uncertainty_df,
        calibrate_uncertainty=True,
        uncertainty_calibration_settings={
            'num_buckets': 3,
            'percentiles': [25, 50, 75],
        },
        uncertainty_calibration_data=calibration_df,
    )

    predicted_columns = list(predictor.predict_uncertainty(test_df).columns)

    # Checked in the same order as the requested percentiles, then the bucket.
    for expected_column in ('percentile_25_delta',
                            'percentile_50_delta',
                            'percentile_75_delta',
                            'bucket_num'):
        assert expected_column in predicted_columns
def test_predict_uncertainty_returns_pandas_DataFrame_for_more_than_one_value():
    """Check that ``predict_uncertainty`` on a whole frame gives a DataFrame.

    Half of the training frame trains the regressor and the other half is
    passed as ``uncertainty_data``; the full test frame is then scored in
    one call and the result's type is asserted.
    """
    # Fix numpy's global seed before any data is loaded or split.
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()

    train_df, uncertainty_df = train_test_split(train_df, test_size=0.5)

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'},
    )
    predictor.train(
        train_df,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=uncertainty_df,
    )

    result = predictor.predict_uncertainty(test_df)

    assert isinstance(result, pd.DataFrame)
def test_predict_uncertainty_returns_dict_for_one_value():
    """Check that ``predict_uncertainty`` on a single record gives a dict.

    Half of the training frame trains the regressor and the other half is
    passed as ``uncertainty_data``. Every row of the test frame is then
    converted to a plain dict and scored individually.
    """
    # Fix numpy's global seed before any data is loaded or split.
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()

    train_df, uncertainty_df = train_test_split(train_df, test_size=0.5)

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'},
    )
    predictor.train(
        train_df,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=uncertainty_df,
    )

    # One dict per test row; each is scored on its own.
    for record in test_df.to_dict('records'):
        assert isinstance(predictor.predict_uncertainty(record), dict)