Example no. 1
0
def test_evaluate_regression_optimiser():
    """Test evaluate method of Optimiser class for regression.

    Covers three scoring configurations: a custom MAPE scorer, the
    default (scoring=None), and an invalid scoring string.
    """
    reader = Reader(sep=",")
    # `data` replaces the original name `dict`, which shadowed the builtin.
    data = reader.train_test_split(Lpath=[
        "data_for_tests/train_regression.csv",
        "data_for_tests/test_regression.csv"
    ],
                                   target_name="SalePrice")
    drift_thresholder = Drift_thresholder()
    # Bug fix: the original assigned fit_transform's result back onto
    # `drift_thresholder` and then evaluated the UNtransformed data.
    # Use the transformed dict, as the sibling predictor test does.
    data = drift_thresholder.fit_transform(data)

    # Mean Absolute Percentage Error; lower is better, hence
    # greater_is_better=False (sklearn negates the score internally).
    mape = make_scorer(lambda y_true, y_pred: 100 * np.sum(
        np.abs(y_true - y_pred) / y_true) / len(y_true),
                       greater_is_better=False,
                       needs_proba=False)

    # A custom scorer is expected to emit exactly one UserWarning.
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=mape, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    # np.Inf was removed in NumPy 2.0; np.inf is the portable spelling.
    assert -np.inf <= score

    # scoring=None is expected to warn once and fall back to a default.
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=None, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert -np.inf <= score

    # An unknown scoring string warns at construction and again at evaluate.
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring="wrong_scoring", n_folds=3)
    assert len(record) == 1
    with pytest.warns(UserWarning) as record:
        score = opt.evaluate(None, data)
    assert -np.inf <= score
Example no. 2
0
    def test_fit_predict_predictor_regression(mock_show):
        """Test fit_predict method of Predictor class for regression."""
        reader = Reader(sep=',')
        data = reader.train_test_split(Lpath=[
            "data_for_tests/train_regression.csv",
            "data_for_tests/test_regression.csv"
        ],
                                       target_name="SalePrice")

        # Drop drifting features before tuning.
        thresholder = Drift_thresholder()
        cleaned = thresholder.fit_transform(data)

        # Mean Absolute Percentage Error; lower is better, so the
        # scorer is declared with greater_is_better=False.
        def _mape(y_true, y_pred):
            return 100 * np.sum(np.abs(y_true - y_pred) / y_true) / len(y_true)

        mape = make_scorer(_mape, greater_is_better=False, needs_proba=False)
        opt = Optimiser(scoring=mape, n_folds=3)
        opt.evaluate(None, cleaned)

        # Hyper-parameter search space: one "step__param" entry per
        # pipeline step (na encoder, categorical encoder, feature
        # selector, final estimator).
        space = {
            'ne__numerical_strategy': {"search": "choice", "space": [0]},
            'ce__strategy': {
                "search": "choice",
                "space": ["label_encoding", "random_projection",
                          "entity_embedding"]
            },
            'fs__threshold': {"search": "uniform", "space": [0.01, 0.3]},
            'est__max_depth': {"search": "choice", "space": [3, 4, 5, 6, 7]},
        }

        best = opt.optimise(space, cleaned, 1)

        predictor = Predictor(verbose=True)
        predictor.fit_predict(best, cleaned)

        # fit_predict writes predictions to disk; verify columns and shape.
        predictions = pd.read_csv("save/SalePrice_predictions.csv")
        assert list(predictions.columns) == ['Unnamed: 0',
                                             'SalePrice_predicted']
        assert np.shape(predictions) == (1459, 2)
# Reading and cleaning all files
# Declare a reader for csv files
rd = Reader(sep=',')
# Return a dictionary containing three entries:
# dict["train"] contains training samples without target columns
# dict["test"] contains testing elements without target columns
# dict["target"] contains the target column for training samples.
# NOTE(review): `dict` shadows the builtin — consider renaming.
dict = rd.train_test_split(paths, target_name)

# Remove features whose distribution drifts between train and test.
dft = Drift_thresholder()
dict = dft.fit_transform(dict)

# Tuning
# Mean Absolute Percentage Error; lower is better, hence
# greater_is_better=False (sklearn negates the score internally).
mape = make_scorer(lambda y_true, y_pred: 100 * np.sum(
    np.abs(y_true - y_pred) / y_true) / len(y_true),
                   greater_is_better=False,
                   needs_proba=False)
# Declare an optimiser. You can declare your own score
# as presented here or use one in
# {"mean_absolute_error", "mean_squared_error","median_absolute_error","r2"}
opt = Optimiser(scoring=mape, n_folds=3)
opt.evaluate(None, dict)

# Space of hyperparameters
# The keys must respect the following syntax : "enc__param".
#   "enc" = "ne" for na encoder
#   "enc" = "ce" for categorical encoder
#   "enc" = "fs" for feature selector [OPTIONAL]
#   "enc" = "stck"+str(i) to add layer n°i of meta-features [OPTIONAL]
#   "enc" = "est" for the final estimator
#   "param" : a correct associated parameter for each step.