Example #1
# Imports shared by the test excerpts below
import numpy as np
import pandas as pd
import pytest
from sklearn.metrics import make_scorer

from mlbox.preprocessing.reader import Reader
from mlbox.preprocessing.drift_thresholder import Drift_thresholder
from mlbox.optimisation.optimiser import Optimiser
from mlbox.prediction.predictor import Predictor


def test_evaluate_classification_optimiser():
    """Test evaluate method of Optimiser class for classification."""
    reader = Reader(sep=",")
    dict = reader.train_test_split(
        Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
        target_name="Survived")
    drift_thresholder = Drift_thresholder()
    drift_thresholder = drift_thresholder.fit_transform(dict)

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=None, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, dict)
    assert -np.inf <= score

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring="roc_auc", n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, dict)
    assert 0. <= score <= 1.

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring="wrong_scoring", n_folds=3)
    assert len(record) == 1
    with pytest.warns(UserWarning) as record:
        score = opt.evaluate(None, dict)
    assert opt.scoring == "neg_log_loss"
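The assertions above index the split result by key; here is a minimal sketch of inspecting that dictionary, using only the "train" and "target" keys that these excerpts themselves rely on:

# A sketch of inspecting the split returned by Reader.train_test_split;
# only the "train" and "target" keys are used, as in the excerpts on this page.
from mlbox.preprocessing.reader import Reader

reader = Reader(sep=",")
data = reader.train_test_split(
    Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
    target_name="Survived")
print(data["train"].head())   # training features (a pandas DataFrame)
print(data["target"].head())  # the "Survived" target column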
Example #2
def test_evaluate_regression_optimiser():
    """Test evaluate method of Optimiser class for regression."""
    reader = Reader(sep=",")
    dict = reader.train_test_split(Lpath=[
        "data_for_tests/train_regression.csv",
        "data_for_tests/test_regression.csv"
    ],
                                   target_name="SalePrice")
    drift_thresholder = Drift_thresholder()
    drift_thresholder = drift_thresholder.fit_transform(dict)

    mape = make_scorer(lambda y_true, y_pred: 100 * np.sum(
        np.abs(y_true - y_pred) / y_true) / len(y_true),
                       greater_is_better=False,
                       needs_proba=False)
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=mape, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, dict)
    assert -np.inf <= score

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=None, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, dict)
    assert -np.inf <= score

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring="wrong_scoring", n_folds=3)
    assert len(record) == 1
    with pytest.warns(UserWarning) as record:
        score = opt.evaluate(None, dict)
    assert -np.inf <= score
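The inline MAPE lambda reads more clearly as a named function; the math and the make_scorer wrapper below are identical to the test above:

# The MAPE metric from the test above as a named function; same math,
# wrapped with sklearn's make_scorer exactly as in the excerpt.
import numpy as np
from sklearn.metrics import make_scorer

def mean_absolute_percentage_error(y_true, y_pred):
    """MAPE in percent; assumes y_true contains no zeros."""
    return 100 * np.sum(np.abs(y_true - y_pred) / y_true) / len(y_true)

mape = make_scorer(mean_absolute_percentage_error,
                   greater_is_better=False,
                   needs_proba=False)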
Example #3
# The mock_show argument implies a stripped-out matplotlib patch; the
# decorator below is an assumption, restored so the excerpt is self-consistent.
from unittest import mock


@mock.patch("matplotlib.pyplot.show")
def test_fit_predict_predictor_regression(mock_show):
    """Test fit_predict method of Predictor class for regression."""
    rd = Reader(sep=',')
    dt = rd.train_test_split(
        Lpath=["data_for_tests/train_regression.csv",
               "data_for_tests/test_regression.csv"],
        target_name="SalePrice")

    drift_thresholder = Drift_thresholder()
    df = drift_thresholder.fit_transform(dt)

    mape = make_scorer(
        lambda y_true, y_pred:
            100 * np.sum(np.abs(y_true - y_pred) / y_true) / len(y_true),
        greater_is_better=False,
        needs_proba=False)
    opt = Optimiser(scoring=mape, n_folds=3)

    opt.evaluate(None, df)

    space = {
        'ne__numerical_strategy': {
            "search": "choice",
            "space": [0]
        },
        'ce__strategy': {
            "search": "choice",
            "space":
            ["label_encoding", "random_projection", "entity_embedding"]
        },
        'fs__threshold': {
            "search": "uniform",
            "space": [0.01, 0.3]
        },
        'est__max_depth': {
            "search": "choice",
            "space": [3, 4, 5, 6, 7]
        }
    }

    best = opt.optimise(space, df, 1)

    prd = Predictor(verbose=True)
    prd.fit_predict(best, df)
    pred_df = pd.read_csv("save/SalePrice_predictions.csv")
    assert np.all(
        list(pred_df.columns) == ['Unnamed: 0', 'SalePrice_predicted'])
    assert np.shape(pred_df) == (1459, 2)
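The space dictionaries in these examples share one shape: keys are "<step>__<parameter>" and each value names a sampling strategy. A minimal annotated sketch, assuming MLBox's hyperopt-style semantics ("choice" draws one value from a list, "uniform" draws a float from a range):

# Annotated search space, same format as the tests above; the step-name
# meanings (ne/ce/fs/est) are an assumption based on common MLBox usage.
space = {
    'ne__numerical_strategy': {"search": "choice",   # ne: missing-value encoder
                               "space": [0, "mean"]},
    'ce__strategy': {"search": "choice",             # ce: categorical encoder
                     "space": ["label_encoding", "entity_embedding"]},
    'fs__threshold': {"search": "uniform",           # fs: feature selector
                      "space": [0.01, 0.3]},         # float drawn from [0.01, 0.3]
    'est__max_depth': {"search": "choice",           # est: final estimator
                       "space": [3, 4, 5, 6, 7]},
}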
Example #4
def test_evaluate_and_optimise_classification():
    """Test evaluate_and_optimise method of Optimiser class."""
    reader = Reader(sep=",")

    dict = reader.train_test_split(
        Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
        target_name="Survived")
    drift_thresholder = Drift_thresholder()
    drift_thresholder = drift_thresholder.fit_transform(dict)

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring='accuracy', n_folds=3)
    assert len(record) == 1
    dict_error = dict.copy()
    dict_error["target"] = dict_error["target"].astype(str)
    with pytest.raises(ValueError):
        score = opt.evaluate(None, dict_error)

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring='accuracy', n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, dict)
    assert 0. <= score <= 1.

    space = {
        'ne__numerical_strategy': {
            "search": "choice",
            "space": [0]
        },
        'ce__strategy': {
            "search": "choice",
            "space":
            ["label_encoding", "random_projection", "entity_embedding"]
        },
        'fs__threshold': {
            "search": "uniform",
            "space": [0.01, 0.3]
        },
        'est__max_depth': {
            "search": "choice",
            "space": [3, 4, 5, 6, 7]
        }
    }

    best = opt.optimise(space, dict, 1)
    assert type(best) == type(dict)
Example #5
def test_init_drift_thresholder():
    """Test init method of Drift_thresholder class."""
    drift_thresholder = Drift_thresholder()
    assert drift_thresholder.threshold == 0.6
    assert not drift_thresholder.inplace
    assert drift_thresholder.verbose
    assert drift_thresholder.to_path == "save"
    assert drift_thresholder._Drift_thresholder__Ddrifts == {}
    assert not drift_thresholder._Drift_thresholder__fitOK
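The defaults asserted above imply the constructor parameters; a sketch with non-default values, under that assumption:

# Assumed from the defaults asserted above: Drift_thresholder(threshold,
# inplace, verbose, to_path). A sketch, not the documented signature.
from mlbox.preprocessing.drift_thresholder import Drift_thresholder

dft = Drift_thresholder(threshold=0.8,   # drop features whose drift exceeds 0.8
                        inplace=True,    # modify the input dictionary in place
                        verbose=False,
                        to_path="save")  # where fitted artefacts are written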
Example #6
def test_fit_predict_predictor_classification():
    """Test fit_predict method of Predictor class for classification."""
    reader = Reader(sep=",")
    dict = reader.train_test_split(
        Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
        target_name="Survived")
    drift_thresholder = Drift_thresholder()
    drift_thresholder = drift_thresholder.fit_transform(dict)

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring='accuracy', n_folds=3)
    assert len(record) == 1

    space = {
        'ne__numerical_strategy': {
            "search": "choice",
            "space": [0]
        },
        'ce__strategy': {
            "search": "choice",
            "space":
            ["label_encoding", "random_projection", "entity_embedding"]
        },
        'fs__threshold': {
            "search": "uniform",
            "space": [0.01, 0.3]
        },
        'est__max_depth': {
            "search": "choice",
            "space": [3, 4, 5, 6, 7]
        }
    }

    optimal_hyper_parameters = opt.optimise(space, dict, 1)

    predictor = Predictor(verbose=False)
    predictor.fit_predict(optimal_hyper_parameters, dict)
    pred_df = pd.read_csv("save/Survived_predictions.csv")
    assert np.all(
        list(pred_df.columns) ==
        ['Unnamed: 0', '0.0', '1.0', 'Survived_predicted'])
    assert np.shape(pred_df) == (418, 4)
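Reading the file written by Predictor.fit_predict for this task; the column layout (one probability column per class plus the predicted label) comes from the assertions above:

# Loading the classification output; per the assertions, it holds
# per-class probability columns plus the hard prediction.
import pandas as pd

pred_df = pd.read_csv("save/Survived_predictions.csv", index_col=0)
print(pred_df[["0.0", "1.0"]].head())        # per-class probabilities
print(pred_df["Survived_predicted"].head())  # hard class predictions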
Example #7
def test_drifts():
    """Test drifts method of Drift_thresholder class."""
    drift_thresholder = Drift_thresholder()
    with pytest.raises(ValueError):
        drift_thresholder.drifts()
    reader = Reader(sep=",")
    dict = reader.train_test_split(
        Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
        target_name="Survived")
    drift_thresholder.fit_transform(dict)
    drifts = drift_thresholder.drifts()
    assert drifts != {}
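Continuing the pattern above: drifts() returns a feature-to-score mapping (non-empty after fit_transform, per the assertion), so the most drifting columns can be listed first. A sketch:

# List features by drift score, highest first; the numeric-score shape of
# the drifts() result is an assumption based on the test above.
from mlbox.preprocessing.reader import Reader
from mlbox.preprocessing.drift_thresholder import Drift_thresholder

reader = Reader(sep=",")
data = reader.train_test_split(
    Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
    target_name="Survived")
dft = Drift_thresholder()
dft.fit_transform(data)
for feature, score in sorted(dft.drifts().items(),
                             key=lambda kv: kv[1], reverse=True):
    print(feature, round(score, 3))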
Example #8
def test_fit_transform():
    """Test fit_transform method of Drift_thresholder class."""
    drift_thresholder = Drift_thresholder()
    reader = Reader(sep=",")
    dict = reader.train_test_split(Lpath=["data_for_tests/train.csv"],
                                   target_name="Survived")
    drift_thresholder.fit_transform(dict)
    assert not drift_thresholder._Drift_thresholder__fitOK
    dict = reader.train_test_split(
        Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
        target_name="Survived")
    drift_thresholder.fit_transform(dict)
    assert drift_thresholder._Drift_thresholder__fitOK
    dict = reader.train_test_split(Lpath=[
        "data_for_tests/inplace_train.csv", "data_for_tests/inplace_test.csv"
    ],
                                   target_name="Survived")
    drift_thresholder.inplace = True
    drift_thresholder.fit_transform(dict)
    assert drift_thresholder._Drift_thresholder__fitOK
Example #9
from mlbox.preprocessing.reader import Reader
from mlbox.preprocessing.drift_thresholder import Drift_thresholder
from mlbox.optimisation.optimiser import Optimiser
from mlbox.prediction.predictor import Predictor
#from mlbox.encoding import Categorical_encoder
from mlbox.model.classification import StackingClassifier, Classifier
import pandas as pd
import warnings
paths = ["train_1.csv", "test.csv"]
target_name = "Class"

rd = Reader(sep=",")
df = rd.train_test_split(paths, target_name)
print(df["train"].head())

dft = Drift_thresholder()
df = dft.fit_transform(df)

opt = Optimiser()
warnings.filterwarnings('ignore', category=DeprecationWarning)
score = opt.evaluate(None, df)

space = {
    'ne__numerical_strategy': {"search": "choice",
                               "space": [0, "mean"]},
    'ce__strategy': {"search": "choice",
                     "space": ["label_encoding", "random_projection",
                               "entity_embedding"]},
    'fs__threshold': {"search": "uniform",
                      "space": [0.001, 0.2]},
    'est__strategy': {"search": "choice",
                      "space": ["RandomForest", "ExtraTrees", "LightGBM"]}
}