Example #1
import numpy as np
import pytest
from sklearn.metrics import make_scorer

from mlbox.preprocessing import Reader, Drift_thresholder
from mlbox.optimisation import Optimiser


def test_evaluate_regression_optimiser():
    """Test evaluate method of Optimiser class for regression."""
    reader = Reader(sep=",")
    data = reader.train_test_split(
        Lpath=["data_for_tests/train_regression.csv",
               "data_for_tests/test_regression.csv"],
        target_name="SalePrice")
    drift_thresholder = Drift_thresholder()
    data = drift_thresholder.fit_transform(data)

    # Mean absolute percentage error (MAPE); greater_is_better=False makes
    # scikit-learn negate it, so a higher score still means a better model.
    mape = make_scorer(
        lambda y_true, y_pred:
        100 * np.sum(np.abs(y_true - y_pred) / y_true) / len(y_true),
        greater_is_better=False,
        needs_proba=False)
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=mape, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert -np.inf <= score

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=None, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert -np.inf <= score

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring="wrong_scoring", n_folds=3)
    assert len(record) == 1
    with pytest.warns(UserWarning) as record:
        score = opt.evaluate(None, data)
    assert -np.inf <= score
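The custom scorer above is plain scikit-learn: make_scorer wraps any metric function, and greater_is_better=False tells cross-validation to negate it so that larger scores always mean better models. A standalone sanity check of the metric, with made-up numbers:

import numpy as np
from sklearn.metrics import make_scorer

def mape(y_true, y_pred):
    """Mean absolute percentage error, in percent."""
    return 100 * np.sum(np.abs(y_true - y_pred) / y_true) / len(y_true)

scorer = make_scorer(mape, greater_is_better=False)  # negated during CV

y_true = np.array([100.0, 200.0, 300.0])
y_pred = np.array([110.0, 190.0, 300.0])
print(mape(y_true, y_pred))  # (10% + 5% + 0%) / 3 = 5.0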
Example #2
import numpy as np
import pytest

from mlbox.preprocessing import Reader, Drift_thresholder
from mlbox.optimisation import Optimiser


def test_evaluate_classification_optimiser():
    """Test evaluate method of Optimiser class for classification."""
    reader = Reader(sep=",")
    data = reader.train_test_split(
        Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
        target_name="Survived")
    drift_thresholder = Drift_thresholder()
    data = drift_thresholder.fit_transform(data)

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring=None, n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert -np.inf <= score

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring="roc_auc", n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert 0. <= score <= 1.

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring="wrong_scoring", n_folds=3)
    assert len(record) == 1
    with pytest.warns(UserWarning) as record:
        score = opt.evaluate(None, data)
    assert opt.scoring == "neg_log_loss"
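The strings accepted for scoring ("roc_auc", "accuracy", "neg_log_loss", ...) are standard scikit-learn scorer names, which is also why an unknown string makes Optimiser fall back to "neg_log_loss" with a warning. Assuming the delegation to scikit-learn's registry, the valid names can be listed (scikit-learn >= 1.0):

from sklearn.metrics import get_scorer_names  # scikit-learn >= 1.0

print(sorted(get_scorer_names()))  # includes 'roc_auc', 'accuracy', 'neg_log_loss'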
Example #3
import numpy as np
import pandas as pd
from unittest import mock
from sklearn.metrics import make_scorer

from mlbox.preprocessing import Reader, Drift_thresholder
from mlbox.optimisation import Optimiser
from mlbox.prediction import Predictor


# Predictor plots feature importances; patching plt.show (which supplies the
# mock_show argument) lets the test run headless.
@mock.patch("matplotlib.pyplot.show")
def test_fit_predict_predictor_regression(mock_show):
    """Test fit_predict method of Predictor class for regression."""
    rd = Reader(sep=',')
    dt = rd.train_test_split(
        Lpath=["data_for_tests/train_regression.csv",
               "data_for_tests/test_regression.csv"],
        target_name="SalePrice")

    drift_thresholder = Drift_thresholder()
    df = drift_thresholder.fit_transform(dt)

    # Negated MAPE scorer, as in Example #1.
    mape = make_scorer(
        lambda y_true, y_pred:
        100 * np.sum(np.abs(y_true - y_pred) / y_true) / len(y_true),
        greater_is_better=False,
        needs_proba=False)
    opt = Optimiser(scoring=mape, n_folds=3)

    opt.evaluate(None, df)

    space = {
        'ne__numerical_strategy': {"search": "choice",
                                   "space": [0]},
        'ce__strategy': {"search": "choice",
                         "space": ["label_encoding", "random_projection",
                                   "entity_embedding"]},
        'fs__threshold': {"search": "uniform",
                          "space": [0.01, 0.3]},
        'est__max_depth': {"search": "choice",
                           "space": [3, 4, 5, 6, 7]}
    }

    best = opt.optimise(space, df, 1)

    prd = Predictor(verbose=True)
    prd.fit_predict(best, df)

    # Predictions are written to save/<target>_predictions.csv.
    pred_df = pd.read_csv("save/SalePrice_predictions.csv")
    assert np.all(
        list(pred_df.columns) == ['Unnamed: 0', 'SalePrice_predicted'])
    assert np.shape(pred_df) == (1459, 2)
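A note on the space dictionaries used above and in the examples below: each key follows scikit-learn's "<step>__<parameter>" pipeline convention (in MLBox, ne is the missing-values encoder, ce the categorical encoder, fs the feature selector, and est the final estimator), "search" names the sampling strategy, and "space" gives its candidates. A minimal sketch of the convention, with illustrative values:

# "choice" samples from an explicit list; "uniform" draws from [low, high].
space = {
    'est__strategy': {"search": "choice", "space": ["LightGBM"]},  # categorical
    'est__max_depth': {"search": "choice", "space": [3, 5, 7]},    # discrete
    'fs__threshold': {"search": "uniform", "space": [0.01, 0.3]},  # continuous
}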
Example #4
import numpy as np
import pytest

from mlbox.preprocessing import Reader, Drift_thresholder
from mlbox.optimisation import Optimiser


def test_evaluate_and_optimise_classification():
    """Test evaluate_and_optimise method of Optimiser class."""
    reader = Reader(sep=",")

    data = reader.train_test_split(
        Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
        target_name="Survived")
    drift_thresholder = Drift_thresholder()
    data = drift_thresholder.fit_transform(data)

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring='accuracy', n_folds=3)
    assert len(record) == 1
    data_error = data.copy()
    data_error["target"] = data_error["target"].astype(str)
    with pytest.raises(ValueError):
        score = opt.evaluate(None, data_error)

    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring='accuracy', n_folds=3)
    assert len(record) == 1
    score = opt.evaluate(None, data)
    assert 0. <= score <= 1.

    space = {
        'ne__numerical_strategy': {
            "search": "choice",
            "space": [0]
        },
        'ce__strategy': {
            "search": "choice",
            "space": ["label_encoding", "random_projection",
                      "entity_embedding"]
        },
        'fs__threshold': {
            "search": "uniform",
            "space": [0.01, 0.3]
        },
        'est__max_depth': {
            "search": "choice",
            "space": [3, 4, 5, 6, 7]
        }
    }

    best = opt.optimise(space, data, 1)
    assert isinstance(best, dict)
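Here optimise returns the winning values as a plain dict keyed by the same "<step>__<parameter>" names, which is what the final assertion checks; it can be inspected directly or handed to Predictor.fit_predict as in Example #3. A small illustration (the output sketched in the comment is hypothetical):

# `best` maps each tuned key to its winning value, e.g. (made-up output):
#   ne__numerical_strategy -> 0
#   est__max_depth -> 5
for param, value in best.items():
    print(param, "->", value)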
Example #5
import warnings

import pandas as pd

from mlbox.preprocessing import Reader, Drift_thresholder
from mlbox.optimisation import Optimiser
from mlbox.model.classification import StackingClassifier, Classifier

paths = ["train_1.csv", "test.csv"]
target_name = "Class"

rd = Reader(sep=",")
df = rd.train_test_split(paths, target_name)
print(df["train"].head())

dft = Drift_thresholder()
df = dft.fit_transform(df)

opt = Optimiser()
warnings.filterwarnings('ignore', category=DeprecationWarning)
score = opt.evaluate(None, df)

space = {
    'ne__numerical_strategy': {"search": "choice",
                               "space": [0, "mean"]},
    'ce__strategy': {"search": "choice",
                     "space": ["label_encoding", "random_projection",
                               "entity_embedding"]},
    'fs__threshold': {"search": "uniform",
                      "space": [0.001, 0.2]},
    'est__strategy': {"search": "choice",
                      "space": ["RandomForest", "ExtraTrees", "LightGBM"]},
    'est__max_depth': {"search": "choice",
                       "space": [8, 9, 10, 11, 12, 13]}
}
"""
#Clf_feature_selector(strategy='l1', threshold=0.3)
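The snippet breaks off before the search itself is run; following the pattern of the other examples, the natural next step would be the line below (the 40-evaluation budget is an arbitrary illustrative choice):

best = opt.optimise(space, df, 40)  # run the search over `space`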
Example #6
# Notebook cosmetics: hide the Jupyter input-prompt column.
from IPython.display import display, HTML
display(HTML('<style>.prompt{width: 0px; min-width: 0px; visibility: collapse}</style>'))

import warnings
warnings.filterwarnings("ignore")

from mlbox.preprocessing import Reader, Drift_thresholder
from mlbox.optimisation import Optimiser

paths = ["/content/drive/MyDrive/train.csv","/content/drive/MyDrive/test.csv"] 
target_name = "class"
rd = Reader(sep=",")
df = rd.train_test_split(paths, target_name)

df["train"].head()

df = Drift_thresholder().fit_transform(df)

opt = Optimiser()
warnings.filterwarnings('ignore', category=DeprecationWarning)
score = opt.evaluate(None, df)

space = {
    'ne__numerical_strategy': {"search": "choice",
                               "space": [0, "mean"]},
    'ce__strategy': {"search": "choice",
                     "space": ["label_encoding", "random_projection",
                               "entity_embedding"]},
    'fs__threshold': {"search": "uniform",
                      "space": [0.001, 0.2]},
    'est__strategy': {"search": "choice",
                      "space": ["RandomForest", "ExtraTrees", "LightGBM"]},
    # Note: max_depth=-1 means "no limit" for LightGBM, but it is not a valid
    # depth for scikit-learn's RandomForest/ExtraTrees, which need a positive
    # int or None; trials pairing -1 with those strategies may fail.
    'est__max_depth': {"search": "choice",
                       "space": [-1, 9, 10, 11, 12, 13]}
}

params = opt.optimise(space, df, 50)
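optimise only returns the best parameter values; to train the full pipeline with them and score the test set, the Predictor step from Example #3 would follow. A sketch, assuming MLBox's save/<target>_predictions.csv output convention (so the file name below, class_predictions.csv, is inferred, not confirmed):

from mlbox.prediction import Predictor
import pandas as pd

# Refit with the winning parameters and write test-set predictions.
Predictor().fit_predict(params, df)
pred_df = pd.read_csv("save/class_predictions.csv")  # assumed output path
print(pred_df.head())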