Example 1
def test_benchmarkcv_binary(binary_var):
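    # Cross-validated benchmark over the default binary model dictionary:
    # bm_cv should return one row per model per fold, and every default
    # binary metric should average above 0.85 across the 3 folds.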
    X_train, y_train = binary_var["X_train"], binary_var["y_train"]
    metrics, metrics_proba, metrics_kwargs, sort_by, ascending = get_default_metric(
        "binary"
    )
    cv = 3

    result = bm_cv(
        X_train,
        y_train,
        cv,
        metrics,
        metrics_proba,
        metrics_kwargs,
        binary_model_dict(),
    )

    assert list(result.groupby("cv_idx").size().values) == [11] * cv
    assert list(result.groupby("model_name").size().values) == [3] * len(
        binary_model_dict()
    )
    assert result["accuracy_score"].mean() > 0.85
    assert result["recall_score"].mean() > 0.85
    assert result["precision_score"].mean() > 0.85
    assert result["f1_score"].mean() > 0.85
    assert result["roc_auc_score"].mean() > 0.85
    assert result["pr_auc_score"].mean() > 0.85
Example 2
def test_benchmarkcv_regression(regression_var):
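    # Cross-validated benchmark over the default regression model dictionary:
    # one row per model per fold, with MAE/MSE averaging below the given
    # bounds and R^2 / explained variance averaging above 0.5.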
    X_train, y_train = regression_var["X_train"], regression_var["y_train"]
    metrics, metrics_proba, metrics_kwargs, sort_by, ascending = get_default_metric(
        "regression"
    )
    cv = 3

    result = bm_cv(
        X_train,
        y_train,
        cv,
        metrics,
        metrics_proba,
        metrics_kwargs,
        regression_model_dict(),
    )

    assert list(result.groupby("cv_idx").size().values) == [13] * cv
    assert list(result.groupby("model_name").size().values) == [3] * len(
        regression_model_dict()
    )
    assert result["mean_absolute_error"].mean() < 8
    assert result["mean_squared_error"].mean() < 100
    assert result["r2_score"].mean() > 0.5
    assert result["explained_variance_score"].mean() > 0.5
Example 3
def test_get_default_metric_binary():
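    # Binary defaults: four label-based metrics, two probability-based metrics,
    # no per-metric kwargs, sorted by f1_score in descending order.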
    metrics, metrics_proba, metrics_kwargs, sort_by, ascending = get_default_metric(
        "binary"
    )

    assert metrics == [accuracy_score, recall_score, precision_score, f1_score]
    assert metrics_proba == [roc_auc_score, pr_auc_score]
    assert metrics_kwargs == {}
    assert sort_by == f1_score.__name__
    assert not ascending
Example 4
def test_get_default_metric_regression():
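    # Regression defaults: no probability metrics, no per-metric kwargs,
    # sorted by mean_absolute_error in ascending order (lower is better).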
    metrics, metrics_proba, metrics_kwargs, sort_by, ascending = get_default_metric(
        "regression"
    )

    assert metrics == [
        mean_absolute_error,
        mean_squared_error,
        r2_score,
        explained_variance_score,
    ]
    assert metrics_proba == []
    assert metrics_kwargs == {}
    assert sort_by == mean_absolute_error.__name__
    assert ascending
Example 5
def test_get_default_metric_multiclass():
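    # Multiclass defaults: the same label metrics as the binary case but with
    # macro averaging, only roc_auc_score as a probability metric, and the
    # same descending f1_score sort order.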
    metrics, metrics_proba, metrics_kwargs, sort_by, ascending = get_default_metric(
        "multiclass"
    )

    assert metrics == [accuracy_score, recall_score, precision_score, f1_score]
    assert metrics_proba == [roc_auc_score]
    assert metrics_kwargs == {
        "recall_score": {"average": "macro"},
        "precision_score": {"average": "macro"},
        "f1_score": {"average": "macro"},
        "roc_auc_score": {"average": "macro"},
    }
    assert sort_by == f1_score.__name__
    assert not ascending
Example 6
def test_benchmark_binary(binary_var):
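    # Hold-out benchmark on the binary dataset: one row per model in
    # binary_model_dict(), scored on the validation split, with every
    # default metric averaging above 0.85.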
    X_train, X_val = binary_var["X_train"], binary_var["X_val"]
    y_train, y_val = binary_var["y_train"], binary_var["y_val"]
    metrics, metrics_proba, metrics_kwargs, sort_by, ascending = get_default_metric(
        "binary"
    )
    result = bm(
        X_train,
        y_train,
        X_val,
        y_val,
        metrics,
        metrics_proba,
        metrics_kwargs,
        binary_model_dict(),
    )

    assert list(result.columns) == [
        "model_name",
        "accuracy_score",
        "recall_score",
        "precision_score",
        "f1_score",
        "roc_auc_score",
        "pr_auc_score",
    ]
    assert result.shape == (11, 7)
    assert list(result["model_name"]) == [
        "LogisticRegression",
        "GaussianNB",
        "KNeighborsClassifier",
        "DecisionTreeClassifier",
        "AdaBoostClassifier",
        "BaggingClassifier",
        "ExtraTreesClassifier",
        "GradientBoostingClassifier",
        "RandomForestClassifier",
        "XGBoost",
        "LightGBM",
    ]
    assert result["accuracy_score"].mean() > 0.85
    assert result["recall_score"].mean() > 0.85
    assert result["precision_score"].mean() > 0.85
    assert result["f1_score"].mean() > 0.85
    assert result["roc_auc_score"].mean() > 0.85
    assert result["pr_auc_score"].mean() > 0.85
Example 7
def test_benchmark_regression(regression_var):
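    # Hold-out benchmark on the regression dataset: one row per model in
    # regression_model_dict(), with error metrics below the given bounds
    # and R^2 / explained variance above 0.5 on average.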
    X_train, X_val = regression_var["X_train"], regression_var["X_val"]
    y_train, y_val = regression_var["y_train"], regression_var["y_val"]
    metrics, metrics_proba, metrics_kwargs, sort_by, ascending = get_default_metric(
        "regression"
    )
    result = bm(
        X_train,
        y_train,
        X_val,
        y_val,
        metrics,
        metrics_proba,
        metrics_kwargs,
        regression_model_dict(),
    )

    assert list(result.columns) == [
        "model_name",
        "mean_absolute_error",
        "mean_squared_error",
        "r2_score",
        "explained_variance_score",
    ]
    assert result.shape == (13, 5)
    assert list(result["model_name"]) == [
        "LinearRegression",
        "Ridge",
        "Lasso",
        "ElasticNet",
        "KNeighborsRegressor",
        "DecisionTreeRegressor",
        "AdaBoostRegressor",
        "BaggingRegressor",
        "ExtraTreesRegressor",
        "GradientBoostingRegressor",
        "RandomForestRegressor",
        "XGBoost",
        "LightGBM",
    ]
    assert result["mean_absolute_error"].mean() < 8
    assert result["mean_squared_error"].mean() < 100
    assert result["r2_score"].mean() > 0.5
    assert result["explained_variance_score"].mean() > 0.5
Example 8
def test_benchmark_multiclass(multiclass_var):
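    # Multiclass benchmark: the model dictionary is built with the number of
    # classes found in y_train, and every default metric should average
    # above 0.75.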
    X_train, X_val = multiclass_var["X_train"], multiclass_var["X_val"]
    y_train, y_val = multiclass_var["y_train"], multiclass_var["y_val"]
    num_class = np.unique(y_train).size
    metrics, metrics_proba, metrics_kwargs, sort_by, ascending = get_default_metric(
        "multiclass"
    )
    result = bm(
        X_train,
        y_train,
        X_val,
        y_val,
        metrics,
        metrics_proba,
        metrics_kwargs,
        multiclass_model_dict(num_class=num_class),
    )

    assert list(result.columns) == [
        "model_name",
        "accuracy_score",
        "recall_score",
        "precision_score",
        "f1_score",
        "roc_auc_score",
    ]
    assert result.shape == (8, 6)
    assert list(result["model_name"]) == [
        "LogisticRegression",
        "GaussianNB",
        "KNeighborsClassifier",
        "DecisionTreeClassifier",
        "ExtraTreesClassifier",
        "RandomForestClassifier",
        "XGBoost",
        "LightGBM",
    ]
    assert result["accuracy_score"].mean() > 0.75
    assert result["recall_score"].mean() > 0.75
    assert result["precision_score"].mean() > 0.75
    assert result["f1_score"].mean() > 0.75
    assert result["roc_auc_score"].mean() > 0.75
Example 9
def test_get_default_metric_error():
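    # An unrecognised problem type should raise a ValueError.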
    with pytest.raises(ValueError):
        get_default_metric("wrongtype")