@pytest.fixture
def real_fitted_model(data, features):
    """
    This fixture actually fits the model, so it takes time.
    Comment it out, along with all dependent tests, if you do not want to run
    real training.
    """

    X = data[features]

    model = LightGBMWrapper()
    model.fit(X, data[TARGET_COLUMN])

    return model
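# A minimal sketch of a test that would consume the real_fitted_model fixture
# above; the test name and the shape assertion are illustrative assumptions,
# not part of the original suite.
def test_real_fitted_model_predicts(real_fitted_model, data, features):

    X = data[features]

    # The fixture returns an already fitted LightGBMWrapper, so predict()
    # should yield one prediction per input row.
    predictions = real_fitted_model.predict(X)

    assert len(predictions) == len(X)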
def test_wrapper_fit(data, features):

    X = data[features]

    model = LightGBMWrapper()

    # Patch lightgbm's real training because it is time-consuming.
    with patch("lightgbm.train") as m:
        m.return_value = "mocked_value"
        model.fit(X, data[TARGET_COLUMN])

        assert model.categorical_features_extended is not None
        assert model.model == "mocked_value"
def test_custom_lgb_params_applied():

    model = LightGBMWrapper(
        lgb_params={
            "objective": "binary",
            "boosting": "gbdt",
            "metric": "auc"
        },
        lgb_training_params={
            "num_boost_round": 50,
            "early_stopping_rounds": 10
        },
        metrics={"auc": {
            "function": roc_auc_score,
            "use_proba": True
        }},
    )

    assert model.lgb_params == {
        "objective": "binary",
        "boosting": "gbdt",
        "metric": "auc",
    }

    assert model.lgb_training_params == {
        "num_boost_round": 50,
        "early_stopping_rounds": 10,
    }

    assert model.metrics == {
        "auc": {
            "function": roc_auc_score,
            "use_proba": True
        }
    }
def test_evaluate():

    model = LightGBMWrapper()

    # Patch the prediction methods so no trained model is required.
    with patch(
        "autoboosting.auto_estimator.LightGBMWrapper.predict"
    ) as pr, patch(
        "autoboosting.auto_estimator.LightGBMWrapper.predict_proba"
    ) as pr_pr:

        # The mocked predictions are identical to the labels passed to
        # evaluate(), so the F1 score must come out as exactly 1.0.
        fake_ys = np.array([1, 0, 0, 1])
        pr.return_value = fake_ys
        pr_pr.return_value = np.array([0.8, 0.2, 0.1, 0.9])

        metrics = model.evaluate(pd.DataFrame(), fake_ys)

    assert metrics["f1_score"] == 1.0
def train_model(
    path: pathlib.Path,
    filename: str,
    output_path: pathlib.Path,
) -> None:

    path_to_train = path / filename

    df = pd.read_csv(path_to_train, index_col=None)

    feature_columns = [
        i for i in df.columns if i not in (TARGET_COLUMN, ID_COLUMN)
    ]

    model = LightGBMWrapper()
    model.fit(df[feature_columns], df[TARGET_COLUMN])

    # Build the path where the trained classifier would be stored; this snippet
    # only prints it, so any actual persistence presumably happens elsewhere.
    output_path = output_path / "classifier"
    print(output_path)
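# A minimal usage sketch of train_model above; the directory names and file
# name are illustrative assumptions only.
train_model(
    path=pathlib.Path("data"),
    filename="train.csv",
    output_path=pathlib.Path("artifacts"),
)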
def lightgbm_wrapper_cross_val_score(
    lgb_wrapper_params,
    X: pd.DataFrame,
    y: np.ndarray,
    random_state: Optional[int] = None,
    cv: int = 2,
):
    kfold = StratifiedKFold(n_splits=cv,
                            shuffle=True,
                            random_state=random_state)

    # Collect each fold's metrics so they can be averaged across folds at the end.
    metrics_agg: Dict[str, List[float]] = defaultdict(list)

    for train, test in kfold.split(X, y):

        estimator = LightGBMWrapper(**lgb_wrapper_params)

        estimator.fit(X.iloc[train], y[train])
        metrics = estimator.evaluate(X.iloc[test], y[test])

        for k, v in metrics.items():
            metrics_agg[k].append(v)

    return {k: np.mean(v) for k, v in metrics_agg.items()}
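# A minimal sketch of calling the cross-validation helper above. The CSV path,
# the lgb_params values, and the fold count are illustrative assumptions, not
# defaults from the project.
def run_cross_validation(path_to_train: pathlib.Path) -> Dict[str, float]:
    df = pd.read_csv(path_to_train, index_col=None)
    feature_columns = [
        col for col in df.columns if col not in (TARGET_COLUMN, ID_COLUMN)
    ]
    # Returns the per-metric mean over the folds.
    return lightgbm_wrapper_cross_val_score(
        lgb_wrapper_params={"lgb_params": {"objective": "binary", "metric": "auc"}},
        X=df[feature_columns],
        y=df[TARGET_COLUMN].values,
        random_state=42,
        cv=5,
    )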
def test_non_fitted_model_exception():
    model = LightGBMWrapper()

    # Calling predict() before fit() should raise the wrapper's custom exception.
    with pytest.raises(ModelNotFittedException):
        model.predict(pd.DataFrame())