Esempio n. 1
0
def test_fit_with_unused_fit_params(tmp_path: pathlib.Path) -> None:
    """Smoke test: ``fit`` must accept ``eval_set=None`` without raising."""
    features, target = load_breast_cancer(return_X_y=True)

    estimator = OGBMClassifier(
        n_estimators=n_estimators,
        n_trials=n_trials,
        model_dir=tmp_path,
    )

    # The test passes as long as no exception propagates from fit().
    estimator.fit(features, target, eval_set=None)
Esempio n. 2
0
def test_fit_with_invalid_study(tmp_path: pathlib.Path) -> None:
    """``fit`` must raise ``ValueError`` when given a ``"maximize"`` study."""
    features, target = load_breast_cancer(return_X_y=True)

    maximize_study = study_module.create_study(direction="maximize")
    estimator = OGBMClassifier(study=maximize_study, model_dir=tmp_path)

    with pytest.raises(ValueError):
        estimator.fit(features, target)
Esempio n. 3
0
def test_hasattr(tmp_path: pathlib.Path, refit: bool,
                 early_stopping_rounds: Optional[int]) -> None:
    """Check which fitted attributes exist before and after ``fit``.

    Before fitting, every attribute listed in ``attrs`` must raise
    ``AttributeError``; after fitting, each must be an instance of the
    mapped type(s).

    Args:
        tmp_path: Temporary directory passed as ``model_dir``.
        refit: Forwarded to the classifier; selects which branch of the
            booster assertions below is exercised.
        early_stopping_rounds: Forwarded to ``fit``. ``None`` means the
            test expects ``best_iteration_`` to be ``None``.
    """
    X, y = load_breast_cancer(return_X_y=True)

    clf = OGBMClassifier(
        n_estimators=n_estimators,
        n_trials=n_trials,
        refit=refit,
        model_dir=tmp_path,
    )

    # Attribute name -> type (or tuple of types) expected once fitted.
    attrs = {
        "classes_": np.ndarray,
        "best_index_": int,
        "best_iteration_": (int, type(None)),
        "best_params_": dict,
        "best_score_": float,
        "booster_": (lgb.Booster, _VotingBooster),
        "encoder_": LabelEncoder,
        "feature_importances_": np.ndarray,
        "n_classes_": int,
        "n_features_": int,
        "n_splits_": int,
        "study_": study_module.Study,
    }

    # None of the fitted attributes may be reachable on an unfitted estimator.
    for attr in attrs:
        with pytest.raises(AttributeError):
            getattr(clf, attr)

    clf.fit(X, y, early_stopping_rounds=early_stopping_rounds)

    for attr, klass in attrs.items():
        assert isinstance(getattr(clf, attr), klass)

    if refit:
        assert hasattr(clf, "refit_time_")

        # The test expects the refitted booster to report best_iteration 0.
        assert clf.booster_.best_iteration == 0

    else:
        assert not hasattr(clf, "refit_time_")

        # Without refit, booster_ exposes a collection of boosters that are
        # checked one by one.
        boosters = clf.booster_.boosters

        for b in boosters:
            if early_stopping_rounds is None:
                assert b.best_iteration == clf.n_estimators
            else:
                assert b.best_iteration == clf.best_iteration_

    if early_stopping_rounds is None:
        assert clf.best_iteration_ is None
    else:
        assert clf.best_iteration_ > 0
Esempio n. 4
0
def test_fit_with_fit_params(
    tmp_path: pathlib.Path,
    callbacks: Optional[List[Callable]],
    eval_metric: Union[Callable, str],
) -> None:
    """Smoke test: ``fit`` must accept ``callbacks`` and ``eval_metric``."""
    features, target = load_breast_cancer(return_X_y=True)

    estimator = OGBMClassifier(
        n_estimators=n_estimators,
        n_trials=n_trials,
        model_dir=tmp_path,
    )

    # Passing is defined as fit() completing without an exception.
    estimator.fit(
        features,
        target,
        callbacks=callbacks,
        eval_metric=eval_metric,
    )
Esempio n. 5
0
def test_predict_with_unused_predict_params(tmp_path: pathlib.Path) -> None:
    """``predict`` called with ``pred_leaf=False`` returns one label per row."""
    features, target = load_breast_cancer(return_X_y=True)

    estimator = OGBMClassifier(
        n_estimators=n_estimators,
        n_trials=n_trials,
        model_dir=tmp_path,
    )
    estimator.fit(features, target)

    predictions = estimator.predict(features, pred_leaf=False)

    # Output must be an ndarray shaped exactly like the target vector.
    assert isinstance(predictions, np.ndarray)
    assert target.shape == predictions.shape
Esempio n. 6
0
def test_plot_importance(tmp_path: pathlib.Path, n_jobs: int) -> None:
    """Smoke test: a fitted, non-refitted classifier works with ``lgb.plot_importance``."""
    features, target = load_breast_cancer(return_X_y=True)

    settings = {
        "n_estimators": n_estimators,
        "n_jobs": n_jobs,
        "n_trials": n_trials,
        "refit": False,
        "model_dir": tmp_path,
    }
    estimator = OGBMClassifier(**settings)
    estimator.fit(features, target)

    # Only checks that plotting does not raise; the figure is not inspected.
    lgb.plot_importance(estimator)
Esempio n. 7
0
def test_refit(tmp_path: pathlib.Path,
               early_stopping_rounds: Optional[int]) -> None:
    """Refitted predictions must equal those of a plain ``LGBMClassifier``.

    The reference model is built from ``best_params_`` and uses either
    ``n_estimators`` (no early stopping) or ``best_iteration_`` rounds.
    """
    features, target = load_breast_cancer(return_X_y=True)

    tuned = OGBMClassifier(
        n_estimators=n_estimators,
        n_trials=n_trials,
        random_state=random_state,
        refit=True,
        model_dir=tmp_path,
    )
    tuned.fit(features, target, early_stopping_rounds=early_stopping_rounds)
    tuned_predictions = tuned.predict(features)

    # Number of boosting rounds the reference model should be trained for.
    expected_rounds = (
        n_estimators if early_stopping_rounds is None
        else tuned.best_iteration_
    )

    reference = lgb.LGBMClassifier(
        n_estimators=expected_rounds,
        **tuned.best_params_,
    )
    reference.fit(features, target)

    np.testing.assert_array_equal(
        tuned_predictions,
        reference.predict(features),
    )
Esempio n. 8
0
def test_predict_with_predict_params(tmp_path: pathlib.Path,
                                     num_iteration: Optional[int]) -> None:
    """``predict`` accepts ``num_iteration`` and returns one label per row."""
    features, target = load_breast_cancer(return_X_y=True)

    estimator = OGBMClassifier(
        n_estimators=n_estimators,
        n_trials=n_trials,
        model_dir=tmp_path,
    )
    estimator.fit(features, target)

    predictions = estimator.predict(features, num_iteration=num_iteration)

    # Output must be an ndarray shaped exactly like the target vector.
    assert isinstance(predictions, np.ndarray)
    assert target.shape == predictions.shape
Esempio n. 9
0
def test_fit_twice_without_study(tmp_path: pathlib.Path, n_jobs: int) -> None:
    """Two separately constructed classifiers must produce equal trial values.

    The second classifier additionally receives explicit values for several
    LightGBM hyperparameters; the recorded ``value`` column of both studies
    is then compared element-wise.
    """
    features, target = load_breast_cancer(return_X_y=True)

    # Keyword arguments shared by both estimators.
    shared_kwargs = {
        "n_estimators": n_estimators,
        "n_jobs": n_jobs,
        "n_trials": n_trials,
        "random_state": random_state,
        "model_dir": tmp_path,
    }

    first = OGBMClassifier(**shared_kwargs)
    first.fit(features, target)
    first_values = first.study_.trials_dataframe()["value"]

    second = OGBMClassifier(
        bagging_fraction=1.0,
        bagging_freq=0,
        feature_fraction=1.0,
        lambda_l1=0.0,
        lambda_l2=0.0,
        min_data_in_leaf=20,
        **shared_kwargs,
    )
    second.fit(features, target)
    second_values = second.study_.trials_dataframe()["value"]

    np.testing.assert_array_equal(first_values, second_values)
Esempio n. 10
0
def test_fit_with_group_k_fold(tmp_path: pathlib.Path) -> None:
    """Smoke test: ``fit`` works with a ``GroupKFold`` splitter and ``groups``.

    Each sample is assigned one of 10 group labels. The labels come from a
    locally seeded generator so that the fold layout is identical on every
    run and the test neither depends on nor advances the global NumPy RNG.
    """
    X, y = load_breast_cancer(return_X_y=True)

    clf = OGBMClassifier(
        cv=GroupKFold(5),
        n_estimators=n_estimators,
        n_trials=n_trials,
        model_dir=tmp_path,
    )

    n_samples, _ = X.shape
    # Fixed seed: reproducible group assignment across test runs.
    rng = np.random.RandomState(0)
    groups = rng.choice(10, size=n_samples)

    clf.fit(X, y, groups=groups)
Esempio n. 11
0
def test_fit_with_pruning(tmp_path: pathlib.Path) -> None:
    """With ``enable_pruning=True`` at least one trial must end up pruned."""
    features, target = load_breast_cancer(return_X_y=True)

    estimator = OGBMClassifier(enable_pruning=True, model_dir=tmp_path)
    estimator.fit(features, target)

    study = estimator.study_
    # Use get_trials() when the study provides it, else the trials attribute.
    trials = study.get_trials() if hasattr(study, "get_trials") else study.trials

    assert any(trial.state == structs.TrialState.PRUNED for trial in trials)
Esempio n. 12
0
def test_score(tmp_path: pathlib.Path) -> None:
    """The tuned classifier must score at least as well as a default LightGBM model."""
    features, target = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        random_state=random_state,
    )

    # Baseline: plain LGBMClassifier on the same split.
    baseline = lgb.LGBMClassifier(random_state=random_state)
    baseline.fit(X_train, y_train)
    baseline_score = baseline.score(X_test, y_test)

    tuned = OGBMClassifier(random_state=random_state, model_dir=tmp_path)
    tuned.fit(X_train, y_train)

    assert baseline_score <= tuned.score(X_test, y_test)
Esempio n. 13
0
def test_fit_with_empty_param_distributions(tmp_path: pathlib.Path) -> None:
    """With ``param_distributions={}`` every trial must record the same value."""
    features, target = load_breast_cancer(return_X_y=True)

    settings = {
        "colsample_bytree": 0.1,
        "n_estimators": n_estimators,
        "n_trials": n_trials,
        "param_distributions": {},
        "model_dir": tmp_path,
    }
    estimator = OGBMClassifier(**settings)
    estimator.fit(features, target)

    trial_values = estimator.study_.trials_dataframe()["value"]

    # Exactly one distinct objective value across all trials.
    assert trial_values.nunique() == 1
Esempio n. 14
0
def test_ogbm_classifier(tmp_path: pathlib.Path) -> None:
    """Run selected scikit-learn estimator checks on ``OGBMClassifier``.

    The test is skipped when scikit-learn older than 0.20.0 is installed.
    """
    pytest.importorskip("sklearn", minversion="0.20.0")

    # NOTE: the full ``check_estimator`` suite is not run here; only the
    # pickling and ``set_params`` checks are exercised.
    from sklearn.utils.estimator_checks import (
        check_estimators_pickle,
        check_set_params,
    )

    estimator = OGBMClassifier(model_dir=tmp_path)
    estimator_name = type(estimator).__name__

    check_estimators_pickle(estimator_name, estimator)
    check_set_params(estimator_name, estimator)
Esempio n. 15
0
def test_fit_twice_with_study(tmp_path: pathlib.Path,
                              storage: Optional[str]) -> None:
    """Fitting twice with a shared study must add ``n_trials`` trials each time."""
    features, target = load_breast_cancer(return_X_y=True)

    shared_study = study_module.create_study(storage=storage)
    estimator = OGBMClassifier(
        n_estimators=n_estimators,
        n_trials=n_trials,
        study=shared_study,
        model_dir=tmp_path,
    )

    # After the k-th fit the study should hold k * n_trials trials.
    for fit_count in (1, 2):
        estimator.fit(features, target)

        assert len(shared_study.trials) == fit_count * n_trials
Esempio n. 16
0
def test_fit_with_params(
    tmp_path: pathlib.Path,
    boosting_type: str,
    objective: Optional[Union[Callable, str]],
) -> None:
    """``fit`` works for each ``boosting_type``/``objective`` combination.

    The one exception is ``boosting_type == "rf"`` with a callable objective,
    where a ``LightGBMError`` is expected.
    """
    features, target = load_breast_cancer(return_X_y=True)

    estimator = OGBMClassifier(
        boosting_type=boosting_type,
        n_estimators=n_estimators,
        n_trials=n_trials,
        objective=objective,
        model_dir=tmp_path,
    )

    # See https://github.com/microsoft/LightGBM/issues/2328
    expects_failure = boosting_type == "rf" and callable(objective)

    if expects_failure:
        with pytest.raises(lgb.basic.LightGBMError):
            estimator.fit(features, target)
        return

    estimator.fit(features, target)