Example #1
def learn_model(draw, X, y, task, learner, n_estimators, n_targets):
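    # `draw` is supplied by Hypothesis: this function appears to be used as
    # an @st.composite strategy that returns a fitted model together with
    # its predictions on the training data.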
    # For LightGBM and XGBoost, randomly choose whether to return the raw
    # booster or the sklearn wrapper object.
    use_sklearn_estimator = draw(st.booleans())
    if learner == 'xgb':
        assume(has_xgboost())
        if task == 'regression':
            objective = draw(
                st.sampled_from(['reg:squarederror', 'reg:pseudohubererror']))
            model = xgb.XGBRegressor(n_estimators=n_estimators,
                                     tree_method='gpu_hist',
                                     objective=objective,
                                     enable_categorical=True,
                                     verbosity=0).fit(X, y)
        elif task == 'classification':
            valid_objectives = [
                'binary:logistic',
                'binary:hinge',
                'binary:logitraw',
                'count:poisson',
            ]
            if n_targets > 2:
                valid_objectives += [
                    'rank:pairwise', 'rank:ndcg', 'rank:map', 'multi:softmax',
                    'multi:softprob'
                ]

            objective = draw(st.sampled_from(valid_objectives))
            model = xgb.XGBClassifier(n_estimators=n_estimators,
                                      tree_method='gpu_hist',
                                      objective=objective,
                                      enable_categorical=True,
                                      verbosity=0).fit(X, y)
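        # Predict raw (untransformed) margins rather than probabilities.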
        pred = model.predict(X, output_margin=True)
        if not use_sklearn_estimator:
            model = model.get_booster()
        return model, pred
    elif learner == 'rf':
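        # Use the GPU predict path only when the targets are float32 (the
        # GPU path presumably requires float32); otherwise predict on CPU.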
        predict_model = 'GPU' if y.dtype == np.float32 else 'CPU'
        if task == 'regression':
            model = cuml.ensemble.RandomForestRegressor(
                n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict(X, predict_model=predict_model)
        elif task == 'classification':
            model = cuml.ensemble.RandomForestClassifier(
                n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict_proba(X)
        return model, pred
    elif learner == 'skl_rf':
        assume(has_sklearn())
        if task == 'regression':
            model = sklrfr(n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict(X)
        elif task == 'classification':
            model = sklrfc(n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict_proba(X)
        return model, pred
    elif learner == 'lgbm':
        assume(has_lightgbm())
        if task == 'regression':
            model = lgb.LGBMRegressor(n_estimators=n_estimators).fit(X, y)
        elif task == 'classification':
            model = lgb.LGBMClassifier(n_estimators=n_estimators).fit(X, y)
        pred = model.predict(X, raw_score=True)
        if not use_sklearn_estimator:
            model = model.booster_
        return model, pred
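
Because `learn_model` takes a `draw` argument, it is presumably wrapped with Hypothesis's `@st.composite` decorator and consumed from a test. A minimal sketch of that wiring, assuming hypothesis and scikit-learn are available (the test name, data shapes, and settings are illustrative, not from the original):

from hypothesis import given, settings, strategies as st
from sklearn.datasets import make_regression

# Hypothetical wiring: st.composite turns learn_model into a strategy
# factory and supplies the `draw` argument automatically.
learn_model_st = st.composite(learn_model)

@settings(deadline=None)
@given(data=st.data())
def test_learn_model_sketch(data):
    X, y = make_regression(n_samples=100, n_features=5, random_state=0)
    model, pred = data.draw(
        learn_model_st(X, y, task='regression',
                       learner='skl_rf', n_estimators=10, n_targets=1))
    assert len(pred) == len(X)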
Example #2
@pytest.mark.skipif(has_xgboost() is False, reason="need to install xgboost")
def test_output_args(small_classifier_and_preds):
    model_path, X, xgb_preds = small_classifier_and_preds
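    # Load the saved XGBoost model into FIL; with output_class=False,
    # predict() returns raw scores and the threshold is not applied.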
    fm = ForestInference.load(model_path,
                              algo='TREE_REORG',
                              output_class=False,
                              threshold=0.50)
    X = np.asarray(X)
    fil_preds = fm.predict(X)
    fil_preds = np.reshape(fil_preds, np.shape(xgb_preds))

    assert array_equal(fil_preds, xgb_preds, 1e-3)


@pytest.mark.parametrize('num_classes', [2, 5])
@pytest.mark.skipif(has_lightgbm() is False, reason="need to install lightgbm")
def test_lightgbm(tmp_path, num_classes):
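    # Train a small LightGBM model on synthetic data; the parametrization
    # exercises both binary and multiclass (5-class) objectives.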
    import lightgbm as lgb
    X, y = simulate_data(500,
                         10 if num_classes == 2 else 50,
                         num_classes,
                         random_state=43210,
                         classification=True)
    train_data = lgb.Dataset(X, label=y)
    num_round = 5
    model_path = str(os.path.join(tmp_path, 'lgb.model'))

    if num_classes == 2:
        param = {
            'objective': 'binary',
            'metric': 'binary_logloss',
Example #3
from cuml.common.exceptions import NotFittedError
from cuml.ensemble import RandomForestRegressor as curfr
from cuml.ensemble import RandomForestClassifier as curfc
import cuml
from cuml.testing.utils import as_type
# Availability helpers used below; the import path may differ across cuml
# versions (assumed here to be cuml.internals.import_utils).
from cuml.internals.import_utils import has_lightgbm, has_shap, has_sklearn

# See issue #4729
# Xgboost disabled due to CI failures
xgb = None


def has_xgboost():
    return False


if has_lightgbm():
    import lightgbm as lgb
if has_shap():
    import shap
if has_sklearn():
    from sklearn.datasets import make_regression, make_classification
    from sklearn.ensemble import RandomForestRegressor as sklrfr
    from sklearn.ensemble import RandomForestClassifier as sklrfc
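
# These availability guards are typically paired with pytest skip markers;
# a sketch of the usual pattern (hypothetical test name):
#
#     @pytest.mark.skipif(not has_lightgbm(),
#                         reason="need to install lightgbm")
#     def test_needs_lightgbm():
#         ...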


def make_classification_with_categorical(*,
                                         n_samples,
                                         n_features,
                                         n_categorical,
                                         n_informative,
                                         n_redundant,