Example #1
0
def _build_fil_classifier(m, data, args, tmpdir):
    """Train an XGBoost model, save it in ``tmpdir`` and load it with ``m``.

    Setup function for FIL classification benchmarking.

    Parameters
    ----------
    m
        Object exposing ``load(path, algo=..., output_class=...,
        threshold=..., storage_type=...)``; its result is returned.
    data
        Indexable whose items 0 and 1 are passed to
        ``_training_data_to_numpy``; ``data[0]`` must expose ``.shape``
        with at least two entries.
    args
        Mapping that is merged into the XGBoost parameters and must
        contain ``max_depth``, ``num_rounds``, ``fil_algo``,
        ``output_class``, ``threshold`` and ``storage_type``.
    tmpdir
        Directory in which the trained model file is written.

    Raises
    ------
    ImportError
        If ``has_xgboost()`` reports that XGBoost is unavailable.
    """
    from cuml.common.import_utils import has_xgboost
    if not has_xgboost():
        raise ImportError("No XGBoost package found")
    import xgboost as xgb

    features, labels = _training_data_to_numpy(data[0], data[1])
    dmatrix = xgb.DMatrix(features, label=labels)

    # Defaults go in first so any matching key in ``args`` overrides them.
    xgb_params = dict(
        silent=1,
        eval_metric="error",
        objective="binary:logistic",
        tree_method="gpu_hist",
    )
    xgb_params.update(args)

    depth = args["max_depth"]
    rounds = args["num_rounds"]
    shape = data[0].shape
    feature_count = shape[1]
    sample_count = shape[0]

    # The file name encodes the training configuration.
    file_name = (
        f"xgb_{depth}_{rounds}_{feature_count}_{sample_count}.model"
    )
    saved_path = os.path.join(tmpdir, file_name)

    booster = xgb.train(xgb_params, dmatrix, rounds)
    booster.save_model(saved_path)

    load_model = m.load
    load_options = {
        "algo": args["fil_algo"],
        "output_class": args["output_class"],
        "threshold": args["threshold"],
        "storage_type": args["storage_type"],
    }
    return load_model(saved_path, **load_options)
Example #2
0
def _build_treelite_classifier(m, data, args, tmpdir):
    """Compile a saved XGBoost model with treelite and return a predictor.

    Setup function for treelite classification benchmarking. The model
    file is looked up in ``tmpdir`` under a name derived from ``args`` and
    the shape of ``data[0]``; this function only loads that file, it does
    not train or write the XGBoost model itself.

    Parameters
    ----------
    m
        Not used by this function.
    data
        Indexable whose item 0 exposes ``.shape`` with at least two
        entries.
    args
        Mapping containing ``max_depth`` and ``num_rounds``.
    tmpdir
        Directory holding the saved model; the compiled shared library is
        written next to it.

    Raises
    ------
    ImportError
        If ``has_treelite()`` or ``has_xgboost()`` reports the package as
        unavailable (treelite is checked first).
    """
    from cuml.common.import_utils import has_treelite, has_xgboost
    if not has_treelite():
        raise ImportError("No treelite package found")
    import treelite
    import treelite.runtime
    if not has_xgboost():
        raise ImportError("No XGBoost package found")
    import xgboost as xgb

    depth = args["max_depth"]
    rounds = args["num_rounds"]
    shape = data[0].shape
    feature_count = shape[1]
    sample_count = shape[0]
    file_name = (
        f"xgb_{depth}_{rounds}_{feature_count}_{sample_count}.model"
    )
    saved_path = os.path.join(tmpdir, file_name)

    booster = xgb.Booster()
    booster.load_model(saved_path)
    compiled_model = treelite.Model.from_xgboost(booster)

    # The shared library path is the model path with "treelite.so" appended.
    shared_lib = saved_path + "treelite.so"
    compiled_model.export_lib(
        toolchain="gcc",
        libpath=shared_lib,
        params={'parallel_comp': 40},
        verbose=False,
    )
    return treelite.runtime.Predictor(shared_lib, verbose=False)
Example #3
0
def test_fil_input_types(input_type):
    """Check that the 'FIL' pair yields a cuML accuracy for ``input_type``.

    The comparison is run with ``run_cpu=False``; the test is marked as an
    expected failure when ``has_xgboost()`` returns a falsy value.
    """
    fil_pair = algorithms.algorithm_by_name('FIL')

    xgboost_available = has_xgboost()
    if not xgboost_available:
        pytest.xfail()

    comparison = AccuracyComparisonRunner(
        [20],
        [5],
        dataset_name='classification',
        test_fraction=0.5,
        input_type=input_type,
    )
    all_results = comparison.run(fil_pair, run_cpu=False)
    first_result = all_results[0]
    assert first_result["cuml_acc"] is not None
Example #4
0
def test_real_algos_runner(algo_name):
    """Run the accuracy comparison for ``algo_name`` and check cuML's score.

    The test is marked as an expected failure when ``algo_name`` is 'UMAP'
    and ``has_umap()`` is falsy, or when it is 'FIL' and ``has_xgboost()``
    is falsy. The first result is printed before the assertion.
    """
    algo_pair = algorithms.algorithm_by_name(algo_name)

    # Each availability helper is only called for its own algorithm name.
    umap_unavailable = algo_name == 'UMAP' and not has_umap()
    xgboost_unavailable = algo_name == 'FIL' and not has_xgboost()
    if umap_unavailable or xgboost_unavailable:
        pytest.xfail()

    comparison = AccuracyComparisonRunner(
        [20],
        [5],
        dataset_name='classification',
        test_fraction=0.20,
    )
    all_results = comparison.run(algo_pair)
    first_result = all_results[0]
    print(first_result)
    assert first_result["cuml_acc"] is not None
Example #5
0
import pytest
import os

from cuml import ForestInference
from cuml.test.utils import array_equal, unit_param, \
    quality_param, stress_param
from cuml.common.import_utils import has_xgboost
from cuml.common.import_utils import has_lightgbm

from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import GradientBoostingClassifier, \
    GradientBoostingRegressor, RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split

if has_xgboost():
    import xgboost as xgb


def simulate_data(m, n, k=2, random_state=None, classification=True, bias=0.0):
    if classification:
        features, labels = make_classification(n_samples=m,
                                               n_features=n,
                                               n_informative=int(n / 5),
                                               n_classes=k,
                                               random_state=random_state)
    else:
        features, labels = make_regression(n_samples=m,
                                           n_features=n,
                                           n_informative=int(n / 5),
                                           n_targets=1,
Example #6
0
# limitations under the License.
#

import pytest
import treelite
import numpy as np
import cupy as cp
import cudf
from cuml.experimental.explainer.tree_shap import TreeExplainer
from cuml.common.import_utils import has_xgboost, has_shap
from cuml.common.exceptions import NotFittedError
from cuml.ensemble import RandomForestRegressor as curfr
from cuml.ensemble import RandomForestClassifier as curfc
from sklearn.datasets import make_regression, make_classification

if has_xgboost():
    import xgboost as xgb
if has_shap():
    import shap


@pytest.mark.parametrize('objective', [
    'reg:linear', 'reg:squarederror', 'reg:squaredlogerror',
    'reg:pseudohubererror'
])
@pytest.mark.skipif(not has_xgboost(), reason="need to install xgboost")
@pytest.mark.skipif(not has_shap(), reason="need to install shap")
def test_xgb_regressor(objective):
    n_samples = 100
    X, y = make_regression(n_samples=n_samples,
                           n_features=8,