def _build_fil_classifier(m, data, args, tmpdir):
    """Train an XGBoost classifier, save it, and load it back through ``m``.

    A booster is trained on ``data[0]`` (features) and ``data[1]`` (labels),
    written to a file inside ``tmpdir`` and then passed to ``m.load``.

    Parameters
    ----------
    m
        Object exposing ``load(path, algo=..., output_class=...,
        threshold=..., storage_type=...)``.
    data
        Indexable pair; ``data[0]`` must expose a two-element ``.shape``.
    args
        Mapping that must contain ``"max_depth"``, ``"num_rounds"``,
        ``"fil_algo"``, ``"output_class"``, ``"threshold"`` and
        ``"storage_type"``. The whole mapping is also merged into the
        XGBoost training parameters, overriding the defaults below.
    tmpdir
        Directory in which the trained model file is written.

    Returns
    -------
    Whatever ``m.load`` returns for the saved model.

    Raises
    ------
    ImportError
        If ``has_xgboost()`` reports that XGBoost is unavailable.
    """
    from cuml.common.import_utils import has_xgboost

    if not has_xgboost():
        raise ImportError("No XGBoost package found")
    import xgboost as xgb

    features, labels = _training_data_to_numpy(data[0], data[1])
    training_matrix = xgb.DMatrix(features, label=labels)

    # Defaults first; anything supplied in ``args`` takes precedence.
    booster_params = dict(
        silent=1,
        eval_metric="error",
        objective="binary:logistic",
        tree_method="gpu_hist",
    )
    booster_params.update(args)

    depth = args["max_depth"]
    rounds = args["num_rounds"]
    feature_count = data[0].shape[1]
    sample_count = data[0].shape[0]

    # The file name encodes the training configuration.
    saved_model = os.path.join(
        tmpdir,
        f"xgb_{depth}_{rounds}_{feature_count}_{sample_count}.model",
    )

    booster = xgb.train(booster_params, training_matrix, rounds)
    booster.save_model(saved_model)

    load_options = {
        "algo": args["fil_algo"],
        "output_class": args["output_class"],
        "threshold": args["threshold"],
        "storage_type": args["storage_type"],
    }
    return m.load(saved_model, **load_options)
def _build_treelite_classifier(m, data, args, tmpdir):
    """Compile a saved XGBoost model with treelite and return a predictor.

    No training happens here: the booster is loaded from a file in
    ``tmpdir`` whose name is derived from ``args`` and the shape of
    ``data[0]`` (the same naming pattern ``_build_fil_classifier`` saves
    under), so that file must already exist.

    Parameters
    ----------
    m
        Unused by this function.
    data
        Indexable; ``data[0]`` must expose a two-element ``.shape``.
    args
        Mapping that must contain ``"max_depth"`` and ``"num_rounds"``.
    tmpdir
        Directory holding the saved model; the compiled shared library is
        written next to it.

    Returns
    -------
    treelite.runtime.Predictor
        Predictor backed by the freshly exported shared library.

    Raises
    ------
    ImportError
        If treelite or XGBoost is reported as unavailable.
    """
    from cuml.common.import_utils import has_treelite, has_xgboost

    if not has_treelite():
        raise ImportError("No treelite package found")
    import treelite
    import treelite.runtime

    if not has_xgboost():
        raise ImportError("No XGBoost package found")
    import xgboost as xgb

    depth = args["max_depth"]
    rounds = args["num_rounds"]
    feature_count = data[0].shape[1]
    sample_count = data[0].shape[0]
    saved_model = os.path.join(
        tmpdir,
        f"xgb_{depth}_{rounds}_{feature_count}_{sample_count}.model",
    )

    booster = xgb.Booster()
    booster.load_model(saved_model)

    # The shared library path is the model path with a suffix appended.
    shared_lib = saved_model + "treelite.so"
    compiled = treelite.Model.from_xgboost(booster)
    compiled.export_lib(
        toolchain="gcc",
        libpath=shared_lib,
        params={'parallel_comp': 40},
        verbose=False,
    )
    return treelite.runtime.Predictor(shared_lib, verbose=False)
def test_fil_input_types(input_type):
    """Run the 'FIL' algorithm pair for ``input_type`` and expect an accuracy.

    The test is marked as an expected failure when XGBoost is unavailable.
    Only the cuML side is run (``run_cpu=False``).
    """
    fil_pair = algorithms.algorithm_by_name('FIL')

    if not has_xgboost():
        pytest.xfail()

    comparison = AccuracyComparisonRunner(
        [20],
        [5],
        dataset_name='classification',
        test_fraction=0.5,
        input_type=input_type,
    )

    all_results = comparison.run(fil_pair, run_cpu=False)
    first_result = all_results[0]
    assert first_result["cuml_acc"] is not None
def test_real_algos_runner(algo_name):
    """Run the accuracy comparison for ``algo_name`` and expect a cuML score.

    The test is marked as an expected failure when the optional dependency
    for 'UMAP' or 'FIL' is reported as missing. The first result is printed
    before the assertion.
    """
    pair = algorithms.algorithm_by_name(algo_name)

    # Each availability check only runs for the algorithm that needs it.
    umap_unavailable = algo_name == 'UMAP' and not has_umap()
    xgboost_unavailable = algo_name == 'FIL' and not has_xgboost()
    if umap_unavailable or xgboost_unavailable:
        pytest.xfail()

    comparison = AccuracyComparisonRunner(
        [20],
        [5],
        dataset_name='classification',
        test_fraction=0.20,
    )

    first_result = comparison.run(pair)[0]
    print(first_result)
    assert first_result["cuml_acc"] is not None
import pytest import os from cuml import ForestInference from cuml.test.utils import array_equal, unit_param, \ quality_param, stress_param from cuml.common.import_utils import has_xgboost from cuml.common.import_utils import has_lightgbm from sklearn.datasets import make_classification, make_regression from sklearn.ensemble import GradientBoostingClassifier, \ GradientBoostingRegressor, RandomForestClassifier, RandomForestRegressor from sklearn.metrics import accuracy_score, mean_squared_error from sklearn.model_selection import train_test_split if has_xgboost(): import xgboost as xgb def simulate_data(m, n, k=2, random_state=None, classification=True, bias=0.0): if classification: features, labels = make_classification(n_samples=m, n_features=n, n_informative=int(n / 5), n_classes=k, random_state=random_state) else: features, labels = make_regression(n_samples=m, n_features=n, n_informative=int(n / 5), n_targets=1,
# limitations under the License. # import pytest import treelite import numpy as np import cupy as cp import cudf from cuml.experimental.explainer.tree_shap import TreeExplainer from cuml.common.import_utils import has_xgboost, has_shap from cuml.common.exceptions import NotFittedError from cuml.ensemble import RandomForestRegressor as curfr from cuml.ensemble import RandomForestClassifier as curfc from sklearn.datasets import make_regression, make_classification if has_xgboost(): import xgboost as xgb if has_shap(): import shap @pytest.mark.parametrize('objective', [ 'reg:linear', 'reg:squarederror', 'reg:squaredlogerror', 'reg:pseudohubererror' ]) @pytest.mark.skipif(not has_xgboost(), reason="need to install xgboost") @pytest.mark.skipif(not has_shap(), reason="need to install shap") def test_xgb_regressor(objective): n_samples = 100 X, y = make_regression(n_samples=n_samples, n_features=8,