def __init__(self, transformers=None, remainder='drop', sparse_threshold=0.3,
             n_jobs=None, transformer_weights=None, verbose=False):
    if not has_sklearn():
        raise ImportError("Scikit-learn is needed to use the "
                          "Column Transformer")
    if not transformers:
        warnings.warn('Transformers are required')
    self.transformers = transformers
    self.remainder = remainder
    self.sparse_threshold = sparse_threshold
    self.n_jobs = n_jobs
    self.transformer_weights = transformer_weights
    self.verbose = verbose
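# Illustrative usage sketch (not part of the original module). It assumes
# this __init__ belongs to cuml's ColumnTransformer (mirroring
# sklearn.compose.ColumnTransformer) and that cuml.preprocessing provides
# StandardScaler and OneHotEncoder; the helper name below is made up.
def _example_column_transformer_usage():
    import cudf
    from cuml.preprocessing import StandardScaler, OneHotEncoder

    df = cudf.DataFrame({'num': [0.0, 1.0, 2.0], 'cat': ['a', 'b', 'a']})
    ct = ColumnTransformer(
        transformers=[('scale', StandardScaler(), ['num']),
                      ('onehot', OneHotEncoder(), ['cat'])],
        remainder='drop')
    # Scales 'num' and one-hot encodes 'cat'; remaining columns are dropped.
    return ct.fit_transform(df)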
def _generate_hypercube(samples, dimensions, rng):
    """Returns distinct binary samples of length dimensions."""
    if not has_sklearn():
        raise RuntimeError("Scikit-learn is needed to run "
                           "make_classification.")

    from sklearn.utils.random import sample_without_replacement
    if dimensions > 30:
        # Only 30 dimensions can be sampled without replacement at once;
        # draw the remaining columns from the caller's rng (rather than the
        # global state) so the output stays reproducible for a fixed seed.
        return np.hstack([rng.randint(2, size=(samples, dimensions - 30)),
                          _generate_hypercube(samples, 30, rng)])
    random_state = int(rng.randint(dimensions))
    out = sample_without_replacement(2 ** dimensions, samples,
                                     random_state=random_state).astype(
                                         dtype='>u4', copy=False)
    # Unpack each sampled integer into its bits and keep the last
    # `dimensions` of them as one binary sample per row.
    out = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:]
    return out
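# Illustrative check (not part of the original module): a quick sketch,
# assuming `np` is NumPy in this fragment, showing that _generate_hypercube
# returns distinct binary rows.
def _example_generate_hypercube():
    import numpy
    rng = numpy.random.RandomState(42)
    out = _generate_hypercube(samples=4, dimensions=5, rng=rng)
    assert out.shape == (4, 5)                  # 4 length-5 samples
    assert set(out.ravel().tolist()) <= {0, 1}  # binary entries
    # Sampling without replacement guarantees the rows are distinct.
    assert len({tuple(row.tolist()) for row in out}) == 4
    return out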
def fit(self, X, y) -> 'MulticlassClassifier':
    """
    Fit a multiclass classifier.
    """
    if not has_sklearn():
        raise ImportError("Scikit-learn is needed to use "
                          "MulticlassClassifier derived classes.")
    import sklearn.multiclass
    if self.strategy == 'ovr':
        self.multiclass_estimator = sklearn.multiclass.OneVsRestClassifier(
            self.estimator, n_jobs=None)
    elif self.strategy == 'ovo':
        self.multiclass_estimator = sklearn.multiclass.OneVsOneClassifier(
            self.estimator, n_jobs=None)
    else:
        raise ValueError('Invalid multiclass strategy '
                         + str(self.strategy)
                         + ', must be one of {"ovr", "ovo"}')
    X, _, _, _, _ = input_to_host_array(X)
    y, _, _, _, _ = input_to_host_array(y)
    # The wrapped estimator is a host (scikit-learn) object, so leave the
    # cuml internal API context before fitting it.
    with cuml.internals.exit_internal_api():
        self.multiclass_estimator.fit(X, y)
    return self
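# Illustrative usage sketch (not part of the original module). It assumes
# this fit belongs to cuml.multiclass.MulticlassClassifier wrapping a cuml
# estimator such as LogisticRegression; the dataset and parameters below
# are made up for exposition.
def _example_multiclass_fit():
    import cuml
    import numpy
    from sklearn.datasets import make_classification

    X, y = make_classification(n_samples=60, n_features=8, n_informative=6,
                               n_classes=3, random_state=0)
    X = X.astype(numpy.float32)
    y = y.astype(numpy.float32)
    clf = MulticlassClassifier(cuml.LogisticRegression(), strategy='ovr')
    clf.fit(X, y)  # trains one binary LogisticRegression per class (OvR)
    return clf.predict(X)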
# License: BSD 3 clause
#
# This code originates from the scikit-learn library and has since been
# modified to allow GPU acceleration. The authors mentioned above do not
# endorse or promote this derived work.

from itertools import chain
from itertools import compress
from joblib import Parallel
import functools
import timeit
import numbers

from cuml.common.import_utils import has_sklearn

if has_sklearn():
    from sklearn.base import clone
    from sklearn.utils import Bunch

from contextlib import contextmanager
from collections import defaultdict
import warnings

from scipy import sparse as sp_sparse
from cupyx.scipy import sparse as cu_sparse
import numpy as cpu_np
import cupy as np
import numba
import pandas as pd
import cudf
def learn_model(draw, X, y, task, learner, n_estimators, n_targets):
    # For lgbm or xgb, return the booster or the sklearn wrapper object?
    use_sklearn_estimator = draw(st.booleans())
    if learner == 'xgb':
        assume(has_xgboost())
        if task == 'regression':
            objective = draw(st.sampled_from(
                ['reg:squarederror', 'reg:pseudohubererror']))
            model = xgb.XGBRegressor(n_estimators=n_estimators,
                                     tree_method='gpu_hist',
                                     objective=objective,
                                     enable_categorical=True,
                                     verbosity=0).fit(X, y)
        elif task == 'classification':
            valid_objectives = [
                'binary:logistic',
                'binary:hinge',
                'binary:logitraw',
                'count:poisson',
            ]
            if n_targets > 2:
                valid_objectives += [
                    'rank:pairwise', 'rank:ndcg', 'rank:map',
                    'multi:softmax', 'multi:softprob'
                ]
            objective = draw(st.sampled_from(valid_objectives))
            model = xgb.XGBClassifier(n_estimators=n_estimators,
                                      tree_method='gpu_hist',
                                      objective=objective,
                                      enable_categorical=True,
                                      verbosity=0).fit(X, y)
        pred = model.predict(X, output_margin=True)
        if not use_sklearn_estimator:
            model = model.get_booster()
        return model, pred
    elif learner == 'rf':
        predict_model = 'GPU' if y.dtype == np.float32 else 'CPU'
        if task == 'regression':
            model = cuml.ensemble.RandomForestRegressor(
                n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict(X, predict_model=predict_model)
        elif task == 'classification':
            model = cuml.ensemble.RandomForestClassifier(
                n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict_proba(X)
        return model, pred
    elif learner == 'skl_rf':
        assume(has_sklearn())
        if task == 'regression':
            model = sklrfr(n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict(X)
        elif task == 'classification':
            model = sklrfc(n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict_proba(X)
        return model, pred
    elif learner == 'lgbm':
        assume(has_lightgbm())
        if task == 'regression':
            model = lgb.LGBMRegressor(n_estimators=n_estimators).fit(X, y)
        elif task == 'classification':
            model = lgb.LGBMClassifier(n_estimators=n_estimators).fit(X, y)
        pred = model.predict(X, raw_score=True)
        if not use_sklearn_estimator:
            model = model.booster_
        return model, pred
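# Illustrative usage sketch (not part of the original test module): one way
# to drive learn_model from a Hypothesis test is to pass data.draw as its
# `draw` argument. The test body and parameter values below are assumptions
# for exposition only.
def _example_learn_model_test():
    from hypothesis import given, settings

    @given(st.data())
    @settings(deadline=None)
    def run(data):
        X, y = make_classification(n_samples=100, n_features=10,
                                   n_informative=5, n_classes=2,
                                   random_state=0)
        X = X.astype(np.float32)
        y = y.astype(np.float32)
        model, pred = learn_model(data.draw, X, y, 'classification',
                                  'skl_rf', 5, 2)
        # predict_proba returns one probability column per class.
        assert pred.shape == (X.shape[0], 2)

    run()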
from cuml.testing.utils import as_type

# See issue #4729: XGBoost is disabled due to CI failures.
xgb = None


def has_xgboost():
    return False


if has_lightgbm():
    import lightgbm as lgb
if has_shap():
    import shap
if has_sklearn():
    from sklearn.datasets import make_regression, make_classification
    from sklearn.ensemble import RandomForestRegressor as sklrfr
    from sklearn.ensemble import RandomForestClassifier as sklrfc


def make_classification_with_categorical(*, n_samples, n_features,
                                         n_categorical, n_informative,
                                         n_redundant, n_repeated, n_classes,
                                         random_state,
                                         numeric_dtype=np.float32):