예제 #1
0
 def __init__(
         self,
         transformers=None,
         remainder='drop',
         sparse_threshold=0.3,
         n_jobs=None,
         transformer_weights=None,
         verbose=False):
     """Record the constructor arguments on the instance.

     Every argument is stored unchanged under an attribute of the same
     name; nothing else is computed here.

     NOTE(review): the parameter names match those of
     sklearn.compose.ColumnTransformer, so they presumably carry the same
     meaning -- confirm against the class-level documentation.

     Raises
     ------
     ImportError
         If ``has_sklearn()`` reports that scikit-learn is unavailable.

     Warns
     -----
     UserWarning
         If ``transformers`` is falsy (``None`` or empty). The instance is
         still created in that case.
     """
     sklearn_available = has_sklearn()
     if not sklearn_available:
         raise ImportError(
             "Scikit-learn is needed to use the Column Transformer")

     # A missing transformer list is reported but not rejected.
     if not transformers:
         warnings.warn('Transformers are required')

     # What to transform and what to do with the untouched columns.
     self.transformers = transformers
     self.remainder = remainder

     # Output / execution settings.
     self.sparse_threshold = sparse_threshold
     self.n_jobs = n_jobs
     self.transformer_weights = transformer_weights
     self.verbose = verbose
예제 #2
0
def _generate_hypercube(samples, dimensions, rng):
    """Return distinct binary samples of length ``dimensions``.

    Parameters
    ----------
    samples : int
        Number of rows to generate.
    dimensions : int
        Number of binary columns per row.
    rng : RandomState-like
        Source of randomness. Only ``rng.randint`` is called, and only to
        derive integer seeds, so every random draw made here is
        determined by the state of ``rng``.

    Returns
    -------
    array of shape (samples, dimensions)
        Rows of 0/1 values; no two rows are identical.

    Raises
    ------
    RuntimeError
        If scikit-learn is not available.
    """
    if not has_sklearn():
        # Single literal: a backslash continuation inside the string would
        # embed the next line's indentation in the message.
        raise RuntimeError(
            "Scikit-learn is needed to run make_classification.")

    from sklearn.utils.random import sample_without_replacement

    if dimensions > 30:
        # Only the low 30 bits are sampled without replacement (which is
        # what makes the rows distinct); the remaining high columns are
        # independent coin flips.
        low_bits = _generate_hypercube(samples, 30, rng)
        # Seed a dedicated generator from ``rng`` instead of using the
        # global ``np.random`` state, so a seeded ``rng`` yields
        # reproducible output. The seed is drawn after the recursive call
        # so the low bits are unchanged for a given ``rng`` state.
        seed = int(rng.randint(2 ** 31 - 1))
        high_bits = np.random.RandomState(seed).randint(
            2, size=(samples, dimensions - 30))
        return np.hstack([high_bits, low_bits])

    random_state = int(rng.randint(dimensions))
    # Draw ``samples`` distinct integers below 2**dimensions and store them
    # as big-endian 32-bit words so the bit order is well defined.
    out = sample_without_replacement(2 ** dimensions, samples,
                                     random_state=random_state).astype(
                                         dtype='>u4', copy=False)
    # Expand every word into its 32 bits and keep the ``dimensions`` least
    # significant ones.
    out = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:]
    return out
예제 #3
0
파일: multiclass.py 프로젝트: rapidsai/cuml
 def fit(self, X, y) -> 'MulticlassClassifier':
     """
     Train the scikit-learn multiclass wrapper around ``self.estimator``.

     ``self.strategy`` selects the wrapper: ``'ovr'`` builds a
     ``OneVsRestClassifier`` and ``'ovo'`` a ``OneVsOneClassifier``. The
     wrapper is stored in ``self.multiclass_estimator`` and fitted on
     ``X`` and ``y`` after both were passed through
     ``input_to_host_array``.

     Returns
     -------
     self

     Raises
     ------
     ImportError
         If scikit-learn is not available.
     ValueError
         If ``self.strategy`` is neither ``'ovr'`` nor ``'ovo'``.
     """
     if not has_sklearn():
         raise ImportError("Scikit-learn is needed to use "
                           "MulticlassClassifier derived classes.")
     from sklearn import multiclass as skl_multiclass

     # Pick the wrapper class first, then instantiate it once.
     strategy = self.strategy
     if strategy == 'ovr':
         wrapper_cls = skl_multiclass.OneVsRestClassifier
     elif strategy == 'ovo':
         wrapper_cls = skl_multiclass.OneVsOneClassifier
     else:
         message = ('Invalid multiclass strategy ' + str(strategy) +
                    ', must be one of {"ovr", "ovo"}')
         raise ValueError(message)
     self.multiclass_estimator = wrapper_cls(self.estimator, n_jobs=None)

     host_X, _, _, _, _ = input_to_host_array(X)
     host_y, _, _, _, _ = input_to_host_array(y)

     with cuml.internals.exit_internal_api():
         self.multiclass_estimator.fit(host_X, host_y)
         return self
예제 #4
0
# License: BSD

# This code originates from the Scikit-Learn library,
# it was since modified to allow GPU acceleration.
# This code is under BSD 3 clause license.
# Authors mentioned above do not endorse or promote this production.

from itertools import chain
from itertools import compress
from joblib import Parallel
import functools
import timeit
import numbers
from cuml.common.import_utils import has_sklearn

if has_sklearn():
    from sklearn.base import clone
    from sklearn.utils import Bunch
from contextlib import contextmanager
from collections import defaultdict
import warnings

from scipy import sparse as sp_sparse
from cupyx.scipy import sparse as cu_sparse
import numpy as cpu_np
import cupy as np
import numba

import pandas as pd
import cudf
예제 #5
0
def learn_model(draw, X, y, task, learner, n_estimators, n_targets):
    """Fit a tree model of the requested kind and return it with predictions.

    Parameters
    ----------
    draw : callable
        Draw function used to sample from strategies (presumably supplied
        by Hypothesis -- confirm against the decorator on this function).
    X, y
        Training features and targets; passed unchanged to ``fit``.
    task : {'regression', 'classification'}
        Kind of estimator to build.
    learner : {'xgb', 'rf', 'skl_rf', 'lgbm'}
        Library / estimator family to use.
    n_estimators : int
        Forwarded to the estimator constructor.
    n_targets : int
        When greater than 2, the ranking and multi-class objectives are
        added to the xgb classification candidates.

    Returns
    -------
    (model, pred)
        ``pred`` is the margin output for xgb, the raw score for lgbm,
        ``predict`` for the random-forest regressors and ``predict_proba``
        for the random-forest classifiers. For xgb and lgbm a drawn
        boolean decides whether ``model`` is the scikit-learn style
        wrapper or the underlying booster.

    Raises
    ------
    ValueError
        If ``task`` or ``learner`` is not one of the supported values.
    """
    # Reject an unknown task up front; otherwise ``model``/``pred`` would be
    # left unbound and fail later with an opaque UnboundLocalError.
    if task not in ('regression', 'classification'):
        raise ValueError(
            "Unsupported task {!r}, must be 'regression' or "
            "'classification'".format(task))

    # For lgbm or xgb: return the sklearn wrapper or the raw booster?
    use_sklearn_estimator = draw(st.booleans())
    if learner == 'xgb':
        assume(has_xgboost())
        if task == 'regression':
            objective = draw(
                st.sampled_from(['reg:squarederror', 'reg:pseudohubererror']))
            model = xgb.XGBRegressor(n_estimators=n_estimators,
                                     tree_method='gpu_hist',
                                     objective=objective,
                                     enable_categorical=True,
                                     verbosity=0).fit(X, y)
        else:
            valid_objectives = [
                'binary:logistic',
                'binary:hinge',
                'binary:logitraw',
                'count:poisson',
            ]
            if n_targets > 2:
                valid_objectives += [
                    'rank:pairwise', 'rank:ndcg', 'rank:map', 'multi:softmax',
                    'multi:softprob'
                ]

            objective = draw(st.sampled_from(valid_objectives))
            model = xgb.XGBClassifier(n_estimators=n_estimators,
                                      tree_method='gpu_hist',
                                      objective=objective,
                                      enable_categorical=True,
                                      verbosity=0).fit(X, y)
        pred = model.predict(X, output_margin=True)
        if not use_sklearn_estimator:
            model = model.get_booster()
        return model, pred
    elif learner == 'rf':
        # GPU prediction is requested only for float32 targets. The value
        # is spelled exactly 'GPU' (no trailing space).
        predict_model = 'GPU' if y.dtype == np.float32 else 'CPU'
        if task == 'regression':
            model = cuml.ensemble.RandomForestRegressor(
                n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict(X, predict_model=predict_model)
        else:
            model = cuml.ensemble.RandomForestClassifier(
                n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict_proba(X)
        return model, pred
    elif learner == 'skl_rf':
        assume(has_sklearn())
        if task == 'regression':
            model = sklrfr(n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict(X)
        else:
            model = sklrfc(n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict_proba(X)
        return model, pred
    elif learner == 'lgbm':
        assume(has_lightgbm())
        if task == 'regression':
            model = lgb.LGBMRegressor(n_estimators=n_estimators).fit(X, y)
        else:
            model = lgb.LGBMClassifier(n_estimators=n_estimators).fit(X, y)
        pred = model.predict(X, raw_score=True)
        if not use_sklearn_estimator:
            model = model.booster_
        return model, pred
    else:
        # Previously an unknown learner fell through and returned None.
        raise ValueError(
            "Unsupported learner {!r}, must be one of 'xgb', 'rf', "
            "'skl_rf', 'lgbm'".format(learner))
예제 #6
0
from cuml.testing.utils import as_type

# See issue #4729
# Xgboost disabled due to CI failures
xgb = None


def has_xgboost():
    """Report XGBoost as unavailable.

    Always returns ``False``: XGBoost is disabled here because of CI
    failures (see issue #4729).
    """
    return False


if has_lightgbm():
    import lightgbm as lgb
if has_shap():
    import shap
if has_sklearn():
    from sklearn.datasets import make_regression, make_classification
    from sklearn.ensemble import RandomForestRegressor as sklrfr
    from sklearn.ensemble import RandomForestClassifier as sklrfc


def make_classification_with_categorical(*,
                                         n_samples,
                                         n_features,
                                         n_categorical,
                                         n_informative,
                                         n_redundant,
                                         n_repeated,
                                         n_classes,
                                         random_state,
                                         numeric_dtype=np.float32):