def test_drf_regressor_backupsklearn(backend='auto'):
    """Compare the h2o4gpu RandomForestRegressor wrapper with the sklearn one.

    Both estimators are fitted on ``./open_data/simple.txt`` (whitespace
    separated; the last column is the target) with the same seed and number
    of trees.  When ``backend`` is ``"sklearn"`` the wrapper is expected to
    reproduce the sklearn estimator exactly, so predictions, score, decision
    path, leaf indices and fitted attributes are compared for equality.

    :param backend: backend name handed to ``h2o4gpu.RandomForestRegressor``.
    """
    # `sep=r"\s+"` is the documented equivalent of the deprecated
    # `delim_whitespace=True`.
    df = pd.read_csv("./open_data/simple.txt", sep=r"\s+")
    X = np.array(df.iloc[:, :-1], dtype='float32', order='C')
    y = np.array(df.iloc[:, -1], dtype='float32', order='C')

    import h2o4gpu
    Solver = h2o4gpu.RandomForestRegressor

    # Run h2o4gpu version of RandomForest Regression
    drf = Solver(
        backend=backend, random_state=1234, oob_score=True, n_estimators=10)
    print("h2o4gpu fit()")
    drf.fit(X, y)

    # Run Sklearn version of RandomForest Regression
    from h2o4gpu.ensemble import RandomForestRegressorSklearn
    drf_sk = RandomForestRegressorSklearn(
        random_state=1234, oob_score=True, max_depth=3, n_estimators=10)
    print("Scikit fit()")
    drf_sk.fit(X, y)

    if backend == "sklearn":
        assert (drf.predict(X) == drf_sk.predict(X)).all()
        # score() yields a scalar; compare it directly rather than calling
        # .all() on the comparison, which fails for plain Python floats.
        assert drf.score(X, y) == drf_sk.score(X, y)
        assert (drf.decision_path(X)[1] == drf_sk.decision_path(X)[1]).all()
        assert (drf.apply(X) == drf_sk.apply(X)).all()

        print("Estimators")
        print(drf.estimators_)
        print(drf_sk.estimators_)

        print("n_features")
        print(drf.n_features_)
        print(drf_sk.n_features_)
        assert drf.n_features_ == drf_sk.n_features_

        print("n_outputs")
        print(drf.n_outputs_)
        print(drf_sk.n_outputs_)
        assert drf.n_outputs_ == drf_sk.n_outputs_

        print("Feature importance")
        print(drf.feature_importances_)
        print(drf_sk.feature_importances_)
        assert (drf.feature_importances_ == drf_sk.feature_importances_).all()

        print("oob_score")
        print(drf.oob_score_)
        print(drf_sk.oob_score_)
        assert drf.oob_score_ == drf_sk.oob_score_

        print("oob_prediction")
        print(drf.oob_prediction_)
        print(drf_sk.oob_prediction_)
        assert (drf.oob_prediction_ == drf_sk.oob_prediction_).all()
def __init__(
        self,
        n_estimators=10,  # h2o4gpu
        criterion='mse',
        max_depth=3,  # h2o4gpu
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features='auto',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        min_impurity_split=None,
        bootstrap=True,
        oob_score=False,
        n_jobs=1,  # h2o4gpu
        random_state=None,  # h2o4gpu
        verbose=0,  # h2o4gpu
        warm_start=False,
        # XGBoost specific params
        subsample=1.0,  # h2o4gpu
        colsample_bytree=1.0,  # h2o4gpu
        num_parallel_tree=100,  # h2o4gpu
        tree_method='gpu_hist',  # h2o4gpu
        n_gpus=-1,  # h2o4gpu
        predictor='gpu_predictor',  # h2o4gpu
        backend='auto'):  # h2o4gpu
    """Build the sklearn and xgboost models and pick the one to delegate to.

    Parameters marked ``# h2o4gpu`` in the signature are used by the
    xgboost-based model; ``n_estimators`` through ``warm_start`` are
    forwarded to ``RandomForestRegressorSklearn``.

    :param backend: ``'auto'``, ``'sklearn'`` or ``'h2o4gpu'``.  The
        ``H2O4GPU_BACKEND`` environment variable, when set, overrides this
        argument.  With ``'auto'`` the sklearn model is chosen as soon as
        any sklearn-only parameter differs from its default.  After
        construction ``self.backend`` holds the backend actually selected
        (``'sklearn'`` or ``'h2o4gpu'``).
    :param verbose: forwarded to the sklearn model; when greater than 0 a
        warning is printed for every parameter that forces the sklearn
        fallback.  A value of 0 makes the xgboost model silent.
    :param random_state: forwarded unchanged to the sklearn model; ``None``
        is replaced by 0 for the xgboost model.
    """
    import os
    _backend = os.environ.get('H2O4GPU_BACKEND', None)
    if _backend is not None:
        backend = _backend
    from ..typecheck.typechecks import assert_is_type
    assert_is_type(backend, str)

    # Fall back to Sklearn
    # Can remove if fully implement sklearn functionality
    self.do_sklearn = False
    if backend == 'auto':
        # (name, supplied value, default) for every sklearn-only parameter.
        sklearn_only_params = (
            ('min_samples_split', min_samples_split, 2),
            ('min_samples_leaf', min_samples_leaf, 1),
            ('min_weight_fraction_leaf', min_weight_fraction_leaf, 0.0),
            ('max_features', max_features, 'auto'),
            ('max_leaf_nodes', max_leaf_nodes, None),
            ('min_impurity_decrease', min_impurity_decrease, 0.0),
            ('min_impurity_split', min_impurity_split, None),
            ('bootstrap', bootstrap, True),
            ('oob_score', oob_score, False),
        )
        for name, value, default in sklearn_only_params:
            if value != default:
                self.do_sklearn = True
                if verbose > 0:
                    print("WARNING: The sklearn parameter " + name +
                          " has been changed from default to " +
                          str(value) +
                          ". Will run Sklearn RandomForestRegressor.")
    elif backend == 'sklearn':
        self.do_sklearn = True
    elif backend == 'h2o4gpu':
        self.do_sklearn = False
    # Record the backend that will really run instead of echoing 'auto'.
    self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'

    from h2o4gpu.ensemble import RandomForestRegressorSklearn
    self.model_sklearn = RandomForestRegressorSklearn(
        n_estimators=n_estimators,
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_features=max_features,
        max_leaf_nodes=max_leaf_nodes,
        min_impurity_decrease=min_impurity_decrease,
        min_impurity_split=min_impurity_split,
        bootstrap=bootstrap,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
        warm_start=warm_start)

    # Parameters for random forest
    # xgboost should be silent exactly when no verbosity was requested
    # (the previous code had this inverted).
    silent = verbose == 0
    if random_state is None:
        random_state = 0

    import xgboost as xgb
    self.model_h2o4gpu = xgb.XGBRegressor(
        n_estimators=n_estimators,  # h2o4gpu
        max_depth=max_depth,  # h2o4gpu
        n_jobs=n_jobs,  # h2o4gpu
        random_state=random_state,  # h2o4gpu
        num_parallel_tree=num_parallel_tree,
        tree_method=tree_method,
        n_gpus=n_gpus,
        predictor=predictor,
        silent=silent,
        num_round=1,
        subsample=subsample,
        colsample_bytree=colsample_bytree)

    if self.do_sklearn:
        print("Running sklearn RandomForestRegressor")
        self.model = self.model_sklearn
    else:
        print("Running h2o4gpu RandomForestRegressor")
        self.model = self.model_h2o4gpu
class RandomForestRegressor(object):
    """H2O RandomForestRegressor Solver

    Selects between h2o4gpu.solvers.xgboost.RandomForestRegressor
    and h2o4gpu.ensemble.forest.RandomForestRegressorSklearn

    Documentation:
    import h2o4gpu.solvers ; help(h2o4gpu.xgboost.RandomForestRegressorO)
    help(h2o4gpu.ensemble.forest.RandomForestRegressorSklearn)

    :param: backend : Which backend to use.
        Options are 'auto', 'sklearn', 'h2o4gpu'.
        Default is 'auto'.
        Saves as attribute for actual backend used.

    """

    def __init__(
            self,
            n_estimators=10,  # h2o4gpu
            criterion='mse',
            max_depth=3,  # h2o4gpu
            min_samples_split=2,
            min_samples_leaf=1,
            min_weight_fraction_leaf=0.0,
            max_features='auto',
            max_leaf_nodes=None,
            min_impurity_decrease=0.0,
            min_impurity_split=None,
            bootstrap=True,
            oob_score=False,
            n_jobs=1,  # h2o4gpu
            random_state=None,  # h2o4gpu
            verbose=0,  # h2o4gpu
            warm_start=False,
            # XGBoost specific params
            subsample=1.0,  # h2o4gpu
            colsample_bytree=1.0,  # h2o4gpu
            num_parallel_tree=100,  # h2o4gpu
            tree_method='gpu_hist',  # h2o4gpu
            n_gpus=-1,  # h2o4gpu
            predictor='gpu_predictor',  # h2o4gpu
            backend='auto'):  # h2o4gpu
        """Build the sklearn and xgboost models and pick the active one.

        Parameters marked ``# h2o4gpu`` in the signature are used by the
        xgboost-based model; ``n_estimators`` through ``warm_start`` are
        forwarded to ``RandomForestRegressorSklearn``.

        :param backend: ``'auto'``, ``'sklearn'`` or ``'h2o4gpu'``.  The
            ``H2O4GPU_BACKEND`` environment variable, when set, overrides
            this argument.  With ``'auto'`` the sklearn model is chosen as
            soon as any sklearn-only parameter differs from its default.
            After construction ``self.backend`` holds the backend actually
            selected (``'sklearn'`` or ``'h2o4gpu'``).
        :param verbose: forwarded to the sklearn model; when greater than 0
            a warning is printed for every parameter that forces the
            sklearn fallback.  A value of 0 makes the xgboost model silent.
        :param random_state: forwarded unchanged to the sklearn model;
            ``None`` is replaced by 0 for the xgboost model.
        """
        import os
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend
        from ..typecheck.typechecks import assert_is_type
        assert_is_type(backend, str)

        # Fall back to Sklearn
        # Can remove if fully implement sklearn functionality
        self.do_sklearn = False
        if backend == 'auto':
            # (name, supplied value, default) for each sklearn-only param.
            sklearn_only_params = (
                ('min_samples_split', min_samples_split, 2),
                ('min_samples_leaf', min_samples_leaf, 1),
                ('min_weight_fraction_leaf', min_weight_fraction_leaf, 0.0),
                ('max_features', max_features, 'auto'),
                ('max_leaf_nodes', max_leaf_nodes, None),
                ('min_impurity_decrease', min_impurity_decrease, 0.0),
                ('min_impurity_split', min_impurity_split, None),
                ('bootstrap', bootstrap, True),
                ('oob_score', oob_score, False),
            )
            for name, value, default in sklearn_only_params:
                if value != default:
                    self.do_sklearn = True
                    if verbose > 0:
                        print("WARNING: The sklearn parameter " + name +
                              " has been changed from default to " +
                              str(value) +
                              ". Will run Sklearn RandomForestRegressor.")
        elif backend == 'sklearn':
            self.do_sklearn = True
        elif backend == 'h2o4gpu':
            self.do_sklearn = False
        # Record the backend that will really run instead of echoing 'auto'.
        self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'

        from h2o4gpu.ensemble import RandomForestRegressorSklearn
        self.model_sklearn = RandomForestRegressorSklearn(
            n_estimators=n_estimators,
            criterion=criterion,
            max_depth=max_depth,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_features=max_features,
            max_leaf_nodes=max_leaf_nodes,
            min_impurity_decrease=min_impurity_decrease,
            min_impurity_split=min_impurity_split,
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start)

        # Parameters for random forest
        # xgboost should be silent exactly when no verbosity was requested
        # (the previous code had this inverted).
        silent = verbose == 0
        if random_state is None:
            random_state = 0

        import xgboost as xgb
        self.model_h2o4gpu = xgb.XGBRegressor(
            n_estimators=n_estimators,  # h2o4gpu
            max_depth=max_depth,  # h2o4gpu
            n_jobs=n_jobs,  # h2o4gpu
            random_state=random_state,  # h2o4gpu
            num_parallel_tree=num_parallel_tree,
            tree_method=tree_method,
            n_gpus=n_gpus,
            predictor=predictor,
            silent=silent,
            num_round=1,
            subsample=subsample,
            colsample_bytree=colsample_bytree)

        if self.do_sklearn:
            print("Running sklearn RandomForestRegressor")
            self.model = self.model_sklearn
        else:
            print("Running h2o4gpu RandomForestRegressor")
            self.model = self.model_h2o4gpu

    def apply(self, X):
        """Return ``self.model_sklearn.apply(X)``.

        Always delegates to the sklearn model, whichever backend is active.
        NOTE(review): ``fit()`` only fits the active model, so with the
        h2o4gpu backend the sklearn model has presumably not been fitted
        when this is called -- confirm with callers.
        """
        print("WARNING: apply() is using sklearn")
        return self.model_sklearn.apply(X)

    def decision_path(self, X):
        """Return ``self.model_sklearn.decision_path(X)``.

        Always delegates to the sklearn model, whichever backend is active.
        """
        print("WARNING: decision_path() is using sklearn")
        return self.model_sklearn.decision_path(X)

    def fit(self, X, y=None, sample_weight=None):
        """Fit the active model and mirror its fitted attributes on ``self``.

        :param X: training data, forwarded to the active model.
        :param y: targets, forwarded to the active model.
        :param sample_weight: optional weights, forwarded by keyword so it
            also works with estimators whose ``fit`` takes it keyword-only.
        :return: whatever the active model's ``fit`` returns.
        """
        res = self.model.fit(X, y, sample_weight=sample_weight)
        self.set_attributes()
        return res

    def get_params(self):
        """Return the parameters of the active model."""
        return self.model.get_params()

    def predict(self, X):
        """Predict regression targets for ``X`` with the active model.

        The sklearn backend's output is returned unchanged; the h2o4gpu
        backend's output is squeezed.  Predictions are continuous values:
        the former 0/1 thresholding at 0.5 was removed because it is only
        meaningful for classification.
        """
        res = self.model.predict(X)
        self.set_attributes()
        if self.do_sklearn:
            return res
        return res.squeeze()

    def score(self, X, y, sample_weight=None):
        """Return the sklearn model's ``score`` on ``(X, y)``.

        NOTE: when the h2o4gpu backend is active the sklearn model is
        re-fitted on the ``(X, y)`` given here (without ``sample_weight``)
        before scoring, so the result describes the sklearn model rather
        than the xgboost one.
        """
        # TODO add for h2o4gpu
        print("WARNING: score() is using sklearn")
        if not self.do_sklearn:
            self.model_sklearn.fit(X, y)  # Need to re-fit
        res = self.model_sklearn.score(X, y, sample_weight)
        return res

    def set_params(self, **params):
        """Forward ``params`` to the active model's ``set_params``."""
        return self.model.set_params(**params)

    def set_attributes(self):
        """ Set attributes for class

        Copies the fitted attributes of the active model onto this object.
        NOTE(review): ``NameError``/``AttributeError`` are handed to
        ``_setter`` presumably so that attributes missing on the active
        model are skipped -- confirm in ``solvers.utils``.
        """
        from ..solvers.utils import _setter
        s = _setter(oself=self, e1=NameError, e2=AttributeError)

        s('oself.estimators_ = oself.model.estimators_')
        s('oself.n_features_ = oself.model.n_features_')
        s('oself.n_outputs_ = oself.model.n_outputs_')
        s('oself.feature_importances_ = oself.model.feature_importances_')
        s('oself.oob_score_ = oself.model.oob_score_')
        s('oself.oob_prediction_ = oself.model.oob_prediction_')