def test_gbm_regressor_backupsklearn(backend='auto'):
    """Compare h2o4gpu's GradientBoostingRegressor wrapper with the sklearn one.

    Both models are fitted on ./open_data/simple.txt (whitespace separated,
    last column is the target).  The equality assertions only run when
    ``backend == "sklearn"``; for any other backend the test just checks
    that fitting completes.

    :param backend: backend name forwarded to h2o4gpu.GradientBoostingRegressor.
    """
    # sep=r"\s+" is the documented replacement for the deprecated
    # delim_whitespace=True.
    df = pd.read_csv("./open_data/simple.txt", sep=r"\s+")
    target_col = df.shape[1] - 1
    X = np.array(df.iloc[:, :target_col], dtype='float32', order='C')
    y = np.array(df.iloc[:, target_col], dtype='float32', order='C')

    import h2o4gpu
    Solver = h2o4gpu.GradientBoostingRegressor

    # Run h2o4gpu version of GradientBoosting Regression
    gbm = Solver(backend=backend, random_state=1234, n_gpus=n_gpus(),
                 n_jobs=-1)
    print("h2o4gpu fit()")
    gbm.fit(X, y)

    # Run Sklearn version of GradientBoosting Regression
    from h2o4gpu.ensemble import GradientBoostingRegressorSklearn
    gbm_sk = GradientBoostingRegressorSklearn(random_state=1234, max_depth=3)
    print("Scikit fit()")
    gbm_sk.fit(X, y)

    if backend == "sklearn":
        assert (gbm.predict(X) == gbm_sk.predict(X)).all()

        # Materialise the per-stage comparison; printing the bare generator
        # expression would only show "<generator object ...>".
        print([
            np.array_equal(a, b)
            for a, b in zip(gbm.staged_predict(X), gbm_sk.staged_predict(X))
        ])
        assert np.allclose(list(gbm.staged_predict(X)),
                           list(gbm_sk.staged_predict(X)))

        # score() yields a scalar, which may be a plain Python float with no
        # .all() method, so compare the two values directly.
        assert gbm.score(X, y) == gbm_sk.score(X, y)
        assert (gbm.apply(X) == gbm_sk.apply(X)).all()

        print("Estimators")
        print(gbm.estimators_)
        print(gbm_sk.estimators_)

        print("loss")
        print(gbm.loss_)
        print(gbm_sk.loss_)
        assert gbm.loss_.__dict__ == gbm_sk.loss_.__dict__

        print("init_")
        print(gbm.init)
        print(gbm_sk.init)

        print("Feature importance")
        print(gbm.feature_importances_)
        print(gbm_sk.feature_importances_)
        assert (gbm.feature_importances_ == gbm_sk.feature_importances_).all()

        print("train_score_")
        print(gbm.train_score_)
        print(gbm_sk.train_score_)
        assert (gbm.train_score_ == gbm_sk.train_score_).all()
class GradientBoostingRegressor(object):
    """H2O GradientBoostingRegressor Solver

    Selects between h2o4gpu.solvers.xgboost.GradientBoostingRegressor
    (backed by xgboost's XGBRegressor) and
    h2o4gpu.ensemble.gradient_boosting.GradientBoostingRegressorSklearn

    Documentation:
    import h2o4gpu.solvers ; help(h2o4gpu.xgboost.GradientBoostingRegressorO)
    help(h2o4gpu.ensemble.gradient_boosting.GradientBoostingRegressorSklearn)

    :param: backend : Which backend to use.
        Options are 'auto', 'sklearn', 'h2o4gpu'.
        Default is 'auto'.
        Saves as attribute for actual backend used.
        The H2O4GPU_BACKEND environment variable, when set, overrides
        this argument.

    :param: n_jobs : Optional number of parallel jobs, forwarded to the
        xgboost model only when it is not None.

    Parameters marked ``# h2o4gpu`` in the signature are forwarded to the
    xgboost model; every parameter before ``colsample_bytree`` is also
    forwarded to the sklearn model.
    """

    # Parameters that only the sklearn implementation honours, paired with
    # the defaults declared in __init__.  With backend='auto', changing any
    # of them switches the solver to sklearn.
    _SKLEARN_ONLY_DEFAULTS = (
        ('loss', 'ls'),
        ('criterion', 'friedman_mse'),
        ('min_samples_split', 2),
        ('min_samples_leaf', 1),
        ('min_weight_fraction_leaf', 0.0),
        ('min_impurity_decrease', 0.0),
        ('min_impurity_split', None),
        ('init', None),
        ('max_features', 'auto'),
        ('alpha', 0.9),
        ('max_leaf_nodes', None),
        ('presort', 'auto'),
    )

    def __init__(
            self,
            loss='ls',
            learning_rate=0.1,  # h2o4gpu
            n_estimators=100,  # h2o4gpu
            subsample=1.0,  # h2o4gpu
            criterion='friedman_mse',
            min_samples_split=2,
            min_samples_leaf=1,
            min_weight_fraction_leaf=0.0,
            max_depth=3,  # h2o4gpu
            min_impurity_decrease=0.0,
            min_impurity_split=None,
            init=None,
            random_state=None,  # h2o4gpu
            max_features='auto',
            alpha=0.9,
            verbose=0,  # h2o4gpu
            max_leaf_nodes=None,
            warm_start=False,
            presort='auto',
            # XGBoost specific params
            colsample_bytree=1.0,  # h2o4gpu
            num_parallel_tree=100,  # h2o4gpu
            tree_method='gpu_hist',  # h2o4gpu
            n_gpus=-1,  # h2o4gpu
            predictor='gpu_predictor',  # h2o4gpu
            backend='auto',  # h2o4gpu
            n_jobs=None):  # h2o4gpu
        import os
        # The environment variable takes precedence over the argument.
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend
        from ..typecheck.typechecks import assert_is_type
        assert_is_type(backend, str)

        # Fall back to Sklearn
        # Can remove if fully implement sklearn functionality
        self.do_sklearn = False
        if backend == 'auto':
            changed = self._changed_sklearn_params(
                loss=loss,
                criterion=criterion,
                min_samples_split=min_samples_split,
                min_samples_leaf=min_samples_leaf,
                min_weight_fraction_leaf=min_weight_fraction_leaf,
                min_impurity_decrease=min_impurity_decrease,
                min_impurity_split=min_impurity_split,
                init=init,
                max_features=max_features,
                alpha=alpha,
                max_leaf_nodes=max_leaf_nodes,
                presort=presort)
            if verbose > 0:
                for name, value in changed:
                    print("WARNING: The sklearn parameter " + name +
                          " has been changed from default to " + str(value) +
                          ". Will run Sklearn GradientBoostingRegressor.")
            self.do_sklearn = bool(changed)
        elif backend == 'sklearn':
            self.do_sklearn = True
        elif backend == 'h2o4gpu':
            self.do_sklearn = False
        self.backend = backend

        from h2o4gpu.ensemble import GradientBoostingRegressorSklearn
        self.model_sklearn = GradientBoostingRegressorSklearn(
            loss=loss,
            learning_rate=learning_rate,  # h2o4gpu
            n_estimators=n_estimators,  # h2o4gpu
            subsample=subsample,  # h2o4gpu
            criterion=criterion,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_depth=max_depth,  # h2o4gpu
            min_impurity_decrease=min_impurity_decrease,
            min_impurity_split=min_impurity_split,
            init=init,
            random_state=random_state,  # h2o4gpu
            max_features=max_features,
            alpha=alpha,
            verbose=verbose,  # h2o4gpu
            max_leaf_nodes=max_leaf_nodes,
            warm_start=warm_start,
            presort=presort)  # h2o4gpu

        # Parameters for gbm
        # xgboost should stay quiet only when no verbosity was requested.
        silent = verbose == 0
        # The sklearn model above received the caller's random_state
        # unchanged; only the xgboost model gets the 0 substitute.
        if random_state is None:
            random_state = 0

        import xgboost as xgb
        xgb_params = dict(
            learning_rate=learning_rate,  # h2o4gpu
            n_estimators=n_estimators,  # h2o4gpu
            subsample=subsample,  # h2o4gpu
            max_depth=max_depth,  # h2o4gpu
            random_state=random_state,  # h2o4gpu
            silent=silent,  # h2o4gpu
            colsample_bytree=colsample_bytree,  # h2o4gpu
            num_parallel_tree=num_parallel_tree,  # h2o4gpu
            tree_method=tree_method,  # h2o4gpu
            n_gpus=n_gpus,  # h2o4gpu
            predictor=predictor,  # h2o4gpu
            backend=backend)  # h2o4gpu
        # Forward n_jobs only when the caller set it, so xgboost's own
        # default stays in effect otherwise.
        if n_jobs is not None:
            xgb_params['n_jobs'] = n_jobs
        # This is a regressor, so the xgboost model must be XGBRegressor.
        self.model_h2o4gpu = xgb.XGBRegressor(**xgb_params)

        if self.do_sklearn:
            print("Running sklearn GradientBoostingRegressor")
            self.model = self.model_sklearn
        else:
            print("Running h2o4gpu GradientBoostingRegressor")
            self.model = self.model_h2o4gpu

    @classmethod
    def _changed_sklearn_params(cls, **values):
        """Return (name, value) pairs of sklearn-only params not at default.

        :param values: keyword arguments named after the entries of
            ``_SKLEARN_ONLY_DEFAULTS``; names that are not supplied are
            ignored.
        :return: list of ``(name, value)`` tuples, in the order of
            ``_SKLEARN_ONLY_DEFAULTS``, for every supplied value that
            compares unequal (``!=``) to its default.
        """
        return [(name, values[name])
                for name, default in cls._SKLEARN_ONLY_DEFAULTS
                if name in values and values[name] != default]

    def apply(self, X):
        """Call ``apply`` on the sklearn model, whichever backend is active.

        NOTE: this class only fits the sklearn model in fit() when sklearn
        is the active backend, or as a side effect of score().
        """
        print("WARNING: apply() is using sklearn")
        return self.model_sklearn.apply(X)

    def fit(self, X, y=None, sample_weight=None):
        """Fit the active model and refresh the mirrored attributes.

        :return: whatever the active model's ``fit`` returns.
        """
        res = self.model.fit(X, y, sample_weight)
        self.set_attributes()
        return res

    def get_params(self):
        """Return the parameters of the active model."""
        return self.model.get_params()

    def predict(self, X):
        """Predict regression targets for X with the active model.

        The predictions are returned exactly as the model produced them;
        no thresholding is applied, because this is a regressor.
        """
        res = self.model.predict(X)
        self.set_attributes()
        return res

    def score(self, X, y, sample_weight=None):
        """Return the sklearn model's score on (X, y).

        NOTE: when the active backend is not sklearn, the sklearn model is
        first re-fitted on the (X, y) passed here, so the result describes
        that freshly fitted sklearn model and not the xgboost model.
        """
        # TODO add for h2o4gpu
        print("WARNING: score() is using sklearn")
        if not self.do_sklearn:
            self.model_sklearn.fit(X, y)  # Need to re-fit
        res = self.model_sklearn.score(X, y, sample_weight)
        return res

    def set_params(self, **params):
        """Forward ``params`` to the active model's ``set_params``."""
        return self.model.set_params(**params)

    def staged_predict(self, X):
        """Call ``staged_predict`` on the sklearn model.

        NOTE: this class only fits the sklearn model in fit() when sklearn
        is the active backend, or as a side effect of score().
        """
        print("WARNING: staged_predict() is using sklearn")
        return self.model_sklearn.staged_predict(X)

    def set_attributes(self):
        """ Set attributes for class"""
        from ..solvers.utils import _setter
        # Each string is handed to the project's _setter helper; presumably
        # it performs the assignment and tolerates NameError/AttributeError
        # for attributes the active model lacks -- confirm in solvers.utils.
        s = _setter(oself=self, e1=NameError, e2=AttributeError)

        s('oself.feature_importances_ = oself.model.feature_importances_')
        s('oself.oob_improvement_ = oself.model.oob_improvement_')
        s('oself.train_score_ = oself.model.train_score_')
        s('oself.loss_ = oself.model.loss_')
        s('oself.init = oself.model.init')
        s('oself.estimators_ = oself.model.estimators_')