def __init__(
        self,
        penalty='l2',  # h2o4gpu
        dual=False,
        tol=1E-2,  # h2o4gpu
        C=1.0,  # h2o4gpu
        fit_intercept=True,  # h2o4gpu
        intercept_scaling=1.0,
        class_weight=None,
        random_state=None,
        solver='liblinear',
        max_iter=5000,  # h2o4gpu
        multi_class='ovr',
        verbose=0,  # h2o4gpu
        warm_start=False,
        n_jobs=1,
        n_gpus=-1,  # h2o4gpu
        glm_stop_early=True,  # h2o4gpu
        glm_stop_early_error_fraction=1.0,
        backend='auto'):  # h2o4gpu
    """Logistic Regression wrapper dispatching to sklearn or h2o4gpu.

    The ``H2O4GPU_BACKEND`` environment variable, when set, overrides the
    ``backend`` argument. With ``backend='auto'`` sklearn is selected only
    when a parameter the h2o4gpu solver does not support
    (``intercept_scaling``, ``class_weight``, ``solver``, ``multi_class``)
    differs from its default.

    :raises ValueError: if ``penalty`` is neither ``'l1'`` nor ``'l2'``.
    """
    import os
    # Environment variable takes precedence over the constructor argument.
    _backend = os.environ.get('H2O4GPU_BACKEND', None)
    if _backend is not None:
        backend = _backend

    # Fall back to Sklearn
    # Can remove if fully implement sklearn functionality
    self.do_sklearn = False
    if backend == 'auto':
        params_string = [
            'intercept_scaling', 'class_weight', 'solver', 'multi_class'
        ]
        params = [intercept_scaling, class_weight, solver, multi_class]
        params_default = [1.0, None, 'liblinear', 'ovr']
        for i, param in enumerate(params):
            if param != params_default[i]:
                self.do_sklearn = True
                if verbose:
                    print("WARNING:"
                          " The sklearn parameter " + params_string[i] +
                          " has been changed from default to " + str(param) +
                          " Will run Sklearn Logistic Regression.")
    elif backend == 'sklearn':
        self.do_sklearn = True
    elif backend == 'h2o4gpu':
        self.do_sklearn = False

    self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'

    self.model_sklearn = sk.LogisticRegressionSklearn(
        penalty=penalty,
        dual=dual,
        tol=tol,
        C=C,
        fit_intercept=fit_intercept,
        intercept_scaling=intercept_scaling,
        class_weight=class_weight,
        random_state=random_state,
        solver=solver,
        max_iter=max_iter,
        multi_class=multi_class,
        verbose=verbose,
        warm_start=warm_start,
        n_jobs=n_jobs)

    # Equivalent Logistic parameters for h2o4gpu
    n_threads = None
    n_alphas = 1
    n_lambdas = 1
    n_folds = 1
    lambda_max = 1.0 / C
    lambda_min_ratio = 1.0
    lambda_stop_early = False
    store_full_path = 0
    alphas = None
    lambdas = None

    # Utilize penalty parameter to setup alphas
    if penalty == 'l2':
        alpha_min = 0.0
        alpha_max = 0.0
    elif penalty == 'l1':
        alpha_min = 1.0
        alpha_max = 1.0
    else:
        # BUG FIX: the original used ``assert ValueError, "..."`` which
        # asserts the (always truthy) exception class, so it never fired,
        # and asserts are stripped under ``python -O``.  Raise explicitly.
        raise ValueError("penalty should be either l1 "
                         "or l2 but got " + penalty)

    self.model_h2o4gpu = elastic_net.ElasticNetH2O(
        n_threads=n_threads,
        n_gpus=n_gpus,
        fit_intercept=fit_intercept,
        lambda_min_ratio=lambda_min_ratio,
        n_lambdas=n_lambdas,
        n_folds=n_folds,
        n_alphas=n_alphas,
        tol=tol,
        lambda_stop_early=lambda_stop_early,
        glm_stop_early=glm_stop_early,
        glm_stop_early_error_fraction=glm_stop_early_error_fraction,
        max_iter=max_iter,
        verbose=verbose,
        family='logistic',
        store_full_path=store_full_path,
        lambda_max=lambda_max,
        alpha_max=alpha_max,
        alpha_min=alpha_min,
        alphas=alphas,
        lambdas=lambdas,
        order=None)

    if self.do_sklearn:
        if verbose:
            print("Running sklearn Logistic Regression")
        self.model = self.model_sklearn
    else:
        if verbose:
            print("Running h2o4gpu Logistic Regression")
        self.model = self.model_h2o4gpu
    self.verbose = verbose
def __init__(
        self,
        alpha=1.0,  #h2o4gpu
        fit_intercept=True,  #h2o4gpu
        normalize=False,
        precompute=False,
        copy_X=True,
        max_iter=5000,  #h2o4gpu
        tol=1e-2,  #h2o4gpu
        warm_start=False,
        positive=False,
        random_state=None,
        selection='cyclic',
        n_gpus=-1,  # h2o4gpu
        glm_stop_early=True,  # h2o4gpu
        glm_stop_early_error_fraction=1.0,  #h2o4gpu
        verbose=False,
        backend='auto'):  # h2o4gpu
    """Lasso regression wrapper selecting between sklearn and h2o4gpu.

    The ``H2O4GPU_BACKEND`` environment variable overrides ``backend``.
    Under ``backend='auto'`` the sklearn implementation is picked only
    when a parameter the h2o4gpu solver does not support (``normalize``,
    ``positive`` or ``selection``) differs from its default.
    """
    import os

    env_backend = os.environ.get('H2O4GPU_BACKEND', None)
    if env_backend is not None:
        backend = env_backend
    assert_is_type(backend, str)

    # Fall back to Sklearn
    # Can remove if fully implement sklearn functionality
    self.do_sklearn = False
    if backend == 'auto':
        unsupported = (
            ('normalize', normalize, False),
            ('positive', positive, False),
            ('selection', selection, 'cyclic'),
        )
        for pname, pvalue, pdefault in unsupported:
            if pvalue != pdefault:
                self.do_sklearn = True
                if verbose:
                    print("WARNING: The sklearn parameter " + pname +
                          " has been changed from default to " +
                          str(pvalue) +
                          ". Will run Sklearn Lasso Regression.")
    elif backend == 'sklearn':
        self.do_sklearn = True
    elif backend == 'h2o4gpu':
        self.do_sklearn = False

    self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'

    self.model_sklearn = sk.LassoSklearn(
        alpha=alpha,
        fit_intercept=fit_intercept,
        normalize=normalize,
        precompute=precompute,
        copy_X=copy_X,
        max_iter=max_iter,
        tol=tol,
        warm_start=warm_start,
        positive=positive,
        random_state=random_state,
        selection=selection)

    # Equivalent Lasso configuration for h2o4gpu: a single fully-L1
    # point (alpha_min = alpha_max = 1.0) with one lambda equal to
    # the sklearn ``alpha``.
    self.model_h2o4gpu = elastic_net.ElasticNetH2O(
        n_threads=None,
        n_gpus=n_gpus,
        fit_intercept=fit_intercept,
        lambda_min_ratio=1.0,
        n_lambdas=1,
        n_folds=1,
        n_alphas=1,
        tol=tol,
        lambda_stop_early=False,
        glm_stop_early=glm_stop_early,
        glm_stop_early_error_fraction=glm_stop_early_error_fraction,
        max_iter=max_iter,
        verbose=verbose,
        store_full_path=1,
        lambda_max=alpha,
        alpha_max=1.0,
        alpha_min=1.0,
        alphas=None,
        lambdas=None,
        order=None)

    if self.do_sklearn:
        if verbose:
            print("Running sklearn Lasso Regression")
        self.model = self.model_sklearn
    else:
        if verbose:
            print("Running h2o4gpu Lasso Regression")
        self.model = self.model_h2o4gpu
    self.verbose = verbose
def __init__(
        self,
        alpha=1.0,  #h2o4gpu
        fit_intercept=True,  #h2o4gpu
        normalize=False,
        copy_X=True,
        max_iter=5000,  #h2o4gpu
        tol=1e-2,  #h2o4gpu
        solver='auto',
        random_state=None,
        n_gpus=-1,  # h2o4gpu
        glm_stop_early=True,  # h2o4gpu
        glm_stop_early_error_fraction=1.0,  #h2o4gpu
        verbose=False,
        backend='auto',
        **kwargs):  # h2o4gpu
    """Ridge regression wrapper dispatching to sklearn, h2o4gpu or DAAL.

    The ``H2O4GPU_BACKEND`` environment variable overrides ``backend``.
    With ``backend='auto'`` sklearn is used only when a parameter the
    h2o4gpu solver does not support (``normalize``, ``solver``) differs
    from its default.  ``backend='daal'`` requires a build with DAAL
    support (x86_64 only) and falls back to sklearn otherwise.
    ``**kwargs`` are forwarded to the DAAL model only.
    """
    import os
    _backend = os.environ.get('H2O4GPU_BACKEND', None)
    if _backend is not None:
        backend = _backend

    self.do_daal = False
    # Fall back to Sklearn
    # Can remove if fully implement sklearn functionality
    self.do_sklearn = False

    if backend == 'auto':
        params_string = ['normalize', 'solver']
        params = [normalize, solver]
        params_default = [False, 'auto']
        for i, param in enumerate(params):
            if param != params_default[i]:
                self.do_sklearn = True
                if verbose:
                    print("WARNING:"
                          " The sklearn parameter " + params_string[i] +
                          " has been changed from default to " + str(param) +
                          ". Will run Sklearn Ridge Regression.")
        # BUG FIX: the original never assigned self.backend on the 'auto'
        # path, leaving the attribute undefined.
        self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'
    elif backend == 'sklearn':
        self.do_sklearn = True
        self.backend = 'sklearn'
    elif backend == 'h2o4gpu':
        self.do_sklearn = False
        self.backend = 'h2o4gpu'
    elif backend == 'daal':
        from h2o4gpu import DAAL_SUPPORTED
        if DAAL_SUPPORTED:
            from h2o4gpu.solvers.daal_solver.regression \
                import RidgeRegression as DRR
            self.do_daal = True
            self.backend = 'daal'
            self.model_daal = DRR(alpha=alpha,
                                  fit_intercept=fit_intercept,
                                  normalize=normalize,
                                  **kwargs)
        else:
            import platform
            # BUG FIX: the original's implicit string concatenation
            # printed "WARNING:DAAL ... Sklearn modelused instead";
            # platform.machine() (e.g. 'x86_64') is the value the check
            # is about, unlike platform.architecture().
            print("WARNING: DAAL is supported only for x86_64, "
                  "architecture detected {}. Sklearn model "
                  "used instead".format(platform.machine()))
            self.do_sklearn = True
            # BUG FIX: was 'h2o4gpu' although the sklearn model is the
            # one actually selected below (matches the LinearRegression
            # wrapper's fallback).
            self.backend = 'sklearn'

    self.model_sklearn = sk.RidgeSklearn(
        alpha=alpha,
        fit_intercept=fit_intercept,
        normalize=normalize,
        copy_X=copy_X,
        max_iter=max_iter,
        tol=tol,
        solver=solver,
        random_state=random_state)

    # Equivalent Ridge parameters for h2o4gpu
    n_threads = None
    n_alphas = 1
    n_lambdas = 1
    n_folds = 1
    lambda_max = alpha
    lambda_min_ratio = 1.0
    lambda_stop_early = False
    store_full_path = 1
    alphas = None
    lambdas = None
    # Pure L2 penalty: single point at alpha (mixing) = 0.
    alpha_min = 0.0
    alpha_max = 0.0

    self.model_h2o4gpu = elastic_net.ElasticNetH2O(
        n_threads=n_threads,
        n_gpus=n_gpus,
        fit_intercept=fit_intercept,
        lambda_min_ratio=lambda_min_ratio,
        n_lambdas=n_lambdas,
        n_folds=n_folds,
        n_alphas=n_alphas,
        tol=tol,
        lambda_stop_early=lambda_stop_early,
        glm_stop_early=glm_stop_early,
        glm_stop_early_error_fraction=glm_stop_early_error_fraction,
        max_iter=max_iter,
        verbose=verbose,
        store_full_path=store_full_path,
        lambda_max=lambda_max,
        alpha_max=alpha_max,
        alpha_min=alpha_min,
        alphas=alphas,
        lambdas=lambdas,
        order=None)

    if self.do_sklearn:
        if verbose:
            print("Running sklearn Ridge Regression")
        self.model = self.model_sklearn
    elif self.do_daal:
        if verbose:
            print("Running PyDAAL Ridge Regression")
        self.model = self.model_daal
    else:
        if verbose:
            print("Running h2o4gpu Ridge Regression")
        self.model = self.model_h2o4gpu
    self.verbose = verbose
def __init__(
        self,
        fit_intercept=True,  #h2o4gpu
        normalize=False,
        copy_X=True,
        n_jobs=1,
        n_gpus=-1,
        tol=1E-4,
        glm_stop_early=True,  # h2o4gpu
        glm_stop_early_error_fraction=1.0,  # h2o4gpu
        verbose=False,
        backend='auto',
        **kwargs):
    """Linear regression wrapper dispatching to sklearn, h2o4gpu or DAAL.

    The ``H2O4GPU_BACKEND`` environment variable overrides ``backend``.
    With ``backend='auto'`` sklearn is used only when ``normalize``
    (unsupported by the h2o4gpu solver) differs from its default.
    ``backend='daal'`` requires a build with DAAL support (x86_64 only)
    and falls back to sklearn otherwise.  ``**kwargs`` are forwarded to
    the DAAL model only.
    """
    import os
    _backend = os.environ.get('H2O4GPU_BACKEND', None)
    if _backend is not None:
        backend = _backend

    self.do_daal = False
    self.do_sklearn = False
    if backend == 'auto':
        # Fall back to Sklearn
        # Can remove if fully implement sklearn functionality
        params_string = ['normalize']
        params = [normalize]
        params_default = [False]
        for i, param in enumerate(params):
            if param != params_default[i]:
                self.do_sklearn = True
                if verbose:
                    print("WARNING:"
                          " The sklearn parameter " + params_string[i] +
                          " has been changed from default to " + str(param) +
                          ". Will run Sklearn Linear Regression.")
        # BUG FIX: the original never assigned self.backend on the 'auto'
        # path, leaving the attribute undefined.
        self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'
    elif backend == 'sklearn':
        self.do_sklearn = True
        self.backend = 'sklearn'
    elif backend == 'h2o4gpu':
        self.do_sklearn = False
        self.backend = 'h2o4gpu'
    elif backend == 'daal':
        from h2o4gpu import DAAL_SUPPORTED
        if DAAL_SUPPORTED:
            from h2o4gpu.solvers.daal_solver.regression \
                import LinearRegression as DLR
            self.do_daal = True
            self.backend = 'daal'
            self.model_daal = DLR(fit_intercept=fit_intercept,
                                  normalize=normalize,
                                  **kwargs)
        else:
            import platform
            # BUG FIX: the original's implicit string concatenation
            # printed "WARNING:DAAL ... Sklearn modelused instead";
            # platform.machine() (e.g. 'x86_64') is the value the check
            # is about, unlike platform.architecture().
            print("WARNING: DAAL is supported only for x86_64, "
                  "architecture detected {}. Sklearn model "
                  "used instead".format(platform.machine()))
            self.do_sklearn = True
            self.backend = 'sklearn'

    self.model_sklearn = sk.LinearRegressionSklearn(
        fit_intercept=fit_intercept,
        normalize=normalize,
        copy_X=copy_X,
        n_jobs=n_jobs)

    # Equivalent Linear Regression parameters for h2o4gpu: ordinary
    # least squares, i.e. a single zero-regularization point.
    # (The original also rebound several arguments to themselves,
    # e.g. ``n_gpus = n_gpus``; those no-ops are removed.)
    self.model_h2o4gpu = elastic_net.ElasticNetH2O(
        n_threads=None,
        n_gpus=n_gpus,
        fit_intercept=fit_intercept,
        lambda_min_ratio=0.0,
        n_lambdas=1,
        n_folds=1,
        n_alphas=1,
        tol=tol,
        lambda_stop_early=False,
        glm_stop_early=glm_stop_early,
        glm_stop_early_error_fraction=glm_stop_early_error_fraction,
        max_iter=5000,
        verbose=verbose,
        lambda_max=0.0,
        alpha_max=0.0,
        alpha_min=0.0,
        alphas=None,
        lambdas=None,
        tol_seek_factor=1E-1,
        family='elasticnet',
        order=None)

    if self.do_sklearn:
        if verbose:
            print("Running sklearn Linear Regression")
        self.model = self.model_sklearn
    elif self.do_daal:
        if verbose:
            print("Running PyDAAL Linear Regression")
        self.model = self.model_daal
    else:
        if verbose:
            print("Running h2o4gpu Linear Regression")
        self.model = self.model_h2o4gpu
    self.verbose = verbose
def fit_model(X_train, y_train, X_test, y_test, reg_type='enet'):
    """Fit matching h2o4gpu and sklearn regressors and compare them.

    ``reg_type`` selects the model family: 'lasso', 'ridge' or 'enet'
    (default).  Both implementations are fitted on the training data,
    timed, and scored on the test data.

    Returns a tuple ``(time_h2o, time_sklearn, r2_h2o, r2_sklearn)`` of
    wall-clock fit times and R^2 test scores.
    """
    if reg_type == 'lasso':
        # Explicit h2o4gpu elastic-net configuration equivalent to a
        # Lasso: fully-L1 mixing (alpha_min = alpha_max = 1.0) with one
        # lambda equal to the regularization strength.
        regularization = 1.0
        reg_h2o = elastic_net.ElasticNetH2O(
            n_threads=None,
            n_gpus=-1,
            fit_intercept=True,
            lambda_min_ratio=1.0,
            n_lambdas=1,
            n_folds=1,
            n_alphas=1,
            tol=1e-2,
            lambda_stop_early=False,
            glm_stop_early=True,
            glm_stop_early_error_fraction=1.0,
            max_iter=5000,
            verbose=False,
            store_full_path=1,
            lambda_max=regularization,
            alpha_max=1.0,
            alpha_min=1.0,
            alphas=None,
            lambdas=None,
            order=None)
        reg_sklearn = linear_model.LassoSklearn()
    elif reg_type == 'ridge':
        reg_h2o = h2o4gpu.Ridge()
        reg_sklearn = linear_model.RidgeSklearn()
    elif reg_type == 'enet':
        reg_h2o = h2o4gpu.ElasticNet()
        # update when the wrapper is done
        reg_sklearn = linear_model.ElasticNetSklearn()

    # Time both fits.
    t0 = time.time()
    reg_h2o.fit(X_train, y_train, free_input_data=1)
    time_h2o = time.time() - t0

    t0 = time.time()
    reg_sklearn.fit(X_train, y_train)
    time_sklearn = time.time() - t0

    # Predicting test values
    y_pred_h2o = reg_h2o.predict(X_test, free_input_data=1).squeeze()
    y_pred_sklearn = reg_sklearn.predict(X_test)

    # Calculating R^2 scores
    r2_h2o = r2_score(y_test, y_pred_h2o)
    r2_sklearn = r2_score(y_test, y_pred_sklearn)

    # Clearing the memory
    reg_h2o.free_sols()
    reg_h2o.free_preds()
    reg_h2o.finish()
    del reg_h2o
    del reg_sklearn
    gc.collect()

    return time_h2o, time_sklearn, r2_h2o, r2_sklearn
def __init__(
        self,
        fit_intercept=True,  #h2o4gpu
        normalize=False,
        copy_X=True,
        n_jobs=1,
        n_gpus=-1,
        tol=1E-4,
        glm_stop_early=True,  # h2o4gpu
        glm_stop_early_error_fraction=1.0,  # h2o4gpu
        verbose=False,
        backend='auto'):
    """Linear regression wrapper dispatching to sklearn or h2o4gpu.

    The ``H2O4GPU_BACKEND`` environment variable overrides ``backend``.
    With ``backend='auto'`` sklearn is used only when a parameter the
    h2o4gpu solver does not support (``normalize``, ``copy_X``,
    ``n_jobs``) differs from its default.
    """
    import os
    _backend = os.environ.get('H2O4GPU_BACKEND', None)
    if _backend is not None:
        backend = _backend
    assert_is_type(backend, str)

    # Fall back to Sklearn
    # Can remove if fully implement sklearn functionality
    self.do_sklearn = False
    if backend == 'auto':
        params_string = ['normalize', 'copy_X', 'n_jobs']
        params = [normalize, copy_X, n_jobs]
        params_default = [False, True, 1]
        for i, param in enumerate(params):
            if param != params_default[i]:
                self.do_sklearn = True
                # CONSISTENCY FIX: warnings are gated on ``verbose`` as
                # in the sibling solver wrappers (original printed
                # unconditionally despite accepting ``verbose``).
                if verbose:
                    print("WARNING: The sklearn parameter " +
                          params_string[i] +
                          " has been changed from default to " +
                          str(param) +
                          ". Will run Sklearn Linear Regression.")
    elif backend == 'sklearn':
        self.do_sklearn = True
    elif backend == 'h2o4gpu':
        self.do_sklearn = False
    # BUG FIX: the original stored the raw argument, so backend='auto'
    # left self.backend as the unresolved string 'auto'; resolve it to
    # the backend actually used, as the sibling wrappers do.
    self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'

    self.model_sklearn = sk.LinearRegressionSklearn(
        fit_intercept=fit_intercept,
        normalize=normalize,
        copy_X=copy_X,
        n_jobs=n_jobs)

    # Equivalent Linear Regression parameters for h2o4gpu: ordinary
    # least squares, i.e. a single zero-regularization point.
    # (The original also rebound several arguments to themselves,
    # e.g. ``n_gpus = n_gpus``; those no-ops are removed.)
    self.model_h2o4gpu = elastic_net.ElasticNetH2O(
        n_threads=None,
        n_gpus=n_gpus,
        fit_intercept=fit_intercept,
        lambda_min_ratio=0.0,
        n_lambdas=1,
        n_folds=1,
        n_alphas=1,
        tol=tol,
        lambda_stop_early=False,
        glm_stop_early=glm_stop_early,
        glm_stop_early_error_fraction=glm_stop_early_error_fraction,
        max_iter=5000,
        verbose=verbose,
        lambda_max=0.0,
        alpha_max=0.0,
        alpha_min=0.0,
        alphas=None,
        lambdas=None,
        tol_seek_factor=1E-1,
        family='elasticnet',
        order=None)

    if self.do_sklearn:
        if verbose:
            print("Running sklearn Linear Regression")
        self.model = self.model_sklearn
    else:
        if verbose:
            print("Running h2o4gpu Linear Regression")
        self.model = self.model_h2o4gpu
    # CONSISTENCY FIX: sibling wrappers expose self.verbose; the
    # original omitted it here.
    self.verbose = verbose