import itertools

import numpy as np
import pandas as pd
from hgboost import hgboost


def run_over_all_input_parameters_reg(X, y, max_evals, cvs, val_sizes, methods, pos_labels, test_sizes, top_cv_evals, thresholds, eval_metrics):
    # Evaluate every combination of the supplied settings and collect, per
    # combination, the loss of the best model (or the error status).
    random_state = 42
    out = []
    count = 0
    grid = itertools.product(max_evals, cvs, val_sizes, methods, pos_labels, test_sizes, top_cv_evals, thresholds, eval_metrics)
    for max_eval, cv, val_size, method, pos_label, test_size, top_cv_eval, threshold, eval_metric in grid:
        try:
            status = 'OK'
            loss = np.nan
            # Setup model
            hgb = hgboost(max_eval=max_eval, threshold=threshold, cv=cv, test_size=test_size, val_size=val_size, top_cv_evals=top_cv_eval, random_state=random_state, verbose=2)
            # Fit model
            if method == 'xgb_reg':
                hgb.xgboost_reg(X, y, eval_metric=eval_metric)
            elif method == 'ctb_reg':
                hgb.catboost_reg(X, y, eval_metric=eval_metric)
            elif method == 'lgb_reg':
                hgb.lightboost_reg(X, y, eval_metric=eval_metric)
            # Use the predictor
            y_pred, y_proba = hgb.predict(X)
            # Loss score of the best model
            loss = hgb.results['summary']['loss'].iloc[np.where(hgb.results['summary']['best'])[0]].values
            # Make some plots
            # assert hgb.plot_params(return_ax=True)
            # assert hgb.plot(return_ax=True)
            # assert hgb.treeplot(return_ax=True)
            # if (val_size is not None):
            #     ax = hgb.plot_validation(return_ax=True)
            #     assert len(ax) >= 2
        except ValueError as err:
            assert 'hgboost' not in err.args
            status = err.args
            print(err.args)

        tmpout = {'max_eval': max_eval,
                  'threshold': threshold,
                  'cv': cv,
                  'test_size': test_size,
                  'val_size': val_size,
                  'top_cv_evals': top_cv_eval,
                  'random_state': random_state,
                  'pos_label': pos_label,
                  'method': method,
                  'eval_metric': eval_metric,
                  'loss': loss,
                  'status': status,
                  }
        out.append(tmpout)
        count = count + 1

    print('Fin! Total number of models evaluated with different parameters: %d' % count)
    return pd.DataFrame(out)
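# %%
# A minimal sketch of how run_over_all_input_parameters_reg() could be driven.
# The tiny parameter grids and the synthetic make_regression data are
# illustrative assumptions, not part of the original experiments; in practice
# X and y would come from get_data_reg() defined below.
from sklearn.datasets import make_regression

# Small synthetic regression set (hgboost is assumed to expect X as a DataFrame).
X_demo, y_demo = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=1)
X_demo = pd.DataFrame(X_demo, columns=['f%d' % i for i in range(X_demo.shape[1])])

# Sweep a deliberately tiny grid; every combination is scored and collected.
results_reg = run_over_all_input_parameters_reg(
    X_demo, y_demo,
    max_evals=[10],
    cvs=[5],
    val_sizes=[0.2],
    methods=['xgb_reg'],
    pos_labels=[None],
    test_sizes=[0.2],
    top_cv_evals=[10],
    thresholds=[0.5],
    eval_metrics=['rmse'],
    )
print(results_reg[['method', 'eval_metric', 'loss', 'status']])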
def get_data():
    # Titanic example set: use 'Parch' (capped at 3) as the classification target.
    from hgboost import hgboost
    gs = hgboost()
    df = gs.import_example()
    y = df['Parch'].values
    y[y >= 3] = 3
    del df['Parch']
    X = gs.preprocessing(df, verbose=0)
    return X, y
def get_data_reg():
    # Titanic example set: use 'Age' as the regression target and drop rows where it is missing.
    from hgboost import hgboost
    gs = hgboost()
    df = gs.import_example()
    y = df['Age'].values
    del df['Age']
    I = ~np.isnan(y)
    X = gs.preprocessing(df, verbose=0)
    X = X.loc[I, :]
    y = y[I]
    return X, y
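# %%
# Quick sanity check of the two data helpers above (a sketch; the printed shapes
# depend on the example set shipped with hgboost).
X_clf, y_clf = get_data()        # classification target: 'Parch' capped at 3
X_reg, y_reg = get_data_reg()    # regression target: 'Age' with missing rows removed
print(X_clf.shape, np.unique(y_clf))
print(X_reg.shape, y_reg.shape)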
# hyperparameter searching
# feature importance
# early stopping
# plotting

# %%
from hgboost import hgboost
print(dir(hgboost))
# print(hgboost.__version__)

import numpy as np

# %% HYPEROPTIMIZED XGBOOST
hgb_xgb = hgboost(max_eval=10, threshold=0.5, cv=5, test_size=0.2, val_size=0.2, top_cv_evals=10, random_state=None, verbose=3)
hgb_cat = hgboost(max_eval=10, threshold=0.5, cv=5, test_size=0.2, val_size=0.2, top_cv_evals=10, random_state=None, verbose=3)
hgb_light = hgboost(max_eval=10, threshold=0.5, cv=5, test_size=0.2, val_size=0.2, top_cv_evals=10, random_state=None, verbose=3)
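# %%
# A sketch of fitting the three hyperoptimized instances above. The regression
# methods and plot calls are the ones referenced in
# run_over_all_input_parameters_reg(); treating these instances as regressors on
# the 'Age' target (with default eval_metric) is an assumption for illustration.
X, y = get_data_reg()

results_xgb = hgb_xgb.xgboost_reg(X, y)
results_cat = hgb_cat.catboost_reg(X, y)
results_light = hgb_light.lightboost_reg(X, y)

# Predict with the best model found by the search.
y_pred, y_proba = hgb_xgb.predict(X)

# Inspect the hyperparameter search and the validation results.
hgb_xgb.plot_params(return_ax=True)
hgb_xgb.plot(return_ax=True)
hgb_xgb.treeplot(return_ax=True)
hgb_xgb.plot_validation(return_ax=True)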