Ejemplo n.º 1
0
def run_over_all_input_parameters_reg(X, y, max_evals, cvs, val_sizes, methods, pos_labels, test_sizes, top_cv_evals, thresholds, eval_metrics):
    """Fit an hgboost regressor for every combination of the given settings.

    One ``hgboost`` instance is created per combination (always with
    ``random_state=42`` and ``verbose=2``), fitted with the regressor selected
    by ``method``, used once for prediction on ``X``, and summarised as one
    row in the returned table.

    Parameters
    ----------
    X, y :
        Training data; passed unchanged to the selected ``*_reg`` fit method.
        ``X`` is also passed to ``predict``.
    max_evals, cvs, val_sizes, test_sizes, top_cv_evals, thresholds : iterable
        Candidate values for the ``hgboost`` constructor arguments
        ``max_eval``, ``cv``, ``val_size``, ``test_size``, ``top_cv_evals``
        and ``threshold`` respectively.
    methods : iterable
        Each entry selects the fit routine: ``'xgb_reg'``, ``'ctb_reg'`` or
        ``'lgb_reg'``. Any other value skips fitting, and ``predict`` is then
        called on the unfitted instance.
    pos_labels : iterable
        Only recorded in the output rows; the value is not passed to the model.
    eval_metrics : iterable
        Candidate values forwarded as ``eval_metric`` to the fit routine.

    Returns
    -------
    pandas.DataFrame
        One row per combination with the columns ``max_eval``, ``threshold``,
        ``cv``, ``test_size``, ``val_size``, ``top_cv_evals``,
        ``random_state``, ``pos_label``, ``method``, ``eval_metric``, ``loss``
        and ``status``. ``status`` is ``'OK'`` on success, otherwise the
        ``args`` tuple of the caught ``ValueError`` (``loss`` then stays NaN).
        The frame is empty when any of the input iterables is empty.

    Raises
    ------
    AssertionError
        If a caught ``ValueError`` has ``'hgboost'`` among its ``args``.

    Exceptions other than ``ValueError`` are not handled and propagate.
    """
    # Function-scope import, so the block does not rely on a module-level one.
    from itertools import product

    random_state = 42
    out = []

    # product() varies the right-most iterable fastest, i.e. the same visiting
    # order as nesting one for-loop per argument in this order.
    grid = product(max_evals, cvs, val_sizes, methods, pos_labels, test_sizes, top_cv_evals, thresholds, eval_metrics)
    for max_eval, cv, val_size, method, pos_label, test_size, top_cv_eval, threshold, eval_metric in grid:
        # Defaults that survive when the run below fails part-way.
        status = 'OK'
        loss = np.nan
        try:
            # Setup model
            hgb = hgboost(max_eval=max_eval, threshold=threshold, cv=cv, test_size=test_size, val_size=val_size, top_cv_evals=top_cv_eval, random_state=random_state, verbose=2)
            # Fit model
            if method == 'xgb_reg':
                hgb.xgboost_reg(X, y, eval_metric=eval_metric)
            elif method == 'ctb_reg':
                hgb.catboost_reg(X, y, eval_metric=eval_metric)
            elif method == 'lgb_reg':
                hgb.lightboost_reg(X, y, eval_metric=eval_metric)

            # Exercise the predictor. The values are not inspected, but the
            # two-way unpacking is kept so that a result of a different
            # length still surfaces as a (caught) ValueError.
            y_pred, y_proba = hgb.predict(X)
            # Loss of the row(s) flagged as best in the summary table
            summary = hgb.results['summary']
            loss = summary['loss'].iloc[np.where(summary['best'])[0]].values
        except ValueError as err:
            assert 'hgboost' not in err.args
            status = err.args
            print(err.args)

        out.append({
            'max_eval': max_eval,
            'threshold': threshold,
            'cv': cv,
            'test_size': test_size,
            'val_size': val_size,
            'top_cv_evals': top_cv_eval,
            'random_state': random_state,
            'pos_label': pos_label,
            'method': method,
            'eval_metric': eval_metric,
            'loss': loss,
            'status': status,
        })

    # Exactly one row is appended per combination, so len(out) is the count.
    print('Fin! Total number of models evaluated with different paramters: %.0d' %(len(out)))
    return pd.DataFrame(out)
Ejemplo n.º 2
0
def get_data():
    """Load the hgboost example data with ``Parch`` as a capped target.

    The ``Parch`` column of the example frame is taken as the target, every
    value of 3 or more is set to 3, and the column is removed from the frame
    before the remaining columns are run through ``hgboost.preprocessing``.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is the output of ``preprocessing`` and ``y`` is
        a NumPy array holding the capped ``Parch`` values.
    """
    from hgboost import hgboost
    gs = hgboost()
    df = gs.import_example()
    # Take an explicit copy: ``.values`` can alias the frame's own buffer, so
    # writing into it either mutates ``df`` behind its back or, with pandas
    # Copy-on-Write enabled, fails because the array is read-only.
    y = df['Parch'].values.copy()
    y[y >= 3] = 3
    del df['Parch']
    X = gs.preprocessing(df, verbose=0)
    return X, y
Ejemplo n.º 3
0
def get_data_reg():
    """Load the hgboost example data with ``Age`` as the target.

    The ``Age`` column is split off as the target, the remaining columns are
    run through ``hgboost.preprocessing``, and every sample whose ``Age`` is
    NaN is dropped from both the features and the target.

    Returns
    -------
    tuple
        ``(X, y)`` restricted to the samples with a non-NaN ``Age``.
    """
    from hgboost import hgboost

    model = hgboost()
    frame = model.import_example()

    # pop() hands back the column and removes it from the frame in one step
    age = frame.pop('Age').values
    # Mask of samples with a known target value
    has_age = ~np.isnan(age)

    features = model.preprocessing(frame, verbose=0)
    return features.loc[has_age, :], age[has_age]
Ejemplo n.º 4
0
# hyperparameter searching
# feature importance
# early stopping
# plotting

# %%
from hgboost import hgboost
print(dir(hgboost))
# print(hgboost.__version__)
import numpy as np

# %% HYPEROPTIMIZED XGBOOST
# Two separate hgboost instances built with exactly the same settings.
# NOTE(review): going by the names, one is meant for the xgboost run and the
# other for the catboost run - confirm against the code that uses them.
hgb_xgb = hgboost(
    max_eval=10, threshold=0.5, cv=5, test_size=0.2, val_size=0.2,
    top_cv_evals=10, random_state=None, verbose=3,
)
hgb_cat = hgboost(
    max_eval=10, threshold=0.5, cv=5, test_size=0.2, val_size=0.2,
    top_cv_evals=10, random_state=None, verbose=3,
)
hgb_light = hgboost(max_eval=10,
                    threshold=0.5,
                    cv=5,
                    test_size=0.2,