import ngboost
import numpy as np
import pandas as pd
import shap
from arch import arch_model
from ngboost.distns import Normal
from ngboost.scores import LogScore
from scipy import stats
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor


def test_ngboost(X_train, y_train, X_val, y_val, X_test, y_test, alpha):
    # Parameter grid search over base-learner depth, minibatch fraction,
    # and learning rate.
    bases = [DecisionTreeRegressor(criterion='friedman_mse', max_depth=d)
             for d in (5, 10, 15, 20, 25, 30, 35)]
    param_grid = {
        'minibatch_frac': [1.0, 0.75, 0.5],
        'Base': bases,
        'learning_rate': [1e-3, 5e-3, 1e-2, 5e-2, 0.1],
    }
    model = ngboost.NGBRegressor(Dist=Normal, Score=LogScore, verbose=False)
    grid_search = GridSearchCV(model, param_grid=param_grid, cv=2, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Obtain the optimal parameters from the grid search and retrain,
    # using the validation set for early stopping.
    best_model = ngboost.NGBRegressor(
        Dist=Normal,
        verbose=False,
        Base=grid_search.best_params_['Base'],
        minibatch_frac=grid_search.best_params_['minibatch_frac'],
        learning_rate=grid_search.best_params_['learning_rate'],
    ).fit(X_train, y_train, X_val, y_val, early_stopping_rounds=40)

    # Test: predict full distributions, truncated at the best validation
    # iteration.
    y_pred = best_model.pred_dist(X_test, max_iter=best_model.best_val_loss_itr)

    # Model metrics: RMSE of the predicted mean and total test
    # log-likelihood (`ll` is a log-likelihood helper; see the sketch below).
    rmse = np.mean((y_test - y_pred.params['loc']) ** 2.) ** 0.5
    test_ll = sum(ll(y_test, y_pred.params['loc'], y_pred.params['scale']))

    # Confidence intervals: empirical coverage and average interval width.
    on_target = 0
    ranges = []
    for loc, scale, y in zip(y_pred.params['loc'], y_pred.params['scale'], y_test):
        conf_int = stats.norm.interval(1 - alpha, loc=loc, scale=scale)
        if conf_int[0] <= y <= conf_int[1]:
            on_target += 1
        ranges.append(conf_int[1] - conf_int[0])  # interval width
    coverage = on_target / len(y_test)
    avg_range = np.mean(ranges)
    return (coverage, avg_range, test_ll)
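# `ll` is not defined in the snippet above; a minimal sketch, assuming it
# computes the per-observation Normal log-likelihood (the name and
# signature are taken from the call site, the body is an assumption):
def ll(y, loc, scale):
    # Element-wise log-density of y under Normal(loc, scale).
    return stats.norm.logpdf(y, loc=loc, scale=scale)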
def final_model(train_valid_setup):
    # Unpack (train frame, test frame, distribution name, learning rate,
    # number of boosting iterations).
    train = train_valid_setup[0]
    test = train_valid_setup[1]
    dist = train_valid_setup[2]
    eta = train_valid_setup[3]
    it = train_valid_setup[4]
    if dist == "Laplace":
        dist_ = ngboost.distns.Laplace
    elif dist == "T":
        dist_ = ngboost.distns.TFixedDf
    ngb = ngboost.NGBRegressor(Dist=dist_,
                               Score=ngboost.scores.LogScore,
                               Base=ExtraTreeRegressor(max_depth=3,
                                                       min_samples_split=2),
                               n_estimators=it,
                               learning_rate=eta,
                               minibatch_frac=1.0,
                               col_sample=1.0,
                               verbose=False,
                               verbose_eval=500,
                               tol=0.0001,
                               random_state=2021)
    # `features` (predictor columns) and the "rr" target column are defined
    # by the surrounding script.
    ngb.fit(train[features], train["rr"])
    Y_dists = ngb.pred_dist(test[features])
    # 1% and 2.5% quantiles (VaR levels) of the first predictive distribution.
    return Y_dists.dist.ppf(0.01)[0], Y_dists.dist.ppf(0.025)[0]
def mp_cv(train_valid_setup):
    train = train_valid_setup[0]
    test = train_valid_setup[1]
    # Select the base learner by name.
    if train_valid_setup[2] == 'ET':
        model = ExtraTreeRegressor(max_depth=3)
    elif train_valid_setup[2] == "DT":
        model = DecisionTreeRegressor(max_depth=3)
    elif train_valid_setup[2] == "Ridge":
        model = Ridge(alpha=0.25)
    ngb = ngboost.NGBRegressor(Dist=ngboost.distns.Laplace,
                               Score=ngboost.scores.LogScore,
                               Base=model,
                               n_estimators=500,
                               learning_rate=0.01,
                               minibatch_frac=1.0,
                               col_sample=1.0,
                               verbose=False,
                               verbose_eval=500,
                               tol=0.0001,
                               random_state=2021)
    ngb.fit(train[features], train["rr"])
    Y_dists = ngb.pred_dist(test[features])
    return [Y_dists.dist.ppf(0.01)[0], Y_dists.dist.ppf(0.025)[0]]
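# The `mp_` prefix suggests mp_cv is mapped over rolling train/test splits
# with multiprocessing; a hypothetical driver sketch (the `splits` list of
# (train_frame, test_frame, base_name) tuples is an assumption):
def run_mp_cv(splits, processes=4):
    # Map the worker over all splits in parallel and collect the
    # [VaR 1%, VaR 2.5%] pairs. Call this under an
    # `if __name__ == "__main__":` guard when processes are spawned.
    from multiprocessing import Pool
    with Pool(processes=processes) as pool:
        return pool.map(mp_cv, splits)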
def test_ngboost():
    try:
        import ngboost
    except ImportError:
        print("Skipping test_ngboost!")
        return
    X, y = shap.datasets.boston()
    model = ngboost.NGBRegressor(n_estimators=20).fit(X, y)
    explainer = shap.TreeExplainer(model, model_output=0)
    # SHAP values plus the expected value should reproduce the model's
    # point predictions to within numerical tolerance.
    assert np.max(np.abs(explainer.shap_values(X).sum(1)
                         + explainer.expected_value - model.predict(X))) < 1e-5
def ngb_pipeline():
    # `learner` is an sklearn-compatible base estimator assumed to be
    # bound at module level (see the usage sketch below).
    base_model = ngboost.NGBRegressor(Dist=Normal,
                                      Score=LogScore,
                                      Base=learner,
                                      n_estimators=500,
                                      learning_rate=0.04,
                                      col_sample=1.0,
                                      minibatch_frac=1.0,
                                      verbose=False,
                                      natural_gradient=True)
    return base_model
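# A hypothetical usage sketch for ngb_pipeline; the choice of base learner
# and the X, y inputs are assumptions, not part of the original code:
learner = ExtraTreeRegressor(max_depth=3)  # satisfies the `learner` lookup above

def demo_ngb_pipeline(X, y):
    # Fit the pipeline model and return the mean and scale parameters of
    # the predictive distributions.
    model = ngb_pipeline().fit(X, y)
    dists = model.pred_dist(X)
    return dists.params["loc"], dists.params["scale"]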
def final_model_switching(train_valid_setup):
    # Unpack (train frame, test frame, distribution name, learning rate,
    # number of iterations, switch flag).
    train = train_valid_setup[0]
    test = train_valid_setup[1]
    dist = train_valid_setup[2]
    eta = train_valid_setup[3]
    it = train_valid_setup[4]
    switch = train_valid_setup[5]
    if dist == "Laplace":
        dist_ = ngboost.distns.Laplace
    elif dist == "T":
        dist_ = ngboost.distns.TFixedDf

    # NGBoost VaR forecasts at the 1% and 2.5% levels.
    ngb = ngboost.NGBRegressor(Dist=dist_,
                               Score=ngboost.scores.LogScore,
                               Base=ExtraTreeRegressor(max_depth=3,
                                                       min_samples_split=2),
                               n_estimators=it,
                               learning_rate=eta,
                               minibatch_frac=1.0,
                               col_sample=1.0,
                               verbose=False,
                               verbose_eval=500,
                               tol=0.0001,
                               random_state=2021)
    ngb.fit(train[features], train["rr"])
    Y_dists = ngb.pred_dist(test[features])
    VaR1_NGB, VaR25_NGB = Y_dists.dist.ppf(0.01)[0], Y_dists.dist.ppf(0.025)[0]

    # AR(1)-GARCH(1,1) benchmark: filtered historical simulation using the
    # empirical 1% and 2.5% quantiles of the standardized residuals.
    am = arch_model(train["rr"], vol="GARCH", p=1, o=0, q=1,
                    dist="normal", mean="AR", lags=1)
    res = am.fit(disp="off")
    forecasts = res.forecast(horizon=1)
    cond_mean = forecasts.mean.iloc[-1, 0]
    cond_var = forecasts.variance.iloc[-1, 0]
    # Drop the first residual, which the AR(1) mean leaves undefined.
    stan_resid = res.resid[1:] / res.conditional_volatility[1:]
    q = np.percentile(stan_resid, [1, 2.5])
    VaR1_GARCH, VaR25_GARCH = cond_mean + np.sqrt(cond_var) * q

    # Return the GARCH forecasts when the switch is on, NGBoost otherwise.
    if switch == 1:
        return VaR1_GARCH, VaR25_GARCH
    else:
        return VaR1_NGB, VaR25_NGB
def mp_cv_ET_hyp_tuning(train_valid_setup):
    train = train_valid_setup[0]
    test = train_valid_setup[1]
    # Elements 2 and 3 carry the ExtraTreeRegressor hyperparameters under
    # tuning: max_depth and min_samples_split.
    ngb = ngboost.NGBRegressor(Dist=ngboost.distns.Laplace,
                               Score=ngboost.scores.LogScore,
                               Base=ExtraTreeRegressor(
                                   max_depth=train_valid_setup[2],
                                   min_samples_split=train_valid_setup[3]),
                               n_estimators=500,
                               learning_rate=0.01,
                               minibatch_frac=1.0,
                               col_sample=1.0,
                               verbose=False,
                               verbose_eval=500,
                               tol=0.0001,
                               random_state=2021)
    ngb.fit(train[features], train["rr"])
    Y_dists = ngb.pred_dist(test[features])
    return [Y_dists.dist.ppf(0.01)[0], Y_dists.dist.ppf(0.025)[0]]
def final_model_ver2(train_valid_setup):
    train = train_valid_setup[0]
    test = train_valid_setup[1]
    dist = train_valid_setup[2]
    eta = train_valid_setup[3]
    it = train_valid_setup[4]
    if dist == "Laplace":
        dist_ = ngboost.distns.Laplace
    elif dist == "T":
        dist_ = ngboost.distns.TFixedDf
    ngb = ngboost.NGBRegressor(Dist=dist_,
                               Score=ngboost.scores.LogScore,
                               Base=ExtraTreeRegressor(max_depth=3,
                                                       min_samples_split=2),
                               n_estimators=it,
                               learning_rate=eta,
                               minibatch_frac=1.0,
                               col_sample=1.0,
                               verbose=False,
                               verbose_eval=500,
                               tol=0.0001,
                               random_state=2021)
    ngb.fit(train[features], train["rr"])
    Y_dists = ngb.pred_dist(test[features])
    Y_dists_train = ngb.pred_dist(train[features])

    # Standardize the in-sample residuals by the predictive standard
    # deviation; for a Laplace distribution with scale b that is
    # sqrt(2) * b, i.e. sqrt(2 * b**2).
    residuals = np.array(train.rr) - ngb.predict(train[features])
    standardized_residuals = residuals / np.sqrt(
        2 * (Y_dists_train.params["scale"]) ** 2)
    standardized_residuals_df = pd.DataFrame({"t": standardized_residuals})

    # Filtered-historical-simulation VaR: predictive mean plus predictive
    # standard deviation times the empirical residual quantile.
    VaR1 = Y_dists.params["loc"][0] + np.sqrt(
        2 * (Y_dists.params["scale"][0]) ** 2
    ) * standardized_residuals_df.t.quantile(0.01)
    VaR25 = Y_dists.params["loc"][0] + np.sqrt(
        2 * (Y_dists.params["scale"][0]) ** 2
    ) * standardized_residuals_df.t.quantile(0.025)
    return VaR1, VaR25