Example #1
import numpy as np
import ngboost
from ngboost.distns import Normal
from ngboost.scores import LogScore
from scipy import stats
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor


def test_ngboost(X_train, y_train, X_val, y_val, X_test, y_test, alpha):
    # Parameter Grid Search
    base_learners = [
        DecisionTreeRegressor(criterion='friedman_mse', max_depth=d)
        for d in (5, 10, 15, 20, 25, 30, 35)
    ]

    param_grid = {
        'minibatch_frac': [1.0, 0.75, 0.5],
        'Base': base_learners,
        'learning_rate': [1e-3, 5e-3, 1e-2, 5e-2, 0.1],
    }

    model = ngboost.NGBRegressor(Dist=Normal, Score=LogScore, verbose=False)
    grid_search = GridSearchCV(model, param_grid=param_grid, cv=2, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Obtain optimal parameters from the grid search and retrain with a
    # validation set for early stopping
    best_model = ngboost.NGBRegressor(
        Dist=Normal,
        verbose=False,
        Base=grid_search.best_params_['Base'],
        minibatch_frac=grid_search.best_params_['minibatch_frac'],
        learning_rate=grid_search.best_params_['learning_rate'],
    ).fit(X_train, y_train, X_val, y_val, early_stopping_rounds=40)

    # Test
    y_pred = best_model.pred_dist(X_test,
                                  max_iter=best_model.best_val_loss_itr)

    # Model Metrics (`ll` is an external per-sample log-likelihood helper,
    # e.g. a Gaussian logpdf, defined elsewhere)
    rmse = np.mean((y_test - y_pred.params['loc'])**2.)**0.5
    test_ll = sum(ll(y_test, y_pred.params['loc'], y_pred.params['scale']))

    # Confidence Intervals
    on_target = 0
    ranges = []
    for loc, scale, y in zip(y_pred.params['loc'], y_pred.params['scale'],
                             y_test):
        # Compute Interval
        conf_int = stats.norm.interval(1 - alpha, loc=loc, scale=scale)
        # Update Metrics
        if conf_int[0] <= y <= conf_int[1]:
            on_target += 1
        ranges.append(conf_int[1] - conf_int[0])  # interval width
    coverage = on_target / len(ranges)
    avg_range = np.mean(ranges)

    # Done.
    return (coverage, avg_range, test_ll)
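
A minimal way to exercise this function on synthetic data. The ll helper here is a hypothetical stand-in for the external per-sample log-likelihood assumed above, and the split sizes are arbitrary:

import numpy as np
from scipy import stats
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

def ll(y, loc, scale):
    # Hypothetical helper: per-sample Gaussian log-likelihood
    return stats.norm.logpdf(y, loc=loc, scale=scale)

X, y = make_regression(n_samples=600, n_features=10, noise=10.0, random_state=0)
X_train, X_rest, y_train, y_rest = train_test_split(X, y, test_size=0.4, random_state=0)
X_val, X_test, y_val, y_test = train_test_split(X_rest, y_rest, test_size=0.5, random_state=0)

coverage, avg_range, test_ll = test_ngboost(X_train, y_train, X_val, y_val,
                                            X_test, y_test, alpha=0.05)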
Example #2
import ngboost
from sklearn.tree import ExtraTreeRegressor


def final_model(train_valid_setup):
    # Packs (train_df, test_df, distribution name, learning rate,
    # n_estimators); `features` is a module-level list of predictor columns.
    train, test, dist, eta, it = train_valid_setup[:5]

    if dist == "Laplace":
        dist_ = ngboost.distns.Laplace
    elif dist == "T":
        dist_ = ngboost.distns.TFixedDf

    ngb = ngboost.NGBRegressor(Dist=dist_,
                               Score=ngboost.scores.LogScore,
                               Base=ExtraTreeRegressor(max_depth=3,
                                                       min_samples_split=2),
                               n_estimators=it,
                               learning_rate=eta,
                               minibatch_frac=1.0,
                               col_sample=1.0,
                               verbose=False,
                               verbose_eval=500,
                               tol=0.0001,
                               random_state=2021)
    ngb.fit(train[features], train["rr"])
    Y_dists = ngb.pred_dist(test[features])

    # One-step-ahead 1% and 2.5% quantiles (Value-at-Risk estimates) for the
    # first test observation
    return Y_dists.dist.ppf(0.01)[0], Y_dists.dist.ppf(0.025)[0]
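
A hypothetical call, assuming the module-level features list and pandas DataFrames holding the predictors plus an "rr" target column:

import numpy as np
import pandas as pd

rng = np.random.default_rng(2021)
df = pd.DataFrame({"x1": rng.normal(size=300), "x2": rng.normal(size=300)})
df["rr"] = 0.5 * df["x1"] - 0.2 * df["x2"] + rng.laplace(scale=0.1, size=300)
features = ["x1", "x2"]  # assumed module-level predictor list

VaR1, VaR25 = final_model((df.iloc[:250], df.iloc[250:], "Laplace", 0.01, 500))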
Example #3
import ngboost
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor


def mp_cv(train_valid_setup):
    # Packs (train_df, test_df, base-learner name); `features` is a
    # module-level list of predictor column names.
    train = train_valid_setup[0]
    test = train_valid_setup[1]

    if train_valid_setup[2] == 'ET':
        model = ExtraTreeRegressor(max_depth=3)
    elif train_valid_setup[2] == "DT":
        model = DecisionTreeRegressor(max_depth=3)
    elif train_valid_setup[2] == "Ridge":
        model = Ridge(alpha=0.25)
    else:
        raise ValueError(f"Unknown base learner: {train_valid_setup[2]}")

    ngb = ngboost.NGBRegressor(Dist=ngboost.distns.Laplace,
                               Score=ngboost.scores.LogScore,
                               Base=model,
                               n_estimators=500,
                               learning_rate=0.01,
                               minibatch_frac=1.0,
                               col_sample=1.0,
                               verbose=False,
                               verbose_eval=500,
                               tol=0.0001,
                               random_state=2021)
    ngb.fit(train[features], train["rr"])
    Y_dists = ngb.pred_dist(test[features])

    return [Y_dists.dist.ppf(0.01)[0], Y_dists.dist.ppf(0.025)[0]]
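
The mp_ prefix suggests this function is meant to be mapped over worker processes; a hypothetical sketch with multiprocessing.Pool, reusing the df and features defined in the sketch under Example #2:

from multiprocessing import Pool

setups = [(df.iloc[:250], df.iloc[250:], name) for name in ("ET", "DT", "Ridge")]
if __name__ == "__main__":
    with Pool(processes=3) as pool:
        results = pool.map(mp_cv, setups)  # one [VaR1, VaR25] pair per setup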
Example #4
import numpy as np
import shap


def test_ngboost():
    try:
        import ngboost
    except ImportError:
        print("Skipping test_ngboost!")
        return
    X, y = shap.datasets.boston()
    model = ngboost.NGBRegressor(n_estimators=20).fit(X, y)
    # model_output=0 explains the first distribution parameter (loc)
    explainer = shap.TreeExplainer(model, model_output=0)
    # Additivity check: SHAP values plus the base value must reproduce the
    # model's point predictions
    assert np.max(
        np.abs(explainer.shap_values(X).sum(1) + explainer.expected_value -
               model.predict(X))) < 1e-5
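
shap.datasets.boston() has been removed from recent shap releases; a sketch of the same additivity check on synthetic data instead (assuming your shap version supports NGBoost models):

import numpy as np
import ngboost
import shap
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=5, noise=5.0, random_state=0)
model = ngboost.NGBRegressor(n_estimators=20, verbose=False).fit(X, y)
explainer = shap.TreeExplainer(model, model_output=0)
reconstructed = explainer.shap_values(X).sum(axis=1) + explainer.expected_value
assert np.max(np.abs(reconstructed - model.predict(X))) < 1e-5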
Example #5
import ngboost as ngb
from ngboost.distns import Normal
from ngboost.scores import LogScore


def ngb_pipeline():
    # `learner` is a base learner defined elsewhere (see the sketch below)
    base_model = ngb.NGBRegressor(Dist=Normal,
                                  Score=LogScore,
                                  Base=learner,
                                  n_estimators=500,
                                  learning_rate=0.04,
                                  col_sample=1.0,
                                  minibatch_frac=1.0,
                                  verbose=False,
                                  natural_gradient=True)
    return base_model
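
A hypothetical definition of the learner global this pipeline assumes, followed by a fit on synthetic data:

from sklearn.datasets import make_regression
from sklearn.tree import DecisionTreeRegressor

learner = DecisionTreeRegressor(criterion="friedman_mse", max_depth=4)  # assumed choice
X, y = make_regression(n_samples=200, n_features=5, noise=5.0, random_state=0)
model = ngb_pipeline().fit(X, y)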
Example #6
import numpy as np
import ngboost
from arch import arch_model
from sklearn.tree import ExtraTreeRegressor


def final_model_switching(train_valid_setup):
    # Packs (train_df, test_df, dist name, learning rate, n_estimators,
    # switch flag); `features` is a module-level predictor list.
    train, test, dist, eta, it, switch = train_valid_setup[:6]

    if dist == "Laplace":
        dist_ = ngboost.distns.Laplace
    elif dist == "T":
        dist_ = ngboost.distns.TFixedDf
    else:
        raise ValueError(f"Unsupported distribution: {dist}")

    ngb = ngboost.NGBRegressor(Dist=dist_,
                               Score=ngboost.scores.LogScore,
                               Base=ExtraTreeRegressor(max_depth=3,
                                                       min_samples_split=2),
                               n_estimators=it,
                               learning_rate=eta,
                               minibatch_frac=1.0,
                               col_sample=1.0,
                               verbose=False,
                               verbose_eval=500,
                               tol=0.0001,
                               random_state=2021)
    ngb.fit(train[features], train["rr"])
    Y_dists = ngb.pred_dist(test[features])
    VaR1_NGB, VaR25_NGB = Y_dists.dist.ppf(0.01)[0], Y_dists.dist.ppf(0.025)[0]

    # Benchmark: AR(1)-GARCH(1,1) with filtered historical simulation
    am = arch_model(train["rr"],
                    vol="GARCH",
                    p=1,
                    o=0,
                    q=1,
                    dist="normal",
                    mean="AR",
                    lags=1)
    res = am.fit(disp="off")
    forecasts = res.forecast(horizon=1)
    cond_mean = forecasts.mean.iloc[-1, 0]
    cond_var = forecasts.variance.iloc[-1, 0]
    # Standardize in-sample residuals (dropping the first, which the AR(1)
    # mean leaves undefined) and take their empirical 1% / 2.5% quantiles
    stan_resid = res.resid[1:] / res.conditional_volatility[1:]
    q = np.percentile(stan_resid, [1, 2.5])
    VaR1_GARCH, VaR25_GARCH = cond_mean + np.sqrt(cond_var) * q

    if switch == 1:
        return VaR1_GARCH, VaR25_GARCH
    else:
        return VaR1_NGB, VaR25_NGB
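
A toy illustration of the filtered-historical-simulation step in the GARCH branch above: the VaR estimate is the conditional mean plus conditional volatility times an empirical quantile of the standardized residuals (all numbers below are made up):

import numpy as np

stan_resid = np.random.default_rng(0).standard_normal(1000)  # stand-in residuals
q1, q25 = np.percentile(stan_resid, [1, 2.5])
cond_mean, cond_vol = 0.0, 0.02  # made-up one-step forecasts
VaR1, VaR25 = cond_mean + cond_vol * np.array([q1, q25])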
Example #7
import ngboost
from sklearn.tree import ExtraTreeRegressor


def mp_cv_ET_hyp_tuning(train_valid_setup):
    # Packs (train_df, test_df, max_depth, min_samples_split); `features`
    # is a module-level list of predictor column names.
    train = train_valid_setup[0]
    test = train_valid_setup[1]

    ngb = ngboost.NGBRegressor(Dist=ngboost.distns.Laplace,
                               Score=ngboost.scores.LogScore,
                               Base=ExtraTreeRegressor(
                                   max_depth=train_valid_setup[2],
                                   min_samples_split=train_valid_setup[3]),
                               n_estimators=500,
                               learning_rate=0.01,
                               minibatch_frac=1.0,
                               col_sample=1.0,
                               verbose=False,
                               verbose_eval=500,
                               tol=0.0001,
                               random_state=2021)
    ngb.fit(train[features], train["rr"])
    Y_dists = ngb.pred_dist(test[features])

    return [Y_dists.dist.ppf(0.01)[0], Y_dists.dist.ppf(0.025)[0]]
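
A hypothetical fan-out of the (max_depth, min_samples_split) grid over worker processes, again reusing the df and features from the sketch under Example #2; the grid values are assumptions:

from itertools import product
from multiprocessing import Pool

grid = [(df.iloc[:250], df.iloc[250:], depth, split)
        for depth, split in product([2, 3, 4], [2, 5, 10])]
if __name__ == "__main__":
    with Pool() as pool:
        results = pool.map(mp_cv_ET_hyp_tuning, grid)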
Example #8
import numpy as np
import ngboost
import pandas as pd
from sklearn.tree import ExtraTreeRegressor


def final_model_ver2(train_valid_setup):
    # Packs (train_df, test_df, dist name, learning rate, n_estimators);
    # `features` is a module-level predictor list.
    train, test, dist, eta, it = train_valid_setup[:5]

    if dist == "Laplace":
        dist_ = ngboost.distns.Laplace
    elif dist == "T":
        dist_ = ngboost.distns.TFixedDf
    else:
        raise ValueError(f"Unsupported distribution: {dist}")

    ngb = ngboost.NGBRegressor(Dist=dist_,
                               Score=ngboost.scores.LogScore,
                               Base=ExtraTreeRegressor(max_depth=3,
                                                       min_samples_split=2),
                               n_estimators=it,
                               learning_rate=eta,
                               minibatch_frac=1.0,
                               col_sample=1.0,
                               verbose=False,
                               verbose_eval=500,
                               tol=0.0001,
                               random_state=2021)
    ngb.fit(train[features], train["rr"])
    Y_dists = ngb.pred_dist(test[features])

    # Standardize in-sample residuals by the predicted standard deviation;
    # for Laplace(loc, scale=b), Var = 2*b**2, hence sqrt(2 * scale**2)
    Y_dists_train = ngb.pred_dist(train[features])
    residuals = np.array(train.rr) - ngb.predict(train[features])
    standardized_residuals = residuals / np.sqrt(
        2 * Y_dists_train.params["scale"]**2)
    standardized_residuals_df = pd.DataFrame({"t": standardized_residuals})

    # VaR = predicted location + predicted std dev * empirical quantile of the
    # standardized residuals (filtered historical simulation)
    sd_test = np.sqrt(2 * Y_dists.params["scale"][0]**2)
    VaR1 = Y_dists.params["loc"][0] + sd_test * standardized_residuals_df.t.quantile(0.01)
    VaR25 = Y_dists.params["loc"][0] + sd_test * standardized_residuals_df.t.quantile(0.025)
    return VaR1, VaR25
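
A quick check of the Laplace standardization used above: for Laplace(loc, scale=b) the variance is 2*b**2, so the standard deviation is exactly the sqrt(2 * scale**2) factor in the code:

import numpy as np
from scipy import stats

b = 0.5
assert np.isclose(stats.laplace(scale=b).std(), np.sqrt(2 * b**2))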