Exemple #1
0
 def test_classification(self):
     """Check that NGBoost with a Bernoulli output separates the breast-cancer classes.

     Loads the scikit-learn breast-cancer dataset, holds out 20% of the rows
     (fixed ``random_state`` so the split is reproducible), fits an NGBoost
     model and asserts that the ROC AUC of the predicted probabilities on
     the held-out rows is at least 0.95.
     """
     # ``return_X_y`` has to be passed by keyword: current scikit-learn
     # releases declare it keyword-only and reject a positional ``True``.
     data, target = load_breast_cancer(return_X_y=True)
     x_train, x_test, y_train, y_test = train_test_split(
         data,
         target,
         test_size=0.2,
         random_state=42,
     )
     ngb = NGBoost(
         Base=default_tree_learner,
         Dist=Bernoulli,
         Score=MLE,
         verbose=False,
     )
     ngb.fit(x_train, y_train)
     preds = ngb.pred_dist(x_test)
     # ``preds.prob`` is used as the ranking score for the AUC computation.
     score = roc_auc_score(y_test, preds.prob)
     assert score >= 0.95
Exemple #2
0
    # Build an NGBoost model with a Normal output distribution; the number of
    # estimators, learning rate, natural-gradient flag and minibatch fraction
    # are all read from `args` (defined outside this excerpt).
    ngb = NGBoost(
        Base=default_tree_learner,
        Dist=Normal,
        Score=MLE,
        n_estimators=args.n_estimators,
        learning_rate=args.lr,
        natural_gradient=args.natural,
        minibatch_frac=args.minibatch_frac,
        verbose=True,
    )

    ngb.fit(x_tr, y_tr)

    # Generate 1000 evaluation points and push them through `poly_transform`
    # before predicting.
    # NOTE(review): presumably the same transform that produced x_tr — confirm.
    x_te, y_te, _ = gen_data(n=1000, bound=1.3)
    x_te = poly_transform.transform(x_te)
    preds = ngb.pred_dist(x_te)

    # Only the first two of the four values returned by
    # calibration_regression are kept.
    pctles, obs, _, _ = calibration_regression(preds, y_te)

    # NOTE(review): `preds` is rebound here to the last element of the staged
    # predictions, replacing the pred_dist() result used for calibration above.
    all_preds = ngb.staged_pred_dist(x_te)
    preds = all_preds[-1]
    plt.figure(figsize=(6, 3))
    # Training points: column 1 of the features against the targets.
    plt.scatter(x_tr[:, 1], y_tr, color="black", marker=".", alpha=0.5)
    # Predicted `loc` plotted against column 1 of the evaluation features.
    plt.plot(
        x_te[:, 1],
        preds.loc,
        color="black",
        linestyle="-",
        linewidth=1,
        label="Predicted mean",
    )
Exemple #3
0
        # NOTE(review): the RMSE-based choice on the next line is immediately
        # overwritten by the NLL-based one, so only val_nll decides best_itr.
        best_itr = np.argmin(val_rmse) + 1
        best_itr = np.argmin(val_nll) + 1

        # NOTE(review): full_retrain is hard-coded to True, so the model is
        # always rebuilt and refitted on X_trainall / y_trainall.
        full_retrain = True
        if full_retrain:
            # NOTE(review): eval() runs whatever string is in args.distn; if
            # that comes from the command line it should be replaced by an
            # explicit name-to-class mapping like the ones used for Base/Score.
            ngb = NGBoost(Base=base_name_to_learner[args.base],
                      Dist=eval(args.distn),
                      Score=score_name_to_score[args.score](64),
                      n_estimators=args.n_est,
                      learning_rate=args.lr,
                      natural_gradient=args.natural,
                      minibatch_frac=args.minibatch_frac,
                      verbose=args.verbose)
            ngb.fit(X_trainall, y_trainall)

        # Predict on the test rows, capping the ensemble at best_itr iterations.
        forecast = ngb.pred_dist(X_test, max_iter=best_itr)

        # Accumulate this fold's point predictions, RMSE and mean negative
        # log-density into the running lists.
        y_ngb += list(forecast.loc)
        ngb_rmse += [np.sqrt(mean_squared_error(forecast.loc, y_test))]
        ngb_nll += [-forecast.logpdf(y_test.flatten()).mean()]
        
        #print(np.sqrt(mean_squared_error(forecast.loc, y_test)))
        #for idx, y_p, y_t in zip(test_index, list(forecast.loc), y_test):
        #    print(idx, y_t, y_p, np.abs(y_p - y_t))

        # NOTE(review): `or True` makes this condition always true, so the
        # summary line is printed regardless of args.verbose.
        # NOTE(review): val_rmse[best_itr-1] is square-rooted below, which
        # suggests val_rmse holds squared errors despite its name — confirm.
        if args.verbose or True:
            print("[%d/%d] BestIter=%d RMSE: Val=%.4f Test=%.4f NLL: Test=%.4f" % (itr+1, args.n_splits,
                                                                                   best_itr, np.sqrt(val_rmse[best_itr-1]),
                                                                                   np.sqrt(mean_squared_error(forecast.loc, y_test)),
                                                                                   ngb_nll[-1]))
from ngboost.ngboost import NGBoost
from ngboost.distns import Bernoulli
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

if __name__ == "__main__":
    # Demo: fit NGBoost with a Bernoulli output on the scikit-learn
    # breast-cancer dataset and report the ROC AUC on a 20% hold-out split.

    # ``return_X_y`` has to be passed by keyword: current scikit-learn
    # releases declare it keyword-only and reject a positional ``True``.
    X, Y = load_breast_cancer(return_X_y=True)
    # No random_state is given, so the split (and the printed score) varies
    # from run to run.
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    ngb = NGBoost(
        Base=default_tree_learner,
        Dist=Bernoulli,
        Score=MLE(),
        verbose=True,
    )
    ngb.fit(X_train, Y_train)

    # ``preds.prob`` is used as the ranking score for the AUC computation.
    preds = ngb.pred_dist(X_test)
    print("ROC:", roc_auc_score(Y_test, preds.prob))
Exemple #5
0
    # Remaining command-line options; `argparser` and the options read below
    # as args.noise_lvl / args.lr are defined outside this excerpt.
    argparser.add_argument("--distn", type=str, default="Normal")
    argparser.add_argument("--natural", action="store_true")
    argparser.add_argument("--score", type=str, default="CRPS")
    args = argparser.parse_args()

    # Fixed seed so the synthetic data below is reproducible.
    np.random.seed(123)

    # Synthetic linear problem: m rows, n features, features scaled by
    # 1/sqrt(n), and additive Gaussian noise scaled by args.noise_lvl.
    m, n = 1200, 50
    noise = np.random.randn(*(m, 1))
    beta1 = np.random.randn(n, 1)
    X = np.random.randn(m, n) / np.sqrt(n)
    Y = X @ beta1 + args.noise_lvl * noise
    print(X.shape, Y.shape)

    # First 1000 rows for training, the remaining 200 for testing.
    X_train, X_test = X[:1000, :], X[1000:, ]
    Y_train, Y_test = Y[:1000], Y[1000:]

    # NOTE(review): eval() runs whatever string is in args.score; since it is
    # a command-line value, an explicit name-to-class mapping would be safer.
    # NOTE(review): --distn is parsed above but Dist is hard-coded to Normal in
    # the visible code.
    ngb = NGBoost(n_estimators=400,
                  learning_rate=args.lr,
                  Dist=Normal,
                  Base=default_linear_learner,
                  natural_gradient=args.natural,
                  minibatch_frac=1.0,
                  Score=eval(args.score)(),
                  verbose=True,
                  verbose_eval=10)

    # NOTE(review): `losses` is not used in the visible part of the script.
    losses = ngb.fit(X_train, Y_train)
    forecast = ngb.pred_dist(X_test)
    # R^2 of the predicted `loc` against the held-out targets.
    print("R2:", r2_score(Y_test, forecast.loc))
    # Heteroscedastic targets: the noise scale is 0.5 * sqrt(exp(X @ beta2)),
    # so it depends on the features. The commented-out line below is the
    # constant-noise variant. X, beta1, beta2 and noise are defined outside
    # this excerpt.
    # Y = X @ beta + 0.5 * noise
    Y = X @ beta1 + 0.5 * np.sqrt(np.exp(X @ beta2)) * noise
    print(X.shape, Y.shape)

    # 200 evenly spaced points on [0, 2], used as the x-axis of the density
    # plot of predicted scales.
    axis = np.linspace(0.0, 2, 200)
    plt.figure(figsize=(8, 3))

    # Model scored with CRPS, 100 linear base learners, learning rate 1.0.
    ngb = NGBoost(n_estimators=100,
                  learning_rate=1.0,
                  Dist=Normal,
                  Base=default_linear_learner,
                  natural_gradient=True,
                  minibatch_frac=1.0,
                  Score=CRPS())
    # Fitted and evaluated on the same X (in-sample predictions).
    ngb.fit(X, Y)
    preds = ngb.pred_dist(X)
    # Summary statistics of the predicted scale parameter.
    print(preds.scale.mean())
    print(preds.scale.std())
    pctles, observed, slope, intercept = calibration_regression(preds, Y)

    # Left panel: PIT histogram from the calibration output.
    plt.subplot(1, 2, 1)
    plot_pit_histogram(pctles, observed, label="CRPS", linestyle="--")
    # Right panel: kernel density estimate of the predicted scales over `axis`.
    plt.subplot(1, 2, 2)
    plt.plot(axis,
             gaussian_kde(preds.scale)(axis),
             linestyle="--",
             color="black",
             label="CRPS")

    ngb = NGBoost(n_estimators=100,
                  learning_rate=0.5,
Exemple #7
0
                                                            test_size=0.2)
        # Carve a 20% validation split out of the training data.
        # NOTE(review): X_val / Y_val are not used in the visible code; the
        # fit() call below has them commented out.
        X_train, X_val, Y_train, Y_val = train_test_split(X_train,
                                                          Y_train,
                                                          test_size=0.2)

        # NOTE(review): eval() runs whatever strings are in args.distn and
        # args.score; if these come from the command line an explicit
        # name-to-class mapping would be safer.
        ngb = NGBoost(Dist=eval(args.distn),
                      n_estimators=args.n_est,
                      learning_rate=args.lr,
                      natural_gradient=args.natural,
                      verbose=args.verbose,
                      minibatch_frac=1.0,
                      Base=base_name_to_learner[args.base],
                      Score=eval(args.score)())

        train_losses = ngb.fit(X_train, Y_train)  #, X_val, Y_val)
        forecast = ngb.pred_dist(X_test)
        train_forecast = ngb.pred_dist(X_train)
        # Concordance index on the test and training rows. The predicted mean
        # is negated before being passed as the estimate, presumably because a
        # longer predicted time should rank as lower risk — confirm.
        # NOTE(review): the first number is labelled "(val)" in the message but
        # is computed on Y_test / X_test.
        print('NGB score: %.4f (val), %.4f (train)' %
              (concordance_index_censored(Y_test['Event'], Y_test['Time'],
                                          -forecast.mean())[0],
               concordance_index_censored(Y_train['Event'], Y_train['Time'],
                                          -train_forecast.mean())[0]))
        #logger.tick(forecast, Y_test)

        ##
        ## sksurv
        ##
        # Baseline model configured with the same n_est and lr.
        # NOTE(review): subsample uses args.minibatch_frac, whereas the NGBoost
        # model above hard-codes minibatch_frac=1.0.
        gbsa = GBSA(n_estimators=args.n_est,
                    learning_rate=args.lr,
                    subsample=args.minibatch_frac,
                    verbose=args.verbose)
Exemple #8
0
    # --- MLP quantile baseline -------------------------------------------
    # Wall-clock timing covers both fitting and prediction.
    start = datetime.now().timestamp()
    qreg = MLPQuantile()
    qreg.fit(X_train_std,y_train)
    preds = qreg.predict(X_test_std)
    end = datetime.now().timestamp()
    # exp(.) - 1 is applied to both predictions and targets before scoring.
    # NOTE(review): presumably undoes a log(1 + y) target transform — confirm.
    results=evaluate((np.exp(preds)-1),(np.exp(y_test)-1).values)
    results["duration"]=end-start
    save_result([horizon,
                    "MLP",
                    results,
                    1],f"unit_{horizon}",folder)

    # --- NGBoost ----------------------------------------------------------
    start = datetime.now().timestamp()
    ngb = NGBoost(Base=default_tree_learner, Dist=Normal, Score=MLE(), natural_gradient=True,
              verbose=True,n_estimators=1500)
    ngb.fit(X_train_std, y_train.values)
    Y_dists = ngb.pred_dist(X_test_std)
    # Evaluate the predictive distribution's ppf at 0.01, 0.02, ..., 0.99 and
    # store each result as one DataFrame column (columns are labelled 1..99).
    a=pd.DataFrame()
    for i in np.arange(1,100):
        a[i]=Y_dists.ppf(i/100)
    preds = a.values
    # The timed window here also includes the quantile extraction above.
    end = datetime.now().timestamp()
    results=evaluate((np.exp(preds)-1),(np.exp(y_test)-1).values)
    results["duration"]=end-start
    save_result([horizon,
                    "NGBOOST",
                    results,
                    1],f"unit_{horizon}",folder)