Example #1
    def test_squared_loss(make_whas500):
        whas500_data = make_whas500(with_std=False, to_numeric=True)

        model = GradientBoostingSurvivalAnalysis(loss="squared",
                                                 n_estimators=100,
                                                 max_depth=3,
                                                 random_state=0)
        model.fit(whas500_data.x, whas500_data.y)

        time_predicted = model.predict(whas500_data.x)
        time_true = whas500_data.y["lenfol"]
        event_true = whas500_data.y["fstat"]

        rmse_all = numpy.sqrt(mean_squared_error(time_true, time_predicted))
        assert round(abs(rmse_all - 580.23345259002951), 7) == 0

        rmse_uncensored = numpy.sqrt(
            mean_squared_error(time_true[event_true],
                               time_predicted[event_true]))
        assert round(abs(rmse_uncensored - 383.10639243317951), 7) == 0

        cindex = model.score(whas500_data.x, whas500_data.y)
        assert round(abs(cindex - 0.9021810004), 7) == 0

        with pytest.raises(
                ValueError,
                match="`fit` must be called with the loss option set to 'coxph'"
        ):
            model.predict_survival_function(whas500_data.x)

        with pytest.raises(
                ValueError,
                match="`fit` must be called with the loss option set to 'coxph'"
        ):
            model.predict_cumulative_hazard_function(whas500_data.x)
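The two `pytest.raises` checks document that survival and cumulative-hazard curves are only available for the partial-likelihood loss. A minimal sketch of the working path, reusing the same fixture data and assuming the default `loss="coxph"` (not part of the original test):

# Sketch: with the "coxph" loss, the calls the test expects to fail succeed.
coxph_model = GradientBoostingSurvivalAnalysis(loss="coxph", n_estimators=100,
                                               max_depth=3, random_state=0)
coxph_model.fit(whas500_data.x, whas500_data.y)
# Each entry is a step function that can be evaluated at arbitrary times.
surv_fns = coxph_model.predict_survival_function(whas500_data.x)
chf_fns = coxph_model.predict_cumulative_hazard_function(whas500_data.x)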
Example #2
def fit_and_score_features(X, y):
    """Fit a univariate GradientBoostingSurvivalAnalysis per feature and
    return each feature's concordance index on the training data."""
    n_features = X.shape[1]
    scores = np.empty(n_features)
    m = GradientBoostingSurvivalAnalysis(verbose=True, n_estimators=500)
    for j in range(n_features):
        Xj = X[:, j:j + 1]  # single-column design matrix for feature j
        m.fit(Xj, y)
        scores[j] = m.score(Xj, y)  # Harrell's concordance index
    return scores
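A minimal usage sketch for this helper (hypothetical; mirrors how it is called in Example #7 below, assuming WHAS500 data and sksurv's OneHotEncoder):

import pandas as pd
from sksurv.datasets import load_whas500
from sksurv.preprocessing import OneHotEncoder

X, y = load_whas500()
X_num = OneHotEncoder().fit_transform(X)  # all-numeric design matrix
scores = fit_and_score_features(X_num.values, y)
# Rank features by their univariate concordance index.
print(pd.Series(scores, index=X_num.columns).sort_values(ascending=False))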
Example #3
    def test_squared_loss(make_whas500):
        whas500_data = make_whas500(with_std=False, to_numeric=True)

        model = GradientBoostingSurvivalAnalysis(loss="squared", n_estimators=100, max_depth=3, random_state=0)
        model.fit(whas500_data.x, whas500_data.y)

        time_predicted = model.predict(whas500_data.x)
        time_true = whas500_data.y["lenfol"]
        event_true = whas500_data.y["fstat"]

        rmse_all = numpy.sqrt(mean_squared_error(time_true, time_predicted))
        assert round(abs(rmse_all - 580.23345259002951), 7) == 0

        rmse_uncensored = numpy.sqrt(mean_squared_error(time_true[event_true], time_predicted[event_true]))
        assert round(abs(rmse_uncensored - 383.10639243317951), 7) == 0

        cindex = model.score(whas500_data.x, whas500_data.y)
        assert round(abs(cindex - 0.9021810004), 7) == 0
Example #4
    def test_ipcwls_loss(make_whas500):
        whas500_data = make_whas500(with_std=False, to_numeric=True)

        model = GradientBoostingSurvivalAnalysis(loss="ipcwls", n_estimators=100, max_depth=3, random_state=0)
        model.fit(whas500_data.x, whas500_data.y)

        time_predicted = model.predict(whas500_data.x)
        time_true = whas500_data.y["lenfol"]
        event_true = whas500_data.y["fstat"]

        rmse_all = numpy.sqrt(mean_squared_error(time_true, time_predicted))
        assert round(abs(rmse_all - 590.5441693629117), 7) == 0

        rmse_uncensored = numpy.sqrt(mean_squared_error(time_true[event_true], time_predicted[event_true]))
        assert round(abs(rmse_uncensored - 392.97741487479743), 7) == 0

        cindex = model.score(whas500_data.x, whas500_data.y)
        assert round(abs(cindex - 0.8979161399), 7) == 0
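Both tests share the same evaluation pattern; a small helper capturing it (a sketch, assuming the WHAS500 field names `lenfol` and `fstat` used above):

import numpy
from sklearn.metrics import mean_squared_error

def evaluate_time_predictions(model, x, y):
    # Only meaningful for time-scale losses ("squared", "ipcwls"),
    # where predict() returns survival-time estimates.
    pred = model.predict(x)
    rmse_all = numpy.sqrt(mean_squared_error(y["lenfol"], pred))
    uncensored = y["fstat"]
    rmse_uncensored = numpy.sqrt(
        mean_squared_error(y["lenfol"][uncensored], pred[uncensored]))
    return rmse_all, rmse_uncensored, model.score(x, y)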
Example #5
            verbose=args.verbose,
            minibatch_frac=1.0,
            Base=base_name_to_learner[args.base],
            Score=eval(args.score),
        )

        train_losses = ngb.fit(X_train, Y_train, E_train)
        forecast = ngb.pred_dist(X_test)
        train_forecast = ngb.pred_dist(X_train)
        print("NGB score: %.4f (val), %.4f (train)" % (
            concordance_index_censored(E_test.astype(bool), Y_test,
                                       -forecast.mean())[0],
            concordance_index_censored(E_train.astype(bool), Y_train,
                                       -train_forecast.mean())[0],
        ))

        ##
        ## sksurv
        ##
        gbsa = GBSA(
            n_estimators=args.n_est,
            learning_rate=args.lr,
            subsample=args.minibatch_frac,
            verbose=args.verbose,
        )
        gbsa.fit(X_train, Y_join(Y_train, E_train))
        print("GBSA score: %.4f (val), %.4f (train)" % (
            gbsa.score(X_test, Y_join(Y_test, E_test)),
            gbsa.score(X_train, Y_join(Y_train, E_train)),
        ))
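`Y_join` is defined elsewhere in this script. A plausible minimal version, assuming its job is to pack times and event flags into the structured array sksurv estimators expect, could build on `sksurv.util.Surv`:

from sksurv.util import Surv

def Y_join(Y, E):
    # event indicator (bool) plus observed time (float), in sksurv's layout
    return Surv.from_arrays(event=E.astype(bool), time=Y)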
Example #6
        X_train, X_val, Y_train, Y_val, E_train, E_val = train_test_split(X_train, Y_train, E_train, test_size=0.2)

        ngb = NGBSurvival(Dist=eval(args.distn),
                          n_estimators=args.n_est,
                          learning_rate=args.lr,
                          natural_gradient=args.natural,
                          verbose=args.verbose,
                          minibatch_frac=1.0,
                          Base=base_name_to_learner[args.base],
                          Score=eval(args.score))

        train_losses = ngb.fit(X_train, Y_train, E_train)
        forecast = ngb.pred_dist(X_test)
        train_forecast = ngb.pred_dist(X_train)
        print('NGB score: %.4f (val), %.4f (train)' % (
            concordance_index_censored(E_test.astype(bool), Y_test,
                                       -forecast.mean())[0],
            concordance_index_censored(E_train.astype(bool), Y_train,
                                       -train_forecast.mean())[0],
        ))

        ##
        ## sksurv
        ##
        gbsa = GBSA(n_estimators=args.n_est,
                    learning_rate=args.lr,
                    subsample=args.minibatch_frac,
                    verbose=args.verbose)
        gbsa.fit(X_train, Y_join(Y_train, E_train))
        print('GBSA score: %.4f (val), %.4f (train)' % (gbsa.score(X_test, Y_join(Y_test, E_test)),
                                                        gbsa.score(X_train, Y_join(Y_train, E_train))))
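Note the sign convention in the NGB scoring above: `concordance_index_censored` ranks by a risk score that should be higher for shorter survival, hence the negated forecast means. A toy call (made-up arrays, real sksurv metric):

import numpy as np
from sksurv.metrics import concordance_index_censored

event = np.array([True, False, True])
time = np.array([10.0, 24.0, 7.0])
risk = -np.array([12.0, 30.0, 6.0])  # negate predicted times -> risk scores
cindex = concordance_index_censored(event, time, risk)[0]  # first element is the c-index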


Example #7
                      n_estimators=args.n_est,
                      learning_rate=args.lr,
                      natural_gradient=args.natural,
                      verbose=args.verbose,
                      minibatch_frac=1.0,
                      Base=base_name_to_learner[args.base],
                      Score=eval(args.score)())

        train_losses = ngb.fit(X_train, Y_train)  #, X_val, Y_val)
        forecast = ngb.pred_dist(X_test)
        train_forecast = ngb.pred_dist(X_train)
        print('NGB score: %.4f (val), %.4f (train)' %
              (concordance_index_censored(Y_test['Event'], Y_test['Time'],
                                          -forecast.mean())[0],
               concordance_index_censored(Y_train['Event'], Y_train['Time'],
                                          -train_forecast.mean())[0]))
        #logger.tick(forecast, Y_test)

        ##
        ## sksurv
        ##
        gbsa = GBSA(n_estimators=args.n_est,
                    learning_rate=args.lr,
                    subsample=args.minibatch_frac,
                    verbose=args.verbose)
        gbsa.fit(X_train, Y_train)
        print('GBSA score: %.4f (val), %.4f (train)' %
              (gbsa.score(X_test, Y_test), gbsa.score(X_train, Y_train)))

    #logger.save()
    df2['T'] = T

    X, y = get_x_y(df2, ['E', 'T'], pos_label=True)

    for c in X.columns.values:
        if c != 'AGE AT DOC':
            X[c] = X[c].astype('category')

    data_x_numeric = OneHotEncoder().fit_transform(X)
    #%%

    estimator = GradientBoostingSurvivalAnalysis(verbose=True,
                                                 n_estimators=500)
    estimator.fit(data_x_numeric, y)

    print(estimator.score(data_x_numeric, y))
    print()

    scores = fit_and_score_features(data_x_numeric.values, y)
    print(
        pd.Series(scores,
                  index=data_x_numeric.columns).sort_values(ascending=False))

    with open('GradientRegressor.pkl', 'wb') as fh:
        pickle.dump(estimator, fh)

    #%%

    from sklearn.feature_selection import SelectKBest
    from sklearn.pipeline import Pipeline

    pipe = Pipeline([('encode', OneHotEncoder()),