def test_n_estimators(make_whas500):
    whas500 = make_whas500(with_mean=False,
                           with_std=False,
                           to_numeric=True)
    base_estimators = [('gbm',
                        ComponentwiseGradientBoostingSurvivalAnalysis()),
                       ('svm', FastSurvivalSVM())]
    meta = EnsembleSelection(base_estimators,
                             scorer=score_cindex,
                             n_estimators=0)

    with pytest.raises(ValueError,
                       match="n_estimators must not be zero or negative"):
        meta.fit(whas500.x, whas500.y)

    meta.set_params(n_estimators=1000)
    with pytest.raises(
            ValueError,
            match=r"n_estimators \(1000\) must not exceed number "
                  r"of base learners \(2\)"):
        meta.fit(whas500.x, whas500.y)
Example #2
def test_min_correlation(make_whas500):
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)
    base_estimators = [('gbm',
                        ComponentwiseGradientBoostingSurvivalAnalysis()),
                       ('svm', FastSurvivalSVM())]
    meta = EnsembleSelection(base_estimators,
                             scorer=score_cindex,
                             min_correlation=1.2)

    with pytest.raises(ValueError,
                       match=r"min_correlation must be in \[-1; 1\], but was 1.2"):
        meta.fit(whas500.x, whas500.y)

    meta.set_params(min_correlation=-2.1)
    with pytest.raises(ValueError,
                       match=r"min_correlation must be in \[-1; 1\], but was -2.1"):
        meta.fit(whas500.x, whas500.y)

    meta.set_params(min_correlation=numpy.nan)
    with pytest.raises(ValueError,
                       match=r"min_correlation must be in \[-1; 1\], but was nan"):
        meta.fit(whas500.x, whas500.y)
def test_correlation(make_whas500):
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)
    base_estimators = [('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
                       ('svm', FastSurvivalSVM())]
    meta = EnsembleSelection(base_estimators, scorer=score_cindex, correlation=None)
    with pytest.raises(ValueError,
                       match="correlation must be one of 'pearson', 'kendall', "
                             "and 'spearman', but got None"):
        meta.fit(whas500.x, whas500.y)

    meta = EnsembleSelection(base_estimators, scorer=score_cindex, correlation=2143)
    with pytest.raises(ValueError,
                       match="correlation must be one of 'pearson', 'kendall', "
                             "and 'spearman', but got 2143"):
        meta.fit(whas500.x, whas500.y)

    meta = EnsembleSelection(base_estimators, scorer=score_cindex, correlation="clearly wrong")
    with pytest.raises(ValueError,
                       match="correlation must be one of 'pearson', 'kendall', "
                             "and 'spearman', but got 'clearly wrong'"):
        meta.fit(whas500.x, whas500.y)
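
# For contrast, a minimal sketch (not part of the test suite) of a configuration
# that passes the parameter checks exercised by the three tests above.  It assumes
# the same make_whas500 fixture and score_cindex scorer used in these examples.
def example_valid_ensemble_selection(make_whas500):
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)
    base_estimators = [('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
                       ('svm', FastSurvivalSVM())]
    meta = EnsembleSelection(base_estimators,
                             scorer=score_cindex,
                             n_estimators=1,          # must not exceed the 2 base learners
                             correlation="pearson",   # one of 'pearson', 'kendall', 'spearman'
                             min_correlation=0.5)     # must lie in [-1; 1]
    meta.fit(whas500.x, whas500.y)
    return meta
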
def test_squared_loss(make_whas500):
    whas500_data = make_whas500(with_std=False, to_numeric=True)

    model = ComponentwiseGradientBoostingSurvivalAnalysis(loss="squared", n_estimators=100, random_state=0)
    model.fit(whas500_data.x, whas500_data.y)

    time_predicted = model.predict(whas500_data.x)
    time_true = whas500_data.y["lenfol"]
    event_true = whas500_data.y["fstat"]

    rmse_all = numpy.sqrt(mean_squared_error(time_true, time_predicted))
    assert round(abs(rmse_all - 793.6256945839657), 7) == 0

    rmse_uncensored = numpy.sqrt(mean_squared_error(time_true[event_true], time_predicted[event_true]))
    assert round(abs(rmse_uncensored - 542.83358120153525), 7) == 0

    cindex = model.score(whas500_data.x, whas500_data.y)
    assert round(abs(cindex - 0.7777082862), 7) == 0
Example #5
def _create_ensemble(**kwargs):
    boosting_grid = ParameterGrid({
        "n_estimators": [100, 250],
        "subsample": [1.0, 0.75, 0.5]
    })
    svm_grid = ParameterGrid({"alpha": 2.**numpy.arange(-9, 5, 2)})

    base_estimators = []
    for i, params in enumerate(boosting_grid):
        model = ComponentwiseGradientBoostingSurvivalAnalysis(
            random_state=0, **params)
        base_estimators.append(("gbm_%d" % i, model))

    for i, params in enumerate(svm_grid):
        model = FastSurvivalSVM(max_iter=100, random_state=0, **params)
        base_estimators.append(("svm_%d" % i, model))

    cv = KFold(n_splits=4, shuffle=True, random_state=0)
    meta = EnsembleSelection(base_estimators,
                             n_estimators=0.4,
                             scorer=score_cindex,
                             cv=cv,
                             **kwargs)
    return meta
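
# A hypothetical usage sketch (not from the original test suite): it assumes the
# make_whas500 fixture from the other examples and reuses the _create_ensemble
# helper above; additional EnsembleSelection parameters could be forwarded via kwargs.
def example_fit_ensemble(make_whas500):
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)
    meta = _create_ensemble()
    meta.fit(whas500.x, whas500.y)
    # ensemble predictions (risk scores) for the training data
    return meta.predict(whas500.x)
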
Example #6
def test_fit_verbose(make_whas500):
    whas500_data = make_whas500(with_std=False, to_numeric=True)

    model = ComponentwiseGradientBoostingSurvivalAnalysis(n_estimators=10, verbose=1, random_state=0)
    model.fit(whas500_data.x, whas500_data.y)
def test_feature_importances(make_whas500):
    whas500_data = make_whas500(with_std=False, to_numeric=True)

    model = ComponentwiseGradientBoostingSurvivalAnalysis(n_estimators=100, random_state=0)
    model.fit(whas500_data.x, whas500_data.y)

    assert whas500_data.x.shape[1] + 1 == len(model.feature_importances_)
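
# A hypothetical follow-up sketch (not part of the test suite): ranking the entries
# of feature_importances_ to see which components contributed most.  The array has
# one more entry than there are input features, presumably for the intercept of the
# componentwise base learner; the helper name and k are illustrative only.
def example_top_importances(model, k=5):
    importances = numpy.asarray(model.feature_importances_)
    order = numpy.argsort(importances)[::-1][:k]
    return list(zip(order.tolist(), importances[order].tolist()))
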

# Part 2: Componentwise Gradient Boosting for Survival Analysis

from sklearn.model_selection import ShuffleSplit, GridSearchCV

from sksurv.column import encode_categorical
from sksurv.metrics import concordance_index_censored
from sksurv.ensemble import ComponentwiseGradientBoostingSurvivalAnalysis

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

## create estimator
estimator = ComponentwiseGradientBoostingSurvivalAnalysis(loss='coxph',
                                                          random_state=0)


## define a function for evaluating the performance of models during grid search using Harrell's concordance index
def score_survival_model(model, X, y):
    prediction = model.predict(X)
    result = concordance_index_censored(y['status'], y['time_to_event'],
                                        prediction)
    return result[0]


param_grid = {
    'learning_rate': [0.001, 0.01, 0.1, 0.5, 1],
    'n_estimators': [100, 200, 500, 1000]
}
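
## run the grid search (a sketch of a likely next step, not shown in this excerpt;
## `data_x` and `data_y` stand for the encoded feature matrix and structured outcome
## array prepared earlier in the notebook and are assumptions of this sketch)
def run_grid_search(data_x, data_y):
    cv = ShuffleSplit(n_splits=10, test_size=0.25, random_state=0)
    grid_search = GridSearchCV(estimator, param_grid,
                               scoring=score_survival_model,
                               cv=cv, n_jobs=-1)
    grid_search.fit(data_x, data_y)
    return grid_search.best_params_, grid_search.best_score_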