Exemple #1
0
    def test_correlation(self):
        """Fitting must reject every unsupported ``correlation`` setting.

        ``None``, an integer and an unknown string are each passed to
        ``EnsembleSelection``; ``fit`` is expected to raise ``ValueError``
        with a message that repeats the offending value.
        """
        learners = [
            ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
            ('svm', FastSurvivalSVM()),
        ]
        # (invalid value, pattern the error message has to match)
        invalid_cases = (
            (None,
             "correlation must be one of 'pearson', 'kendall', and 'spearman', but got None"),
            (2143,
             "correlation must be one of 'pearson', 'kendall', and 'spearman', but got 2143"),
            ("clearly wrong",
             "correlation must be one of 'pearson', 'kendall', and 'spearman', but got 'clearly wrong'"),
        )
        for bad_value, expected_msg in invalid_cases:
            ensemble = EnsembleSelection(learners,
                                         scorer=score_cindex,
                                         correlation=bad_value)
            with self.assertRaisesRegex(ValueError, expected_msg):
                ensemble.fit(self.x, self.y)
    def test_correlation(make_whas500):
        """Fitting must reject every unsupported ``correlation`` setting.

        The data comes from the ``make_whas500`` factory. ``None``, an
        integer and an unknown string are each used as ``correlation``;
        ``fit`` is expected to raise ``ValueError`` with a message that
        repeats the offending value.
        """
        data = make_whas500(with_mean=False, with_std=False, to_numeric=True)
        learners = [
            ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
            ('svm', FastSurvivalSVM()),
        ]
        # (invalid value, pattern the error message has to match)
        invalid_cases = (
            (None,
             "correlation must be one of 'pearson', 'kendall', "
             "and 'spearman', but got None"),
            (2143,
             "correlation must be one of 'pearson', 'kendall', "
             "and 'spearman', but got 2143"),
            ("clearly wrong",
             "correlation must be one of 'pearson', 'kendall', "
             "and 'spearman', but got 'clearly wrong'"),
        )
        for bad_value, expected_msg in invalid_cases:
            ensemble = EnsembleSelection(learners,
                                         scorer=score_cindex,
                                         correlation=bad_value)
            with pytest.raises(ValueError, match=expected_msg):
                ensemble.fit(data.x, data.y)
def _create_survival_ensemble(**kwargs):
    """Build an unfitted ``EnsembleSelection`` over boosting and SVM learners.

    The pool holds one ``ComponentwiseGradientBoostingSurvivalAnalysis`` per
    entry of a ``ParameterGrid`` over ``n_estimators`` and ``subsample``
    (named ``gbm_<i>``), followed by one ``FastSurvivalSVM`` per ``alpha``
    value (named ``svm_<i>``). The five alphas are evenly spaced on a log
    scale between 0.001 and 2.

    Parameters
    ----------
    **kwargs
        Extra keyword arguments forwarded unchanged to ``EnsembleSelection``.

    Returns
    -------
    EnsembleSelection
        The configured ensemble, using ``score_cindex`` as scorer and a
        shuffled 3-fold ``KFold`` with a fixed seed for cross-validation.
    """
    gbm_settings = ParameterGrid({
        "n_estimators": [100, 250],
        "subsample": [1.0, 0.75, 0.5]
    })
    log_alphas = numpy.linspace(numpy.log(0.001), numpy.log(2), 5)
    svm_settings = ParameterGrid({"alpha": numpy.exp(log_alphas)})

    # Boosting models come first, then the SVMs; each gets an indexed name.
    pool = [
        ("gbm_%d" % idx,
         ComponentwiseGradientBoostingSurvivalAnalysis(random_state=0,
                                                       **settings))
        for idx, settings in enumerate(gbm_settings)
    ]
    pool.extend(
        ("svm_%d" % idx,
         FastSurvivalSVM(max_iter=100, tol=1e-6, random_state=0, **settings))
        for idx, settings in enumerate(svm_settings)
    )

    folds = KFold(n_splits=3, shuffle=True, random_state=0)
    return EnsembleSelection(pool,
                             n_estimators=0.4,
                             scorer=score_cindex,
                             cv=folds,
                             **kwargs)
    def test_min_correlation(make_whas500):
        """``fit`` must reject ``min_correlation`` values outside [-1; 1].

        A value above 1, a value below -1 and NaN are tried in turn on the
        same ensemble; each must make ``fit`` raise ``ValueError`` with a
        message naming the offending value.
        """
        whas500 = make_whas500(with_mean=False,
                               with_std=False,
                               to_numeric=True)
        base_estimators = [('gbm',
                            ComponentwiseGradientBoostingSurvivalAnalysis()),
                           ('svm', FastSurvivalSVM())]
        meta = EnsembleSelection(base_estimators,
                                 scorer=score_cindex,
                                 min_correlation=1.2)

        # ``match`` is a regular expression: the decimal point is escaped so
        # the pattern only accepts the literal value, not any character.
        with pytest.raises(
                ValueError,
                match=r"min_correlation must be in \[-1; 1\], but was 1\.2"):
            meta.fit(whas500.x, whas500.y)

        meta.set_params(min_correlation=-2.1)
        with pytest.raises(
                ValueError,
                match=r"min_correlation must be in \[-1; 1\], but was -2\.1"):
            meta.fit(whas500.x, whas500.y)

        meta.set_params(min_correlation=numpy.nan)
        with pytest.raises(
                ValueError,
                match=r"min_correlation must be in \[-1; 1\], but was nan"):
            meta.fit(whas500.x, whas500.y)
    def test_fit_custom_kernel(make_whas500):
        """Fit an ensemble mixing linear SVMs with custom-kernel SVMs.

        For each of five ``alpha`` values one ``FastSurvivalSVM`` and one
        ``FastKernelSurvivalSVM`` are created; the latter use the pairwise
        kernel of a ``ClinicalKernelTransform`` prepared on the data frame.
        The test checks the ensemble size, the shape of ``scores_`` and the
        concordance statistics of the predictions on the training data.
        """
        data = make_whas500(with_mean=False, with_std=False, to_numeric=True)

        kernel_transform = ClinicalKernelTransform(fit_once=True)
        kernel_transform.prepare(data.x_data_frame)

        log_alphas = numpy.linspace(numpy.log(0.001), numpy.log(0.5), 5)
        alpha_grid = ParameterGrid({"alpha": numpy.exp(log_alphas)})

        # Linear models first, kernel models second, each with an indexed name.
        learners = [
            ("svm_linear_%d" % idx,
             FastSurvivalSVM(max_iter=100, random_state=0, **settings))
            for idx, settings in enumerate(alpha_grid)
        ]
        learners += [
            ("svm_kernel_%d" % idx,
             FastKernelSurvivalSVM(kernel=kernel_transform.pairwise_kernel,
                                   max_iter=45, tol=1e-5,
                                   random_state=0, **settings))
            for idx, settings in enumerate(alpha_grid)
        ]

        folds = KFold(n_splits=3, shuffle=True, random_state=0)
        ensemble = EnsembleSelection(learners, n_estimators=0.4,
                                     scorer=score_cindex, cv=folds, n_jobs=4)
        ensemble.fit(data.x, data.y)

        assert len(ensemble) == 10
        assert ensemble.scores_.shape == (10,)

        predictions = ensemble.predict(data.x)
        expected = (0.7978084, 59938, 15178, 33, 119)
        assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'],
                                   predictions, expected)
    def test_fit_custom_kernel(self):
        """Fit an ensemble mixing linear SVMs with custom-kernel SVMs.

        For each of five ``alpha`` values one ``FastSurvivalSVM`` and one
        ``FastKernelSurvivalSVM`` are created; the latter use the pairwise
        kernel of a ``ClinicalKernelTransform`` prepared on ``self.x``.
        The test checks the ensemble size, the shape of ``scores_`` and the
        result of ``concordance_index_censored`` on the training predictions.
        """
        kernel_transform = ClinicalKernelTransform(fit_once=True)
        kernel_transform.prepare(self.x)

        log_alphas = numpy.linspace(numpy.log(0.001), numpy.log(0.5), 5)
        alpha_grid = ParameterGrid({"alpha": numpy.exp(log_alphas)})

        # Linear models first, kernel models second, each with an indexed name.
        learners = [
            ("svm_linear_%d" % idx,
             FastSurvivalSVM(max_iter=100, random_state=0, **settings))
            for idx, settings in enumerate(alpha_grid)
        ]
        learners += [
            ("svm_kernel_%d" % idx,
             FastKernelSurvivalSVM(kernel=kernel_transform.pairwise_kernel,
                                   max_iter=45, tol=1e-5,
                                   random_state=0, **settings))
            for idx, settings in enumerate(alpha_grid)
        ]

        folds = KFold(n_splits=3, shuffle=True, random_state=0)
        ensemble = EnsembleSelection(learners, n_estimators=0.4,
                                     scorer=score_cindex, cv=folds, n_jobs=4)

        # The ensemble is trained and queried on the raw array, not the frame.
        ensemble.fit(self.x.values, self.y)
        self.assertEqual(len(ensemble), 10)
        self.assertTupleEqual(ensemble.scores_.shape, (10,))

        predictions = ensemble.predict(self.x.values)

        actual = concordance_index_censored(self.y['fstat'],
                                            self.y['lenfol'],
                                            predictions)
        expected = numpy.array([0.7978084, 59938, 15178, 33, 119])
        assert_array_almost_equal(actual, expected)
    def test_min_score(self):
        """``fit`` is expected to raise ``ValueError`` for ``min_score=1.0``.

        The error message must state that no base estimator exceeds
        ``min_score``.
        """
        learners = [
            ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
            ('svm', FastSurvivalSVM()),
        ]
        ensemble = EnsembleSelection(learners, scorer=score_cindex, min_score=1.0)

        expected_msg = "no base estimator exceeds min_score, try decreasing it"
        with self.assertRaisesRegex(ValueError, expected_msg):
            ensemble.fit(self.x, self.y)
    def test_min_score(make_whas500):
        """``fit`` is expected to raise ``ValueError`` for ``min_score=1.0``.

        The ensemble uses ``cv=3``; the error message must state that no
        base estimator exceeds ``min_score``.
        """
        data = make_whas500(with_mean=False, with_std=False, to_numeric=True)
        learners = [
            ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
            ('svm', FastSurvivalSVM()),
        ]
        ensemble = EnsembleSelection(learners, scorer=score_cindex, min_score=1.0, cv=3)

        expected_msg = "no base estimator exceeds min_score, try decreasing it"
        with pytest.raises(ValueError, match=expected_msg):
            ensemble.fit(data.x, data.y)
    def test_n_estimators(self):
        """``fit`` must reject invalid ``n_estimators`` values.

        Zero is expected to be refused as "zero or negative"; 1000 is
        expected to be refused because it exceeds the two base learners.
        """
        learners = [
            ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
            ('svm', FastSurvivalSVM()),
        ]
        ensemble = EnsembleSelection(learners, scorer=score_cindex, n_estimators=0)

        with self.assertRaisesRegex(ValueError, "n_estimators must not be zero or negative"):
            ensemble.fit(self.x, self.y)

        # Re-use the same ensemble with a count larger than the learner pool.
        ensemble.set_params(n_estimators=1000)
        too_many_msg = r"n_estimators \(1000\) must not exceed number of base learners \(2\)"
        with self.assertRaisesRegex(ValueError, too_many_msg):
            ensemble.fit(self.x, self.y)
    def test_scorer(self):
        """``fit`` must raise ``TypeError`` when ``scorer`` is not callable.

        Both ``None`` and a numpy array are tried as scorer on the same
        ensemble.
        """
        learners = [
            ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
            ('svm', FastSurvivalSVM()),
        ]
        ensemble = EnsembleSelection(learners, scorer=None)
        expected_msg = "scorer is not callable"

        with self.assertRaisesRegex(TypeError, expected_msg):
            ensemble.fit(self.x, self.y)

        ensemble.set_params(scorer=numpy.zeros(10))
        with self.assertRaisesRegex(TypeError, expected_msg):
            ensemble.fit(self.x, self.y)
    def test_scorer(make_whas500):
        """``fit`` must raise ``TypeError`` when ``scorer`` is not callable.

        Both ``None`` and a numpy array are tried as scorer on the same
        ensemble, using data from the ``make_whas500`` factory.
        """
        data = make_whas500(with_mean=False, with_std=False, to_numeric=True)
        learners = [
            ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
            ('svm', FastSurvivalSVM()),
        ]
        ensemble = EnsembleSelection(learners, scorer=None)
        expected_msg = "scorer is not callable"

        with pytest.raises(TypeError, match=expected_msg):
            ensemble.fit(data.x, data.y)

        ensemble.set_params(scorer=numpy.zeros(10))
        with pytest.raises(TypeError, match=expected_msg):
            ensemble.fit(data.x, data.y)
    def test_n_estimators(make_whas500):
        """``fit`` must reject invalid ``n_estimators`` values.

        Zero is expected to be refused as "zero or negative"; 1000 is
        expected to be refused because it exceeds the two base learners.
        """
        data = make_whas500(with_mean=False, with_std=False, to_numeric=True)
        learners = [
            ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
            ('svm', FastSurvivalSVM()),
        ]
        ensemble = EnsembleSelection(learners, scorer=score_cindex, n_estimators=0)

        non_positive_msg = "n_estimators must not be zero or negative"
        with pytest.raises(ValueError, match=non_positive_msg):
            ensemble.fit(data.x, data.y)

        # Re-use the same ensemble with a count larger than the learner pool.
        ensemble.set_params(n_estimators=1000)
        too_many_msg = (r"n_estimators \(1000\) must not exceed number "
                        r"of base learners \(2\)")
        with pytest.raises(ValueError, match=too_many_msg):
            ensemble.fit(data.x, data.y)
    def test_min_correlation(self):
        """``fit`` must reject ``min_correlation`` values outside [-1; 1].

        A value above 1, a value below -1 and NaN are tried in turn on the
        same ensemble; each must make ``fit`` raise ``ValueError`` with a
        message naming the offending value.
        """
        base_estimators = [('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
                           ('svm', FastSurvivalSVM())]
        meta = EnsembleSelection(base_estimators, scorer=score_cindex, min_correlation=1.2)

        # The expected message is a regular expression: the decimal point is
        # escaped so the pattern only accepts the literal value.
        self.assertRaisesRegex(ValueError, r"min_correlation must be in \[-1; 1\], but was 1\.2",
                               meta.fit, self.x, self.y)

        meta.set_params(min_correlation=-2.1)
        self.assertRaisesRegex(ValueError, r"min_correlation must be in \[-1; 1\], but was -2\.1",
                               meta.fit, self.x, self.y)

        meta.set_params(min_correlation=numpy.nan)
        self.assertRaisesRegex(ValueError, r"min_correlation must be in \[-1; 1\], but was nan",
                               meta.fit, self.x, self.y)
Exemple #14
0
    def _create_ensemble(self, **kwargs):
        """Build an unfitted ``EnsembleSelection`` over boosting and SVM learners.

        The pool holds one ``ComponentwiseGradientBoostingSurvivalAnalysis``
        per entry of a ``ParameterGrid`` over ``n_estimators`` and
        ``subsample`` (named ``gbm_<i>``), followed by one
        ``FastSurvivalSVM`` per ``alpha`` value (named ``svm_<i>``). The
        alphas are powers of two with exponents from -9 up to 3 in steps
        of 2.

        Parameters
        ----------
        **kwargs
            Extra keyword arguments forwarded unchanged to
            ``EnsembleSelection``.

        Returns
        -------
        EnsembleSelection
            The configured ensemble, using ``score_cindex`` as scorer and a
            shuffled 4-fold ``KFold`` with a fixed seed for cross-validation.
        """
        gbm_settings = ParameterGrid({
            "n_estimators": [100, 250],
            "subsample": [1.0, 0.75, 0.5]
        })
        exponents = numpy.arange(-9, 5, 2)
        svm_settings = ParameterGrid({"alpha": 2.**exponents})

        # Boosting models come first, then the SVMs; each gets an indexed name.
        pool = [
            ("gbm_%d" % idx,
             ComponentwiseGradientBoostingSurvivalAnalysis(random_state=0,
                                                           **settings))
            for idx, settings in enumerate(gbm_settings)
        ]
        pool.extend(
            ("svm_%d" % idx,
             FastSurvivalSVM(max_iter=100, random_state=0, **settings))
            for idx, settings in enumerate(svm_settings)
        )

        folds = KFold(n_splits=4, shuffle=True, random_state=0)
        return EnsembleSelection(pool,
                                 n_estimators=0.4,
                                 scorer=score_cindex,
                                 cv=folds,
                                 **kwargs)