def test_correlation(self):
    """Invalid ``correlation`` arguments must raise ValueError at fit time."""
    base_estimators = [
        ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
        ('svm', FastSurvivalSVM()),
    ]
    # value handed to the estimator -> how it appears in the error message
    bad_values = ((None, "None"), (2143, "2143"),
                  ("clearly wrong", "'clearly wrong'"))
    for value, shown in bad_values:
        meta = EnsembleSelection(base_estimators, scorer=score_cindex,
                                 correlation=value)
        self.assertRaisesRegex(
            ValueError,
            "correlation must be one of 'pearson', 'kendall', and 'spearman', "
            "but got " + shown,
            meta.fit, self.x, self.y)
def test_fit_custom_kernel(make_whas500):
    """Ensemble over linear and clinical-kernel SVMs reproduces the known c-index."""
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)

    alphas = numpy.exp(numpy.linspace(numpy.log(0.001), numpy.log(0.5), 5))
    svm_grid = ParameterGrid({"alpha": alphas})

    # fit the clinical kernel once on the full data frame, reuse it per model
    transform = ClinicalKernelTransform(fit_once=True)
    transform.prepare(whas500.x_data_frame)

    base_estimators = [
        ("svm_linear_%d" % idx,
         FastSurvivalSVM(max_iter=100, random_state=0, **params))
        for idx, params in enumerate(svm_grid)
    ]
    base_estimators += [
        ("svm_kernel_%d" % idx,
         FastKernelSurvivalSVM(kernel=transform.pairwise_kernel,
                               max_iter=45, tol=1e-5, random_state=0, **params))
        for idx, params in enumerate(svm_grid)
    ]

    cv = KFold(n_splits=3, shuffle=True, random_state=0)
    meta = EnsembleSelection(base_estimators, n_estimators=0.4,
                             scorer=score_cindex, cv=cv, n_jobs=4)
    meta.fit(whas500.x, whas500.y)

    # n_estimators=0.4 keeps 40% of the 25 base learners
    assert len(meta) == 10
    assert meta.scores_.shape == (10,)

    predictions = meta.predict(whas500.x)
    assert_cindex_almost_equal(whas500.y['fstat'], whas500.y['lenfol'],
                               predictions,
                               (0.7978084, 59938, 15178, 33, 119))
def test_fit_custom_kernel(self):
    """Ensemble over linear and clinical-kernel SVMs reproduces the known c-index."""
    alphas = numpy.exp(numpy.linspace(numpy.log(0.001), numpy.log(0.5), 5))
    svm_grid = ParameterGrid({"alpha": alphas})

    # fit the clinical kernel once on the full data frame, reuse it per model
    transform = ClinicalKernelTransform(fit_once=True)
    transform.prepare(self.x)

    base_estimators = [
        ("svm_linear_%d" % idx,
         FastSurvivalSVM(max_iter=100, random_state=0, **params))
        for idx, params in enumerate(svm_grid)
    ]
    base_estimators += [
        ("svm_kernel_%d" % idx,
         FastKernelSurvivalSVM(kernel=transform.pairwise_kernel,
                               max_iter=45, tol=1e-5, random_state=0, **params))
        for idx, params in enumerate(svm_grid)
    ]

    cv = KFold(n_splits=3, shuffle=True, random_state=0)
    meta = EnsembleSelection(base_estimators, n_estimators=0.4,
                             scorer=score_cindex, cv=cv, n_jobs=4)
    meta.fit(self.x.values, self.y)

    # n_estimators=0.4 keeps 40% of the 25 base learners
    self.assertEqual(len(meta), 10)
    self.assertTupleEqual(meta.scores_.shape, (10,))

    predictions = meta.predict(self.x.values)
    score = concordance_index_censored(self.y['fstat'], self.y['lenfol'],
                                       predictions)
    expected_score = numpy.array([0.7978084, 59938, 15178, 33, 119])
    assert_array_almost_equal(score, expected_score)
def test_min_score(make_whas500):
    """If no base estimator reaches ``min_score``, fitting must fail."""
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)
    base_estimators = [
        ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
        ('svm', FastSurvivalSVM()),
    ]
    # a perfect concordance index of 1.0 is unattainable on this data
    meta = EnsembleSelection(base_estimators, scorer=score_cindex,
                             min_score=1.0, cv=3)

    with pytest.raises(ValueError,
                       match="no base estimator exceeds min_score, try decreasing it"):
        meta.fit(whas500.x, whas500.y)
def test_n_estimators(self):
    """Zero and over-large ``n_estimators`` must both be rejected at fit time."""
    base_estimators = [
        ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
        ('svm', FastSurvivalSVM()),
    ]
    meta = EnsembleSelection(base_estimators, scorer=score_cindex,
                             n_estimators=0)
    self.assertRaisesRegex(ValueError,
                           "n_estimators must not be zero or negative",
                           meta.fit, self.x, self.y)

    # more estimators requested than base learners available
    meta.set_params(n_estimators=1000)
    self.assertRaisesRegex(ValueError,
                           r"n_estimators \(1000\) must not exceed number "
                           r"of base learners \(2\)",
                           meta.fit, self.x, self.y)
def test_scorer(self):
    """A non-callable ``scorer`` must raise TypeError when fitting."""
    base_estimators = [
        ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
        ('svm', FastSurvivalSVM()),
    ]
    meta = EnsembleSelection(base_estimators, scorer=None)
    self.assertRaisesRegex(TypeError, "scorer is not callable",
                           meta.fit, self.x, self.y)

    # any other non-callable object fails the same way
    meta.set_params(scorer=numpy.zeros(10))
    self.assertRaisesRegex(TypeError, "scorer is not callable",
                           meta.fit, self.x, self.y)
def _create_survival_ensemble(**kwargs):
    """Build an EnsembleSelection over a grid of boosting models and linear SVMs.

    Extra keyword arguments are forwarded to ``EnsembleSelection``.
    """
    boosting_grid = ParameterGrid({
        "n_estimators": [100, 250],
        "subsample": [1.0, 0.75, 0.5],
    })
    alphas = numpy.exp(numpy.linspace(numpy.log(0.001), numpy.log(2), 5))
    svm_grid = ParameterGrid({"alpha": alphas})

    base_estimators = [
        ("gbm_%d" % idx,
         ComponentwiseGradientBoostingSurvivalAnalysis(random_state=0, **params))
        for idx, params in enumerate(boosting_grid)
    ]
    base_estimators += [
        ("svm_%d" % idx,
         FastSurvivalSVM(max_iter=100, tol=1e-6, random_state=0, **params))
        for idx, params in enumerate(svm_grid)
    ]

    cv = KFold(n_splits=3, shuffle=True, random_state=0)
    return EnsembleSelection(base_estimators, n_estimators=0.4,
                             scorer=score_cindex, cv=cv, **kwargs)
def test_min_score(self):
    """If no base estimator reaches ``min_score``, fitting must fail."""
    base_estimators = [
        ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
        ('svm', FastSurvivalSVM()),
    ]
    # a perfect concordance index of 1.0 is unattainable on this data
    meta = EnsembleSelection(base_estimators, scorer=score_cindex,
                             min_score=1.0)
    self.assertRaisesRegex(ValueError,
                           "no base estimator exceeds min_score, try decreasing it",
                           meta.fit, self.x, self.y)
def test_scorer(make_whas500):
    """A non-callable ``scorer`` must raise TypeError when fitting."""
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)
    base_estimators = [
        ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
        ('svm', FastSurvivalSVM()),
    ]
    meta = EnsembleSelection(base_estimators, scorer=None)
    with pytest.raises(TypeError, match="scorer is not callable"):
        meta.fit(whas500.x, whas500.y)

    # any other non-callable object fails the same way
    meta.set_params(scorer=numpy.zeros(10))
    with pytest.raises(TypeError, match="scorer is not callable"):
        meta.fit(whas500.x, whas500.y)
def test_n_estimators(make_whas500):
    """Zero and over-large ``n_estimators`` must both be rejected at fit time."""
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)
    base_estimators = [
        ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
        ('svm', FastSurvivalSVM()),
    ]
    meta = EnsembleSelection(base_estimators, scorer=score_cindex,
                             n_estimators=0)
    with pytest.raises(ValueError,
                       match="n_estimators must not be zero or negative"):
        meta.fit(whas500.x, whas500.y)

    # more estimators requested than base learners available
    meta.set_params(n_estimators=1000)
    with pytest.raises(ValueError,
                       match=r"n_estimators \(1000\) must not exceed number "
                             r"of base learners \(2\)"):
        meta.fit(whas500.x, whas500.y)
def test_min_correlation(self):
    """Out-of-range ``min_correlation`` must raise ValueError naming the value."""
    base_estimators = [
        ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
        ('svm', FastSurvivalSVM()),
    ]
    meta = EnsembleSelection(base_estimators, scorer=score_cindex,
                             min_correlation=1.2)
    # str() of each offending value is exactly what the message contains
    for bad in (1.2, -2.1, numpy.nan):
        meta.set_params(min_correlation=bad)
        self.assertRaisesRegex(
            ValueError,
            r"min_correlation must be in \[-1; 1\], but was %s" % bad,
            meta.fit, self.x, self.y)
def _create_ensemble(self, **kwargs):
    """Build an EnsembleSelection over a grid of boosting models and linear SVMs.

    Extra keyword arguments are forwarded to ``EnsembleSelection``.
    """
    boosting_grid = ParameterGrid({
        "n_estimators": [100, 250],
        "subsample": [1.0, 0.75, 0.5],
    })
    svm_grid = ParameterGrid({"alpha": 2.**numpy.arange(-9, 5, 2)})

    base_estimators = [
        ("gbm_%d" % idx,
         ComponentwiseGradientBoostingSurvivalAnalysis(random_state=0, **params))
        for idx, params in enumerate(boosting_grid)
    ]
    base_estimators += [
        ("svm_%d" % idx,
         FastSurvivalSVM(max_iter=100, random_state=0, **params))
        for idx, params in enumerate(svm_grid)
    ]

    cv = KFold(n_splits=4, shuffle=True, random_state=0)
    return EnsembleSelection(base_estimators, n_estimators=0.4,
                             scorer=score_cindex, cv=cv, **kwargs)
def test_correlation(make_whas500):
    """Invalid ``correlation`` arguments must raise ValueError at fit time."""
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)
    base_estimators = [
        ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
        ('svm', FastSurvivalSVM()),
    ]
    # value handed to the estimator -> how it appears in the error message
    bad_values = ((None, "None"), (2143, "2143"),
                  ("clearly wrong", "'clearly wrong'"))
    for value, shown in bad_values:
        meta = EnsembleSelection(base_estimators, scorer=score_cindex,
                                 correlation=value)
        with pytest.raises(
                ValueError,
                match="correlation must be one of 'pearson', 'kendall', "
                      "and 'spearman', but got " + shown):
            meta.fit(whas500.x, whas500.y)
def test_min_correlation(make_whas500):
    """Out-of-range ``min_correlation`` must raise ValueError naming the value."""
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)
    base_estimators = [
        ('gbm', ComponentwiseGradientBoostingSurvivalAnalysis()),
        ('svm', FastSurvivalSVM()),
    ]
    meta = EnsembleSelection(base_estimators, scorer=score_cindex,
                             min_correlation=1.2)
    # str() of each offending value is exactly what the message contains
    for bad in (1.2, -2.1, numpy.nan):
        meta.set_params(min_correlation=bad)
        with pytest.raises(
                ValueError,
                match=r"min_correlation must be in \[-1; 1\], but was %s" % bad):
            meta.fit(whas500.x, whas500.y)