def test_fit_subsample(make_whas500):
    """Fit a sub-sampled gradient boosting model and check its fitted state.

    Verifies that ``max_features_`` and ``oob_improvement_`` are set, that the
    concordance index on a fixed subset of rows matches the stored reference,
    that the per-stage arrays have one entry per estimator, and that
    predicting with too few columns raises ``ValueError``.
    """
    data = make_whas500(with_std=False, to_numeric=True)

    n_stages = 50
    model = GradientBoostingSurvivalAnalysis(
        n_estimators=n_stages, max_features=8, subsample=0.6, random_state=0
    )
    model.fit(data.x, data.y)

    assert model.max_features_ == 8
    assert hasattr(model, "oob_improvement_")

    # Evaluate on every row except a fixed set of excluded indices.
    excluded_rows = [35, 111, 174, 206, 236, 268, 497]
    keep = numpy.ones(data.x.shape[0], dtype=bool)
    keep[excluded_rows] = False
    x_test = data.x[keep]
    y_test = data.y[keep]

    predictions = model.predict(x_test)
    expected_cindex = (0.8330510326740247, 60985, 12221, 2, 14)
    assert_cindex_almost_equal(y_test['fstat'], y_test['lenfol'], predictions, expected_cindex)

    assert model.train_score_.shape == (n_stages,)
    assert model.oob_improvement_.shape == (n_stages,)

    # Input with only two columns must be rejected.
    with pytest.raises(ValueError, match="Number of features of the model must match the input. "
                                         "Model n_features is 14 and input n_features is 2 "):
        model.predict(data.x[:, :2])
def test_fit(make_whas500):
    """Fit component-wise gradient boosting and compare against reference values.

    Checks the concordance index of the training predictions, the fitted
    coefficients (all zero except the listed features), the length of
    ``train_score_``, and the error raised for input with too few columns.
    """
    data = make_whas500(with_std=False, to_numeric=True)

    n_stages = 100
    model = ComponentwiseGradientBoostingSurvivalAnalysis(n_estimators=n_stages)
    model.fit(data.x, data.y)

    predictions = model.predict(data.x)
    expected_cindex = (0.7755659, 58283, 16866, 0, 14)
    assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], predictions, expected_cindex)

    # Every coefficient is expected to be zero apart from these entries.
    nonzero_coef = {
        'age': 0.040919,
        'hr': 0.004977,
        'diasbp': -0.003407,
        'bmi': -0.017938,
        'sho': 0.429904,
        'chf': 0.508211,
    }
    expected_coef = pandas.Series(numpy.zeros(15, dtype=float), index=data.names)
    for feature, value in nonzero_coef.items():
        expected_coef[feature] = value

    assert_array_almost_equal(expected_coef.values, model.coef_)
    assert model.train_score_.shape == (n_stages,)

    # Input with only two columns must be rejected.
    with pytest.raises(ValueError, match='Dimensions of X are inconsistent with training data: '
                                         'expected 14 features, but got 2'):
        model.predict(data.x[:, :2])
def test_fit_subsample(make_whas500):
    """Fit component-wise gradient boosting with sub-sampling and check results.

    Checks the concordance index of the training predictions, the fitted
    coefficients, the lengths of ``train_score_`` and ``oob_improvement_``,
    and the error raised for input with too few columns.
    """
    data = make_whas500(with_std=False, to_numeric=True)

    n_stages = 100
    model = ComponentwiseGradientBoostingSurvivalAnalysis(
        n_estimators=n_stages, subsample=0.6, random_state=0
    )
    model.fit(data.x, data.y)

    predictions = model.predict(data.x)
    expected_cindex = (0.7750602, 58245, 16904, 0, 14)
    assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], predictions, expected_cindex)

    # Every coefficient is expected to be zero apart from these entries.
    nonzero_coef = {
        'age': 0.041299,
        'hr': 0.00487,
        'diasbp': -0.003381,
        'bmi': -0.017018,
        'sho': 0.433685,
        'chf': 0.510277,
    }
    expected_coef = pandas.Series(numpy.zeros(15, dtype=float), index=data.names)
    for feature, value in nonzero_coef.items():
        expected_coef[feature] = value

    assert_array_almost_equal(expected_coef.values, model.coef_)
    assert model.train_score_.shape == (n_stages,)
    assert model.oob_improvement_.shape == (n_stages,)

    # Input with only two columns must be rejected.
    with pytest.raises(ValueError, match='Dimensions of X are inconsistent with training data: '
                                         'expected 14 features, but got 2'):
        model.predict(data.x[:, :2])
def test_compare_clinical_kernel(make_whas500):
    """Compare a kernel SVM with a linear SVM fitted on kernel-PCA features.

    Both models use the pairwise kernel of a fitted ``ClinicalKernelTransform``.
    The concordance result of the linear model serves as the reference for the
    kernel model.
    """
    whas500 = make_whas500(to_numeric=True)

    clinical = ClinicalKernelTransform()
    clinical.fit(whas500.x_data_frame)

    # Linear SVM on the kernel-PCA embedding of the data.
    kpca = KernelPCA(kernel=clinical.pairwise_kernel, copy_X=True)
    embedded = kpca.fit_transform(whas500.x)
    linear_svm = FastSurvivalSVM(optimizer='rbtree', tol=1e-8, max_iter=500, random_state=0)
    linear_svm.fit(embedded, whas500.y)

    # Kernel SVM applied directly to the original features.
    kernel_svm = FastKernelSurvivalSVM(
        optimizer='rbtree',
        kernel=clinical.pairwise_kernel,
        tol=1e-8,
        max_iter=500,
        random_state=0,
    )
    kernel_svm.fit(whas500.x, whas500.y)

    pred_linear = linear_svm.predict(kpca.transform(whas500.x))
    pred_kernel = kernel_svm.predict(whas500.x)
    assert len(pred_linear) == len(pred_kernel)

    reference = concordance_index_censored(whas500.y['fstat'], whas500.y['lenfol'], pred_linear)
    assert_cindex_almost_equal(whas500.y['fstat'], whas500.y['lenfol'], pred_kernel, reference)
def test_survival_squared_hinge_loss(whas500_without_ties):
    """Check that the naive and fast survival SVMs agree with squared hinge loss.

    Coefficients must match to three decimals, and the concordance result of
    the naive model is the reference for the fast model's predictions.
    """
    x, y = whas500_without_ties

    naive_svm = NaiveSurvivalSVM(
        loss='squared_hinge', dual=False, tol=8e-7, max_iter=1000, random_state=0
    )
    naive_svm.fit(x, y)

    fast_svm = FastSurvivalSVM(optimizer='avltree', tol=8e-7, max_iter=1000, random_state=0)
    fast_svm.fit(x, y)

    assert_array_almost_equal(naive_svm.coef_.ravel(), fast_svm.coef_, 3)

    pred_naive = naive_svm.predict(x)
    pred_fast = fast_svm.predict(x)
    assert len(pred_naive) == len(pred_fast)

    reference = concordance_index_censored(y['fstat'], y['lenfol'], pred_naive)
    assert_cindex_almost_equal(y['fstat'], y['lenfol'], pred_fast, reference)
def test_compare_builtin_kernel(make_whas500):
    """Compare a polynomial-kernel SVM with a linear SVM on kernel-PCA features.

    Both paths use the same polynomial kernel parameters and the same seed.
    The concordance result of the linear model is the reference for the kernel
    model's predictions.
    """
    whas500 = make_whas500(to_numeric=True)
    x = normalize(whas500.x)

    # Kernel SVM on the normalized features.
    kernel_svm = FastKernelSurvivalSVM(
        optimizer='rbtree',
        kernel="polynomial",
        gamma=0.5,
        degree=2,
        tol=1e-8,
        max_iter=100,
        random_state=0xf38,
    )
    kernel_svm.fit(x, whas500.y)
    pred_kernel = kernel_svm.predict(x)

    # Linear SVM on the kernel-PCA embedding of the same features.
    kpca = KernelPCA(kernel="polynomial", copy_X=True, gamma=0.5, degree=2, random_state=0xf38)
    embedded = kpca.fit_transform(x)
    linear_svm = FastSurvivalSVM(optimizer='rbtree', tol=1e-8, max_iter=100, random_state=0xf38)
    linear_svm.fit(embedded, whas500.y)
    pred_linear = linear_svm.predict(embedded)

    assert len(pred_linear) == len(pred_kernel)

    reference = concordance_index_censored(whas500.y['fstat'], whas500.y['lenfol'], pred_linear)
    assert_cindex_almost_equal(whas500.y['fstat'], whas500.y['lenfol'], pred_kernel, reference)
def test_fit_custom_kernel(make_whas500):
    """Fit an ensemble selection over linear and custom-kernel survival SVMs.

    One linear and one kernel SVM is created per ``alpha`` in the grid. After
    fitting, the ensemble must hold ten estimators and ten scores, and its
    predictions must match the stored concordance reference.
    """
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)

    # Five alpha values spaced evenly on a log scale between 0.001 and 0.5.
    alphas = numpy.exp(numpy.linspace(numpy.log(0.001), numpy.log(0.5), 5))
    svm_grid = ParameterGrid({"alpha": alphas})

    transform = ClinicalKernelTransform(fit_once=True)
    transform.prepare(whas500.x_data_frame)

    # Linear models come first, followed by the kernel models.
    base_estimators = [
        ("svm_linear_%d" % i, FastSurvivalSVM(max_iter=100, random_state=0, **params))
        for i, params in enumerate(svm_grid)
    ]
    base_estimators.extend(
        (
            "svm_kernel_%d" % i,
            FastKernelSurvivalSVM(
                kernel=transform.pairwise_kernel, max_iter=45, tol=1e-5, random_state=0, **params
            ),
        )
        for i, params in enumerate(svm_grid)
    )

    cv = KFold(n_splits=3, shuffle=True, random_state=0)
    meta = EnsembleSelection(
        base_estimators, n_estimators=0.4, scorer=score_cindex, cv=cv, n_jobs=4
    )
    meta.fit(whas500.x, whas500.y)

    assert len(meta) == 10
    assert meta.scores_.shape == (10,)

    predictions = meta.predict(whas500.x)
    expected_cindex = (0.7978084, 59938, 15178, 33, 14)
    assert_cindex_almost_equal(
        whas500.y['fstat'], whas500.y['lenfol'], predictions, expected_cindex
    )
def test_toy_minlip_predict_1_osqp(self, toy_data):
    """Fit ``self.minlip_model`` on the toy data and check the concordance result."""
    x, y = toy_data

    model = self.minlip_model
    model.fit(x, y)

    predictions = model.predict(x)
    expected_cindex = (1.0, 11, 0, 0, 0)
    assert_cindex_almost_equal(y['status'], y['time'], predictions, expected_cindex)
def test_toy_minlip_predict_1_cvxopt(self, toy_data):
    """Fit ``self.minlip_model`` on the toy data, expecting a deprecation warning.

    The fit call must emit a deprecation warning; the predictions are then
    compared against the stored concordance reference.
    """
    x, y = toy_data
    model = self.minlip_model

    with pytest.deprecated_call():
        model.fit(x, y)

    predictions = model.predict(x)
    expected_cindex = (1.0, 11, 0, 0, 0)
    assert_cindex_almost_equal(y['status'], y['time'], predictions, expected_cindex)
def test_breast_cancer_cvxpy(gbsg2):
    """Fit ``MinlipSurvivalAnalysis`` with the cvxpy solver on the ``gbsg2`` data.

    Checks that ``coef_`` has one row with one column per sample and that the
    predictions match the stored concordance reference.
    """
    x, y = gbsg2

    model = MinlipSurvivalAnalysis(solver="cvxpy", alpha=1, pairs="next")
    model.fit(x, y)

    assert model.coef_.shape == (1, x.shape[0])

    predictions = model.predict(x)
    expected_cindex = (0.59576770470121443, 79280, 53792, 0, 32)
    assert_cindex_almost_equal(y['cens'], y['time'], predictions, expected_cindex)
def test_breast_cancer_cvxopt(self, gbsg2):
    """Fit ``self.model`` on the ``gbsg2`` data and check shape and concordance.

    ``coef_`` must have one row with one column per sample, and the
    predictions must match the stored concordance reference.
    """
    x, y = gbsg2

    model = self.model
    model.fit(x, y)

    assert model.coef_.shape == (1, x.shape[0])

    predictions = model.predict(x)
    expected_cindex = (0.59570007214139709, 79271, 53801, 0, 42)
    assert_cindex_almost_equal(y['cens'], y['time'], predictions, expected_cindex)
def test_toy_hinge_fit(self, toy_data):
    """Fit ``self.svm_model`` on the toy data and check shape and concordance.

    ``coef_`` must have one row with one column per sample, and the
    predictions must match the stored concordance reference.
    """
    x, y = toy_data

    model = self.svm_model
    model.fit(x, y)

    assert model.coef_.shape == (1, x.shape[0])

    predictions = model.predict(x)
    expected_cindex = (1.0, 11, 0, 0, 0)
    assert_cindex_almost_equal(y['status'], y['time'], predictions, expected_cindex)
def test_fit_no_bootstrap(make_whas500):
    """Fit a ten-tree ``RandomSurvivalForest`` with bootstrapping disabled.

    The training predictions are compared against the stored concordance
    reference.
    """
    whas500 = make_whas500(to_numeric=True)

    forest = RandomSurvivalForest(n_estimators=10, bootstrap=False, random_state=2)
    forest.fit(whas500.x, whas500.y)

    predictions = forest.predict(whas500.x)
    expected_cindex = (0.931881994437717, 70030, 5119, 0, 14)
    assert_cindex_almost_equal(
        whas500.y["fstat"], whas500.y["lenfol"], predictions, expected_cindex
    )
def test_breast_cancer_ecos(gbsg2):
    """Fit ``MinlipSurvivalAnalysis`` with the ecos solver on scaled ``gbsg2`` data.

    Checks that ``coef_`` has one row with one column per sample and that the
    predictions match the stored concordance reference.
    """
    features, y = gbsg2
    x = scale(features)

    model = MinlipSurvivalAnalysis(solver="ecos", alpha=1, pairs="next")
    model.fit(x, y)

    assert model.coef_.shape == (1, x.shape[0])

    predictions = model.predict(x)
    expected_cindex = (0.5990741854033906, 79720, 53352, 0, 42)
    assert_cindex_almost_equal(y['cens'], y['time'], predictions, expected_cindex)
def test_fit_spearman_correlation(make_whas500):
    """Fit the survival ensemble with ``correlation="spearman"``.

    The ensemble must be empty before fitting; afterwards its predictions are
    compared against the stored concordance reference.
    """
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)

    ensemble = _create_survival_ensemble(correlation="spearman")
    assert len(ensemble) == 0

    ensemble.fit(whas500.x, whas500.y)

    predictions = ensemble.predict(whas500.x)
    expected_cindex = (0.7863312, 59088, 16053, 8, 119)
    assert_cindex_almost_equal(
        whas500.y['fstat'], whas500.y['lenfol'], predictions, expected_cindex
    )
def test_fit_kendall_correlation(make_whas500):
    """Fit the survival ensemble with ``correlation="kendall"``.

    The ensemble must be empty before fitting; afterwards its predictions are
    compared against the stored concordance reference.
    """
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)

    ensemble = _create_survival_ensemble(correlation="kendall")
    assert len(ensemble) == 0

    ensemble.fit(whas500.x, whas500.y)

    predictions = ensemble.predict(whas500.x)
    expected_cindex = (0.7663043, 57570, 17545, 34, 119)
    assert_cindex_almost_equal(
        whas500.y['fstat'], whas500.y['lenfol'], predictions, expected_cindex
    )
def test_breast_cancer_osqp(gbsg2):
    """Fit ``MinlipSurvivalAnalysis`` with the osqp solver on scaled ``gbsg2`` data.

    Checks that ``coef_`` has one row with one column per sample and that the
    predictions match the stored concordance reference.
    """
    features, y = gbsg2
    x = scale(features)

    model = MinlipSurvivalAnalysis(solver="osqp", alpha=1, pairs="next")
    model.fit(x, y)

    assert model.coef_.shape == (1, x.shape[0])

    predictions = model.predict(x)
    expected_cindex = (0.599066670674522, 79719, 53353, 0, 42)
    assert_cindex_almost_equal(y['cens'], y['time'], predictions, expected_cindex)
def test_breast_cancer_rbf_cvxpy(gbsg2):
    """Fit an RBF-kernel ``MinlipSurvivalAnalysis`` with the cvxpy solver.

    Uses scaled ``gbsg2`` data. Checks that ``coef_`` has one row with one
    column per sample and that the predictions match the stored concordance
    reference.
    """
    features, y = gbsg2
    x = scale(features)

    model = MinlipSurvivalAnalysis(
        solver="cvxpy",
        alpha=1,
        kernel="rbf",
        gamma=1./8,
        pairs="next",
        max_iter=1000,
    )
    model.fit(x, y)

    assert model.coef_.shape == (1, x.shape[0])

    predictions = model.predict(x)
    expected_cindex = (0.6105867500300589, 81252, 51820, 0, 42)
    assert_cindex_almost_equal(y['cens'], y['time'], predictions, expected_cindex)
def test_predict(make_whas500):
    """Fit ``IPCRidge`` on the first 400 samples and evaluate on the remainder.

    The negated predictions are compared against the stored concordance
    reference. ``model.score`` is then expected to equal one minus that
    reference concordance index.
    """
    whas500 = make_whas500()

    model = IPCRidge()
    model.fit(whas500.x[:400], whas500.y[:400])

    x_test = whas500.x[400:]
    y_test = whas500.y[400:]

    # Single source of truth for the reference value used by both assertions.
    expected_cindex = 0.66925817946226107

    p = model.predict(x_test)
    assert_cindex_almost_equal(y_test['fstat'], y_test['lenfol'], -p,
                               (expected_cindex, 2066, 1021, 0, 1))

    # Compare with a tolerance: exact float equality breaks on harmless
    # last-bit differences between platforms or library versions.
    assert model.score(x_test, y_test) == pytest.approx(1.0 - expected_cindex)
def test_breast_cancer_rbf_cvxopt(self, gbsg2):
    """Fit ``self.model`` with an RBF kernel (``gamma=1/8``) on scaled ``gbsg2`` data.

    Checks that ``coef_`` has one row with one column per sample and that the
    predictions match the stored concordance reference.
    """
    features, y = gbsg2
    x = scale(features)

    model = self.model
    model.set_params(kernel="rbf", gamma=1./8)
    model.fit(x, y)

    assert model.coef_.shape == (1, x.shape[0])

    predictions = model.predict(x)
    expected_cindex = (0.6106092942166647, 81255, 51817, 0, 42)
    assert_cindex_almost_equal(y['cens'], y['time'], predictions, expected_cindex)
def test_breast_cancer_rbf_cvxopt(self, gbsg2):
    """Fit ``self.model`` with an RBF kernel (``gamma=32``) on scaled ``gbsg2`` data.

    Checks that ``coef_`` has one row with one column per sample and that the
    predictions match the stored concordance reference.
    """
    features, y = gbsg2
    x = scale(features)

    model = self.model
    model.set_params(kernel="rbf", gamma=32)
    model.fit(x, y)

    assert model.coef_.shape == (1, x.shape[0])

    predictions = model.predict(x)
    expected_cindex = (0.6487427858602861, 85974, 46387, 711, 32)
    assert_cindex_almost_equal(y['cens'], y['time'], predictions, expected_cindex)
def test_fit(make_whas500):
    """Fit the default survival ensemble and check its size, scores and predictions.

    The ensemble must be empty before fitting and hold eleven estimators and
    eleven scores afterwards. Predictions are compared against the stored
    concordance reference.
    """
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)

    ensemble = _create_survival_ensemble()
    assert len(ensemble) == 0

    ensemble.fit(whas500.x, whas500.y)

    n_expected = 11
    assert len(ensemble) == n_expected
    assert ensemble.scores_.shape == (n_expected,)

    predictions = ensemble.predict(whas500.x)
    expected_cindex = (0.7863312, 59088, 16053, 8, 119)
    assert_cindex_almost_equal(
        whas500.y['fstat'], whas500.y['lenfol'], predictions, expected_cindex
    )
def test_predict(make_whas500):
    """Fit a ``Stacking`` model over Cox PH and a linear survival SVM.

    ``MeanEstimator`` is the meta estimator and ``probabilities=False`` is
    passed. Predictions on the training data are compared against the stored
    concordance reference.
    """
    whas500 = make_whas500(with_mean=False, with_std=False, to_numeric=True)

    meta_estimator = MeanEstimator()
    base_estimators = [
        ('coxph', CoxPHSurvivalAnalysis()),
        ('svm', FastSurvivalSVM(random_state=0)),
    ]
    stacked = Stacking(meta_estimator, base_estimators, probabilities=False)
    stacked.fit(whas500.x, whas500.y)

    # result is different if randomForestSRC has not been compiled with OpenMP support
    predictions = stacked.predict(whas500.x)
    expected_cindex = (0.7848807, 58983, 16166, 0, 14)
    assert_cindex_almost_equal(
        whas500.y['fstat'], whas500.y['lenfol'], predictions, expected_cindex
    )
def test_fit_predict(make_whas500, name, expected_c):
    """Fit the forest registered under ``name`` in ``FORESTS`` and check predictions.

    The fitted forest must contain 100 estimators, its predictions must be
    finite and non-negative, and their concordance result must match
    ``expected_c``.
    """
    whas500 = make_whas500(to_numeric=True)

    forest_cls = FORESTS[name]
    forest = forest_cls(random_state=2)
    forest.fit(whas500.x, whas500.y)

    assert len(forest.estimators_) == 100

    predictions = forest.predict(whas500.x)
    assert numpy.isfinite(predictions).all()
    assert numpy.all(predictions >= 0)

    assert_cindex_almost_equal(
        whas500.y["fstat"], whas500.y["lenfol"], predictions, expected_c
    )
def test_fit_dropout(make_whas500):
    """Fit gradient boosting with dropout and compare against the reference.

    ``oob_improvement_`` must not be set (no ``subsample`` argument is
    passed), ``max_features_`` must equal the requested value, and the
    training predictions must match the stored concordance reference.
    """
    data = make_whas500(with_std=False, to_numeric=True)

    model = GradientBoostingSurvivalAnalysis(
        n_estimators=100,
        max_features=8,
        learning_rate=1.0,
        dropout_rate=0.03,
        random_state=0,
    )
    model.fit(data.x, data.y)

    assert not hasattr(model, "oob_improvement_")
    assert model.max_features_ == 8

    predictions = model.predict(data.x)
    expected_cindex = (0.9094333, 68343, 6806, 0, 119)
    assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], predictions, expected_cindex)
def test_fit_predict(make_whas500):
    """Fit a ``RandomSurvivalForest`` with a fixed seed and check its predictions.

    The fitted forest must contain 100 estimators, its predictions must be
    finite and non-negative, and their concordance result must match the
    stored reference.
    """
    whas500 = make_whas500(to_numeric=True)

    forest = RandomSurvivalForest(random_state=2)
    forest.fit(whas500.x, whas500.y)

    assert len(forest.estimators_) == 100

    predictions = forest.predict(whas500.x)
    assert numpy.isfinite(predictions).all()
    assert numpy.all(predictions >= 0)

    expected_cindex = (0.9026201280123488, 67831, 7318, 0, 14)
    assert_cindex_almost_equal(
        whas500.y["fstat"], whas500.y["lenfol"], predictions, expected_cindex
    )
def test_toy_hinge_fit(self, toy_data):
    """Fit ``self.svm_model`` on toy data divided by its per-column standard deviation.

    Checks the shape of ``coef_``, that ``coef0`` equals one, the coefficient
    values, and the concordance result of the predictions.
    """
    x, y = toy_data
    model = self.svm_model

    column_std = numpy.std(x, axis=0)
    model.fit(x / column_std, y)

    assert model.coef_.shape == (1, x.shape[0])
    assert model.coef0 == 1

    expected_coef = numpy.array(
        [[-1.893832101337, 1.083653895940, 0.810178205398, -2., 2., 0.]]
    )
    assert_array_almost_equal(model.coef_, expected_coef)

    predictions = model.predict(x / column_std)
    expected_cindex = (1.0, 11, 0, 0, 0)
    assert_cindex_almost_equal(y['status'], y['time'], predictions, expected_cindex)
def test_breast_cancer_rbf_osqp(gbsg2):
    """Fit an RBF-kernel ``MinlipSurvivalAnalysis`` with the osqp solver.

    Uses scaled ``gbsg2`` data. Checks that ``coef_`` has one row with one
    column per sample and that the predictions match the stored concordance
    reference.
    """
    features, y = gbsg2
    x = scale(features)

    model = MinlipSurvivalAnalysis(
        solver="osqp",
        alpha=1,
        kernel="rbf",
        gamma=1. / 8,
        pairs="next",
        max_iter=1000,
    )
    model.fit(x, y)

    assert model.coef_.shape == (1, x.shape[0])

    predictions = model.predict(x)
    expected_cindex = (0.6106168089455333, 81256, 51816, 0, 42)
    assert_cindex_almost_equal(y['cens'], y['time'], predictions, expected_cindex)
def test_toy_hinge_nearest_fit(self, toy_data):
    """Fit ``self.svm_model`` with ``pairs="nearest"`` on standardised toy data.

    The fit call must emit a deprecation warning. Afterwards the shape of
    ``coef_``, the value of ``coef0``, the coefficients (to five decimals)
    and the concordance result of the predictions are checked.
    """
    x, y = toy_data

    model = self.svm_model
    model.set_params(pairs="nearest")

    column_std = numpy.std(x, axis=0)
    with pytest.deprecated_call():
        model.fit(x / column_std, y)

    assert model.coef_.shape == (1, x.shape[0])
    assert model.coef0 == 1

    expected_coef = numpy.array(
        [[-1.893832101337, 1.083653895940, 0.810178205398, -2., 2., 0.]]
    )
    assert_array_almost_equal(model.coef_, expected_coef, decimal=5)

    predictions = model.predict(x / column_std)
    expected_cindex = (1.0, 11, 0, 0, 0)
    assert_cindex_almost_equal(y['status'], y['time'], predictions, expected_cindex)
def test_fit_int_param_as_float(make_whas500):
    """Check that integer hyper-parameters given as floats are accepted.

    Parameters passed as floats must compare equal to their integer values
    via ``get_params``, both after construction and after ``set_params``.
    The model is then fitted and its training predictions are compared
    against the stored concordance reference.
    """
    data = make_whas500(with_std=False, to_numeric=True)

    model = GradientBoostingSurvivalAnalysis(
        n_estimators=100.0, max_depth=3.0, min_samples_split=10.0, random_state=0
    )

    params = model.get_params()
    assert params["n_estimators"] == 100
    assert params["max_depth"] == 3
    assert params["min_samples_split"] == 10

    model.set_params(max_leaf_nodes=15.0)
    assert model.get_params()["max_leaf_nodes"] == 15

    model.fit(data.x, data.y)

    predictions = model.predict(data.x)
    expected_cindex = (0.90256690042449006, 67826, 7321, 2, 119)
    assert_cindex_almost_equal(data.y['fstat'], data.y['lenfol'], predictions, expected_cindex)