def test_compare_builtin_kernel(make_whas500):
    """Compare a polynomial-kernel SVM against a linear SVM on KernelPCA features.

    A ``FastKernelSurvivalSVM`` with the built-in polynomial kernel is fitted on
    the normalized data, and a ``FastSurvivalSVM`` is fitted on the output of a
    ``KernelPCA`` using the same kernel settings. Both prediction vectors must
    have equal length and the kernel model's concordance index is checked
    against the one obtained from the linear model.
    """
    whas500 = make_whas500(to_numeric=True)
    features = normalize(whas500.x)

    # Settings shared by both estimators / the kernel transform.
    poly_kernel = dict(kernel="polynomial", gamma=0.5, degree=2)
    solver = dict(optimizer='rbtree', tol=1e-8, max_iter=100, random_state=0xf38)

    kernel_svm = FastKernelSurvivalSVM(**solver, **poly_kernel)
    kernel_svm.fit(features, whas500.y)
    pred_rsvm = kernel_svm.predict(features)

    kpca = KernelPCA(copy_X=True, random_state=0xf38, **poly_kernel)
    projected = kpca.fit_transform(features)

    linear_svm = FastSurvivalSVM(**solver)
    linear_svm.fit(projected, whas500.y)
    pred_nrsvm = linear_svm.predict(projected)

    assert len(pred_nrsvm) == len(pred_rsvm)

    expected_cindex = concordance_index_censored(
        whas500.y['fstat'], whas500.y['lenfol'], pred_nrsvm)
    assert_cindex_almost_equal(
        whas500.y['fstat'], whas500.y['lenfol'], pred_rsvm, expected_cindex)
def test_compare_clinical_kernel(self):
    """Compare the clinical kernel used directly and via KernelPCA.

    A ``ClinicalKernelTransform`` is fitted on the data returned by
    ``load_whas500()``; its pairwise kernel is then used both inside a
    ``KernelPCA`` (followed by ``FastSurvivalSVM``) and directly by
    ``FastKernelSurvivalSVM``. The resulting concordance statistics must agree.
    """
    x_full, y = load_whas500()

    clinical = ClinicalKernelTransform()
    clinical.fit(x_full)

    kpca = KernelPCA(kernel=clinical.pairwise_kernel, copy_X=True)
    projected = kpca.fit_transform(self.x)

    svm_params = dict(optimizer='rbtree', tol=1e-8, max_iter=500, random_state=0)

    linear_svm = FastSurvivalSVM(**svm_params)
    linear_svm.fit(projected, y)

    kernel_svm = FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, **svm_params)
    kernel_svm.fit(self.x.values, y)

    pred_nrsvm = linear_svm.predict(kpca.transform(self.x))
    pred_rsvm = kernel_svm.predict(self.x.values)

    self.assertEqual(len(pred_nrsvm), len(pred_rsvm))

    cindex_linear = concordance_index_censored(y['fstat'], y['lenfol'], pred_nrsvm)
    cindex_kernel = concordance_index_censored(y['fstat'], y['lenfol'], pred_rsvm)

    # First entry is compared approximately, the remaining entries exactly.
    self.assertAlmostEqual(cindex_linear[0], cindex_kernel[0])
    self.assertTupleEqual(cindex_linear[1:], cindex_kernel[1:])
def test_survival_squared_hinge_loss(self):
    """Check that the naive and the fast SVM agree for the squared hinge loss.

    Both models are fitted on the data from ``get_data_without_ties()``; their
    coefficients must match to 3 decimals and their concordance statistics
    must agree.
    """
    x, y = self.get_data_without_ties()

    common = dict(tol=8e-7, max_iter=1000, random_state=0)

    naive_svm = NaiveSurvivalSVM(loss='squared_hinge', dual=False, **common)
    naive_svm.fit(x, y)

    fast_svm = FastSurvivalSVM(optimizer='avltree', **common)
    fast_svm.fit(x, y)

    assert_array_almost_equal(naive_svm.coef_.ravel(), fast_svm.coef_, 3)

    pred_nrsvm = naive_svm.predict(x)
    pred_rsvm = fast_svm.predict(x)

    self.assertEqual(len(pred_nrsvm), len(pred_rsvm))

    cindex_naive = concordance_index_censored(y['fstat'], y['lenfol'], pred_nrsvm)
    cindex_fast = concordance_index_censored(y['fstat'], y['lenfol'], pred_rsvm)

    # First entry is compared approximately, the remaining entries exactly.
    self.assertAlmostEqual(cindex_naive[0], cindex_fast[0])
    self.assertTupleEqual(cindex_naive[1:], cindex_fast[1:])
def test_fit_and_predict_hybrid(self):
    """Fit with ``rank_ratio=0.5`` and an intercept, then check stored reference values.

    Skipped for the 'simple' and 'PRSVM' optimizers. Verifies the fitted
    intercept, the coefficient vector, and the RMSE of the predictions against
    the ``lenfol`` field of ``self.y``.
    """
    without_regression = {'simple', 'PRSVM'}
    if self.OPTIMIZER in without_regression:
        raise unittest.SkipTest("regression not implemented for " + self.OPTIMIZER)

    model = FastSurvivalSVM(
        optimizer=self.OPTIMIZER,
        rank_ratio=0.5,
        max_iter=50,
        fit_intercept=True,
        random_state=0,
    )
    model.fit(self.x.values, self.y)

    self.assertAlmostEqual(6.1409367385513729, model.intercept_)

    expected_coef = numpy.array([
        -0.0209254120718, -0.265768317208, -0.154254689136, 0.0800600947891,
        -0.290121131022, -0.0288851785213, 0.0998004550073, 0.0454100937492,
        -0.125863947621, 0.0343588337797, -0.000710219364914, 0.0546969104996,
        -0.5375338235, -0.0137995110308,
    ])
    assert_array_almost_equal(expected_coef, model.coef_)

    predictions = model.predict(self.x.values)
    squared_error = mean_squared_error(self.y['lenfol'], predictions)
    rmse = numpy.sqrt(squared_error)
    self.assertAlmostEqual(780.52617631863893, rmse)
def test_compare_builtin_kernel(self):
    """Compare a polynomial-kernel SVM against a linear SVM on KernelPCA features.

    ``FastKernelSurvivalSVM`` with the built-in polynomial kernel and
    ``FastSurvivalSVM`` fitted on ``KernelPCA`` output (same kernel settings)
    must produce matching concordance statistics on the normalized data.
    """
    features = normalize(self.x)
    y = self.y

    seed = 0xf38
    poly_kernel = dict(kernel="polynomial", gamma=0.5, degree=2)

    kernel_svm = FastKernelSurvivalSVM(
        optimizer='rbtree', tol=1e-8, max_iter=100, random_state=seed, **poly_kernel)
    kernel_svm.fit(features, y)
    pred_rsvm = kernel_svm.predict(features)

    kpca = KernelPCA(copy_X=True, random_state=seed, **poly_kernel)
    projected = kpca.fit_transform(features)

    linear_svm = FastSurvivalSVM(
        optimizer='rbtree', tol=1e-8, max_iter=100, random_state=seed)
    linear_svm.fit(projected, y)
    pred_nrsvm = linear_svm.predict(projected)

    self.assertEqual(len(pred_nrsvm), len(pred_rsvm))

    cindex_linear = concordance_index_censored(y['fstat'], y['lenfol'], pred_nrsvm)
    cindex_kernel = concordance_index_censored(y['fstat'], y['lenfol'], pred_rsvm)

    # First entry is compared approximately, the remaining entries exactly.
    self.assertAlmostEqual(cindex_linear[0], cindex_kernel[0])
    self.assertTupleEqual(cindex_linear[1:], cindex_kernel[1:])
def test_default_optimizer(make_whas500):
    """Check that fitting without an explicit optimizer ends up with 'avltree'.

    ``ConvergenceWarning`` is silenced during the fit because only the value
    of the ``optimizer`` attribute after fitting is of interest here.
    """
    data = make_whas500(to_numeric=True)
    model = FastSurvivalSVM(tol=1e-4, max_iter=25)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        model.fit(data.x, data.y)

    assert 'avltree' == model.optimizer
def test_unknown_optimizer(fake_data):
    """Fitting with an unrecognized optimizer name must raise ``ValueError``."""
    features, target = fake_data
    model = FastSurvivalSVM(rank_ratio=0, optimizer='random stuff')

    expected_message = "unknown optimizer: random stuff"
    with pytest.raises(ValueError, match=expected_message):
        model.fit(features, target)
def test_regression_not_supported(fake_data, value):
    """Fitting with ``rank_ratio=0`` and optimizer ``value`` must raise ``ValueError``.

    The expected message embeds the ``repr`` of ``value``.
    """
    features, target = fake_data
    model = FastSurvivalSVM(rank_ratio=0, optimizer=value)

    expected_message = "optimizer {!r} does not implement regression objective".format(value)
    with pytest.raises(ValueError, match=expected_message):
        model.fit(features, target)
def test_survival_squared_hinge_loss(whas500_without_ties):
    """Check that the naive and the fast SVM agree for the squared hinge loss.

    Coefficients must match to 3 decimals, and the fast model's concordance
    index is checked against the one obtained from the naive model.
    """
    x, y = whas500_without_ties

    common = dict(tol=8e-7, max_iter=1000, random_state=0)

    naive_svm = NaiveSurvivalSVM(loss='squared_hinge', dual=False, **common)
    naive_svm.fit(x, y)

    fast_svm = FastSurvivalSVM(optimizer='avltree', **common)
    fast_svm.fit(x, y)

    assert_array_almost_equal(naive_svm.coef_.ravel(), fast_svm.coef_, 3)

    pred_nrsvm = naive_svm.predict(x)
    pred_rsvm = fast_svm.predict(x)

    assert len(pred_nrsvm) == len(pred_rsvm)

    expected_cindex = concordance_index_censored(y['fstat'], y['lenfol'], pred_nrsvm)
    assert_cindex_almost_equal(y['fstat'], y['lenfol'], pred_rsvm, expected_cindex)
def test_compare_clinical_kernel(make_whas500):
    """Compare the clinical kernel used directly and via KernelPCA.

    The ``ClinicalKernelTransform`` is fitted on ``x_data_frame``; its pairwise
    kernel is used inside a ``KernelPCA`` (followed by ``FastSurvivalSVM``) and
    directly by ``FastKernelSurvivalSVM``. The kernel model's concordance index
    is checked against the one obtained from the linear model.
    """
    whas500 = make_whas500(to_numeric=True)

    clinical = ClinicalKernelTransform()
    clinical.fit(whas500.x_data_frame)

    kpca = KernelPCA(kernel=clinical.pairwise_kernel, copy_X=True)
    projected = kpca.fit_transform(whas500.x)

    svm_params = dict(optimizer='rbtree', tol=1e-8, max_iter=500, random_state=0)

    linear_svm = FastSurvivalSVM(**svm_params)
    linear_svm.fit(projected, whas500.y)

    kernel_svm = FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, **svm_params)
    kernel_svm.fit(whas500.x, whas500.y)

    pred_nrsvm = linear_svm.predict(kpca.transform(whas500.x))
    pred_rsvm = kernel_svm.predict(whas500.x)

    assert len(pred_nrsvm) == len(pred_rsvm)

    expected_cindex = concordance_index_censored(
        whas500.y['fstat'], whas500.y['lenfol'], pred_nrsvm)
    assert_cindex_almost_equal(
        whas500.y['fstat'], whas500.y['lenfol'], pred_rsvm, expected_cindex)
def test_rank_ratio_out_of_bounds(fake_data, value):
    """Fitting with ``rank_ratio=value`` must raise ``ValueError`` about the valid range."""
    features, target = fake_data
    model = FastSurvivalSVM(rank_ratio=value)

    # Brackets are escaped because ``match`` is interpreted as a regular expression.
    expected_pattern = r"rank_ratio must be in \[0; 1\]"
    with pytest.raises(ValueError, match=expected_pattern):
        model.fit(features, target)
def test_compare_rbf(self):
    """Compare an RBF-kernel SVM against a linear SVM on KernelPCA features.

    Data from ``load_whas500()`` is standardized and categorically encoded.
    ``FastSurvivalSVM`` on RBF ``KernelPCA`` output and
    ``FastKernelSurvivalSVM`` with the RBF kernel must produce matching
    concordance statistics.
    """
    x, y = load_whas500()
    standardized = standardize(x)
    x = encode_categorical(standardized)

    kpca = KernelPCA(kernel="rbf")
    projected = kpca.fit_transform(x)

    svm_params = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

    linear_svm = FastSurvivalSVM(**svm_params)
    linear_svm.fit(projected, y)

    kernel_svm = FastKernelSurvivalSVM(kernel="rbf", **svm_params)
    kernel_svm.fit(x, y)

    pred_nrsvm = linear_svm.predict(kpca.transform(x))
    pred_rsvm = kernel_svm.predict(x)

    self.assertEqual(len(pred_nrsvm), len(pred_rsvm))

    cindex_linear = concordance_index_censored(y['fstat'], y['lenfol'], pred_nrsvm)
    cindex_kernel = concordance_index_censored(y['fstat'], y['lenfol'], pred_rsvm)

    # First entry is compared approximately, the remaining entries exactly.
    self.assertAlmostEqual(cindex_linear[0], cindex_kernel[0])
    self.assertTupleEqual(cindex_linear[1:], cindex_kernel[1:])
def test_negative_time():
    """Fitting with a negative observed time must raise ``ValueError``."""
    features = numpy.arange(80).reshape(10, 8)

    event = [0, 1, 0, 1, 1, 0, 1, 0, 0, 1]
    time = [1, 1, -2, 1, 1, 6, 1, 2, 3, 1]  # third entry is negative
    target = Surv.from_arrays(event, time)

    model = FastSurvivalSVM(rank_ratio=0.5)

    expected_message = "observed time contains values smaller or equal to zero"
    with pytest.raises(ValueError, match=expected_message):
        model.fit(features, target)
def test_fit_timeit(self):
    """Fit with ``timeit=3`` on 100 sampled rows and check that timings are recorded.

    The rows are drawn without replacement using a ``RandomState`` seeded
    with 0.
    """
    rng = numpy.random.RandomState(0)
    row_ids = numpy.arange(self.x.shape[0])
    idx = rng.choice(row_ids, replace=False, size=100)

    model = FastSurvivalSVM(optimizer=self.OPTIMIZER, timeit=3, random_state=0)
    model.fit(self.x.values[idx, :], self.y[idx])

    has_timings = 'timings' in model.optimizer_result_
    self.assertTrue(has_timings)
def test_ranking_with_fit_intercept():
    """``fit_intercept=True`` combined with ``rank_ratio=1.0`` must raise ``ValueError``."""
    features = numpy.zeros((100, 10))

    event = numpy.ones(100, dtype=bool)
    time = numpy.arange(1, 101, dtype=float)
    target = Surv.from_arrays(event, time)

    model = FastSurvivalSVM(rank_ratio=1.0, fit_intercept=True)

    expected_message = "fit_intercept=True is only meaningful if rank_ratio < 1.0"
    with pytest.raises(ValueError, match=expected_message):
        model.fit(features, target)
def test_all_censored():
    """Fitting when every event indicator is ``False`` must raise ``ValueError``."""
    features = numpy.arange(80).reshape(10, 8)

    event = numpy.zeros(10, dtype=bool)
    time = [0, 1, 2, 1, 1, 0, 1, 2, 3, 1]
    target = Surv.from_arrays(event, time)

    model = FastSurvivalSVM()

    with pytest.raises(ValueError, match="all samples are censored"):
        model.fit(features, target)
def test_y_invalid(y):
    """Fitting with the supplied invalid ``y`` must raise ``ValueError``.

    The expected message states that ``y`` has to be a structured array with
    an event indicator field followed by a time field.
    """
    features = numpy.zeros((100, 10))
    model = FastSurvivalSVM()

    expected_message = (
        'y must be a structured array with the first field'
        ' being a binary class event indicator and the second field'
        ' the time of the event/censoring'
    )
    with pytest.raises(ValueError, match=expected_message):
        model.fit(features, y)
def test_fit_timeit(make_whas500, optimizer_any):
    """Fit with ``timeit=3`` on 100 sampled rows and check that timings are recorded.

    The rows are drawn without replacement using a ``RandomState`` seeded
    with 0.
    """
    whas500 = make_whas500(to_numeric=True)

    rng = numpy.random.RandomState(0)
    row_ids = numpy.arange(whas500.x.shape[0])
    idx = rng.choice(row_ids, replace=False, size=100)

    model = FastSurvivalSVM(optimizer=optimizer_any, timeit=3, random_state=0)
    model.fit(whas500.x[idx, :], whas500.y[idx])

    assert 'timings' in model.optimizer_result_
def test_event_not_binary():
    """Fitting with an integer-typed event field must raise ``ValueError``."""
    features = numpy.arange(80).reshape(10, 8)

    record_type = [('event', int), ('time', float)]
    target = numpy.empty(dtype=record_type, shape=10)
    target['event'] = numpy.array([0, 1, 2, 1, 1, 0, 1, 2, 3, 1], dtype=int)
    target['time'] = numpy.arange(10)

    model = FastSurvivalSVM()

    expected_message = "elements of event indicator must be boolean, but found int"
    with pytest.raises(ValueError, match=expected_message):
        model.fit(features, target)
def test_time_not_numeric():
    """Fitting with a boolean-typed time field must raise ``ValueError``."""
    features = numpy.arange(80).reshape(10, 8)

    record_type = [('event', bool), ('time', bool)]
    target = numpy.empty(dtype=record_type, shape=10)
    target['event'] = numpy.array([0, 1, 0, 1, 1, 0, 1, 0, 0, 1], dtype=bool)
    target['time'] = numpy.ones(10, dtype=bool)

    model = FastSurvivalSVM()

    expected_message = "time must be numeric, but found bool"
    with pytest.raises(ValueError, match=expected_message):
        model.fit(features, target)
def test_fit_and_predict_ranking(self):
    """Fit with default (ranking) settings and check stored reference values.

    Verifies that no ``intercept_`` attribute is set, that the coefficients
    match the reference vector, that there is one coefficient per column of
    ``self.x``, and that ``score`` matches the reference value to 6 places.
    """
    ssvm = FastSurvivalSVM(optimizer=self.OPTIMIZER, random_state=0)
    ssvm.fit(self.x.values, self.y)

    self.assertFalse(hasattr(ssvm, "intercept_"))

    expected_coef = numpy.array([
        -0.02066177, -0.26449933, -0.15205399, 0.0794547, -0.28840498,
        -0.02864288, 0.09901995, 0.04505302, -0.12512215, 0.03341365,
        -0.00110442, 0.05446756, -0.53009875, -0.01394175,
    ])
    assert_array_almost_equal(expected_coef, ssvm.coef_)

    # assertEqual replaces the deprecated alias assertEquals, which no longer
    # exists on TestCase in Python 3.12 and later.
    self.assertEqual(self.x.shape[1], ssvm.coef_.shape[0])

    c = ssvm.score(self.x.values, self.y)
    self.assertAlmostEqual(0.7860650174985695, c, 6)
def test_fit_and_predict_regression_no_intercept(make_whas500, optimizer_regression):
    """Fit with ``rank_ratio=0.0`` and no intercept, then check stored reference values.

    Verifies that no ``intercept_`` attribute is set, that the coefficients
    match the reference vector, and that the RMSE of the predictions against
    ``lenfol`` matches the reference value to 7 decimals.
    """
    whas500 = make_whas500(to_numeric=True)

    model = FastSurvivalSVM(
        optimizer=optimizer_regression,
        rank_ratio=0.0,
        max_iter=50,
        fit_intercept=False,
        random_state=0,
    )
    model.fit(whas500.x, whas500.y)

    assert not hasattr(model, "intercept_")

    expected_coef = numpy.array([
        1.39989875, -1.16903161, -0.40195857, -0.05848903, -0.08421557,
        4.11924729, 0.25135451, 1.89067276, -0.25751401, -0.10213143,
        1.56333622, 3.10136873, -2.23644848, -0.11620715,
    ])
    assert_array_almost_equal(expected_coef, model.coef_)

    predictions = model.predict(whas500.x)
    rmse = numpy.sqrt(mean_squared_error(whas500.y['lenfol'], predictions))
    rmse_error = abs(15838.510668936022 - rmse)
    assert round(rmse_error, 7) == 0
def test_fit_and_predict_hybrid_no_intercept(make_whas500, optimizer_regression):
    """Fit with ``rank_ratio=0.5`` and no intercept, then check stored reference values.

    Verifies that no ``intercept_`` attribute is set, that the coefficients
    match the reference vector, and that the RMSE of the predictions against
    ``lenfol`` matches the reference value to 7 decimals.
    """
    whas500 = make_whas500(to_numeric=True)

    model = FastSurvivalSVM(
        optimizer=optimizer_regression,
        rank_ratio=0.5,
        max_iter=50,
        fit_intercept=False,
        random_state=0,
    )
    model.fit(whas500.x, whas500.y)

    assert not hasattr(model, "intercept_")

    expected_coef = numpy.array([
        0.00669121, -0.2754864, -0.14124808, 0.0748376, -0.2812598,
        0.07543884, 0.09845683, 0.08398258, -0.12182314, 0.02637739,
        0.03060149, 0.11870598, -0.52688224, -0.01762842,
    ])
    assert_array_almost_equal(expected_coef, model.coef_)

    predictions = model.predict(whas500.x)
    rmse = numpy.sqrt(mean_squared_error(whas500.y['lenfol'], predictions))
    rmse_error = abs(1128.4460587629746 - rmse)
    assert round(rmse_error, 7) == 0
def test_fit_and_predict_ranking(make_whas500, optimizer_any):
    """Fit with default (ranking) settings and check stored reference values.

    Verifies that no ``intercept_`` attribute is set, that the coefficients
    match the reference vector, that there is one coefficient per feature
    column, and that ``score`` matches the reference value to 6 decimals.
    """
    whas500 = make_whas500(to_numeric=True)

    model = FastSurvivalSVM(optimizer=optimizer_any, random_state=0)
    model.fit(whas500.x, whas500.y)

    assert not hasattr(model, "intercept_")

    expected_coef = numpy.array([
        -0.02066177, -0.26449933, -0.15205399, 0.0794547, -0.28840498,
        -0.02864288, 0.09901995, 0.04505302, -0.12512215, 0.03341365,
        -0.00110442, 0.05446756, -0.53009875, -0.01394175,
    ])
    assert_array_almost_equal(expected_coef, model.coef_)

    assert whas500.x.shape[1] == model.coef_.shape[0]

    cindex = model.score(whas500.x, whas500.y)
    cindex_error = abs(0.7860650174985695 - cindex)
    assert round(cindex_error, 6) == 0
def test_fit_and_predict_hybrid_no_intercept(self):
    """Fit with ``rank_ratio=0.5`` and no intercept, then check stored reference values.

    Skipped for the 'simple' and 'PRSVM' optimizers. Verifies that no
    ``intercept_`` attribute is set, the coefficient vector, and the RMSE of
    the predictions against the ``lenfol`` field of ``self.y``.
    """
    without_regression = {'simple', 'PRSVM'}
    if self.OPTIMIZER in without_regression:
        raise unittest.SkipTest("regression not implemented for " + self.OPTIMIZER)

    model = FastSurvivalSVM(
        optimizer=self.OPTIMIZER,
        rank_ratio=0.5,
        max_iter=50,
        fit_intercept=False,
        random_state=0,
    )
    model.fit(self.x.values, self.y)

    self.assertFalse(hasattr(model, "intercept_"))

    expected_coef = numpy.array([
        0.00669121, -0.2754864, -0.14124808, 0.0748376, -0.2812598,
        0.07543884, 0.09845683, 0.08398258, -0.12182314, 0.02637739,
        0.03060149, 0.11870598, -0.52688224, -0.01762842,
    ])
    assert_array_almost_equal(expected_coef, model.coef_)

    predictions = model.predict(self.x.values)
    squared_error = mean_squared_error(self.y['lenfol'], predictions)
    rmse = numpy.sqrt(squared_error)
    self.assertAlmostEqual(1128.4460587629746, rmse)
def test_fit_and_predict_regression_no_intercept(self):
    """Fit with ``rank_ratio=0.0`` and no intercept, then check stored reference values.

    Skipped for the 'simple' and 'PRSVM' optimizers. Verifies that no
    ``intercept_`` attribute is set, the coefficient vector, and the RMSE of
    the predictions against the ``lenfol`` field of ``self.y``.
    """
    without_regression = {'simple', 'PRSVM'}
    if self.OPTIMIZER in without_regression:
        raise unittest.SkipTest("regression not implemented for " + self.OPTIMIZER)

    model = FastSurvivalSVM(
        optimizer=self.OPTIMIZER,
        rank_ratio=0.0,
        max_iter=50,
        fit_intercept=False,
        random_state=0,
    )
    model.fit(self.x.values, self.y)

    self.assertFalse(hasattr(model, "intercept_"))

    expected_coef = numpy.array([
        1.39989875, -1.16903161, -0.40195857, -0.05848903, -0.08421557,
        4.11924729, 0.25135451, 1.89067276, -0.25751401, -0.10213143,
        1.56333622, 3.10136873, -2.23644848, -0.11620715,
    ])
    assert_array_almost_equal(expected_coef, model.coef_)

    predictions = model.predict(self.x.values)
    squared_error = mean_squared_error(self.y['lenfol'], predictions)
    rmse = numpy.sqrt(squared_error)
    self.assertAlmostEqual(15838.510668936022, rmse)
def test_fit_and_predict_regression(make_whas500, optimizer_regression):
    """Fit with ``rank_ratio=0.0`` and an intercept, then check stored reference values.

    Verifies the fitted intercept, the coefficient vector, and the RMSE of the
    predictions against ``lenfol``; scalars are compared to 7 decimals.
    """
    whas500 = make_whas500(to_numeric=True)

    model = FastSurvivalSVM(
        optimizer=optimizer_regression,
        rank_ratio=0.0,
        max_iter=50,
        fit_intercept=True,
        random_state=0,
    )
    model.fit(whas500.x, whas500.y)

    intercept_error = abs(6.4160179606675278 - model.intercept_)
    assert round(intercept_error, 7) == 0

    expected_coef = numpy.array([
        -0.0730891368237, -0.536630355029, -0.497411603275, 0.269039958377,
        -0.730559850692, -0.0148443526234, 0.285916578892, 0.165960302339,
        -0.301749910087, 0.334855938531, 0.0886214732161, 0.0554890272028,
        -2.12680470014, 0.0421466831393,
    ])
    assert_array_almost_equal(expected_coef, model.coef_)

    predictions = model.predict(whas500.x)
    rmse = numpy.sqrt(mean_squared_error(whas500.y['lenfol'], predictions))
    rmse_error = abs(1206.6556186869332 - rmse)
    assert round(rmse_error, 7) == 0
def test_fit_and_predict_hybrid(make_whas500, optimizer_regression):
    """Fit with ``rank_ratio=0.5`` and an intercept, then check stored reference values.

    Verifies the fitted intercept, the coefficient vector, and the RMSE of the
    predictions against ``lenfol``; scalars are compared to 7 decimals.
    """
    whas500 = make_whas500(to_numeric=True)

    model = FastSurvivalSVM(
        optimizer=optimizer_regression,
        rank_ratio=0.5,
        max_iter=50,
        fit_intercept=True,
        random_state=0,
    )
    model.fit(whas500.x, whas500.y)

    intercept_error = abs(6.1409367385513729 - model.intercept_)
    assert round(intercept_error, 7) == 0

    expected_coef = numpy.array([
        -0.0209254120718, -0.265768317208, -0.154254689136, 0.0800600947891,
        -0.290121131022, -0.0288851785213, 0.0998004550073, 0.0454100937492,
        -0.125863947621, 0.0343588337797, -0.000710219364914, 0.0546969104996,
        -0.5375338235, -0.0137995110308,
    ])
    assert_array_almost_equal(expected_coef, model.coef_)

    predictions = model.predict(whas500.x)
    rmse = numpy.sqrt(mean_squared_error(whas500.y['lenfol'], predictions))
    rmse_error = abs(780.52617631863893 - rmse)
    assert round(rmse_error, 7) == 0
def test_fit_and_predict_regression(self):
    """Fit with ``rank_ratio=0.0`` and an intercept, then check stored reference values.

    Skipped for the 'simple' and 'PRSVM' optimizers. Verifies the fitted
    intercept, the coefficient vector, and the RMSE of the predictions against
    the ``lenfol`` field of ``self.y``.
    """
    without_regression = {'simple', 'PRSVM'}
    if self.OPTIMIZER in without_regression:
        raise unittest.SkipTest("regression not implemented for " + self.OPTIMIZER)

    model = FastSurvivalSVM(
        optimizer=self.OPTIMIZER,
        rank_ratio=0.0,
        max_iter=50,
        fit_intercept=True,
        random_state=0,
    )
    model.fit(self.x.values, self.y)

    self.assertAlmostEqual(6.4160179606675278, model.intercept_)

    expected_coef = numpy.array([
        -0.0730891368237, -0.536630355029, -0.497411603275, 0.269039958377,
        -0.730559850692, -0.0148443526234, 0.285916578892, 0.165960302339,
        -0.301749910087, 0.334855938531, 0.0886214732161, 0.0554890272028,
        -2.12680470014, 0.0421466831393,
    ])
    assert_array_almost_equal(expected_coef, model.coef_)

    predictions = model.predict(self.x.values)
    squared_error = mean_squared_error(self.y['lenfol'], predictions)
    rmse = numpy.sqrt(squared_error)
    self.assertAlmostEqual(1206.6556186869332, rmse)
def test_compare_rbf(self):
    """Compare an RBF-kernel SVM against a linear SVM on KernelPCA features.

    ``FastKernelSurvivalSVM`` with the RBF kernel and ``FastSurvivalSVM``
    fitted on RBF ``KernelPCA`` output must produce matching concordance
    statistics on the normalized data. The two models use different
    ``max_iter`` values (65 and 30 respectively).
    """
    features = normalize(self.x)
    y = self.y

    kernel_svm = FastKernelSurvivalSVM(
        optimizer='rbtree', kernel="rbf", tol=1e-6, max_iter=65, random_state=0)
    kernel_svm.fit(features, y)

    kpca = KernelPCA(kernel="rbf", copy_X=True)
    projected = kpca.fit_transform(features)

    linear_svm = FastSurvivalSVM(
        optimizer='rbtree', tol=1e-6, max_iter=30, random_state=0)
    linear_svm.fit(projected, y)

    pred_nrsvm = linear_svm.predict(projected)
    pred_rsvm = kernel_svm.predict(features)

    self.assertEqual(len(pred_nrsvm), len(pred_rsvm))

    cindex_linear = concordance_index_censored(y['fstat'], y['lenfol'], pred_nrsvm)
    cindex_kernel = concordance_index_censored(y['fstat'], y['lenfol'], pred_rsvm)

    # First entry is compared approximately, the remaining entries exactly.
    self.assertAlmostEqual(cindex_linear[0], cindex_kernel[0])
    self.assertTupleEqual(cindex_linear[1:], cindex_kernel[1:])