def test_default_optimizer(make_whas500):
    """Without an explicit ``optimizer`` argument the model reports 'rbtree'."""
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(tol=1e-4, max_iter=25)

    # ConvergenceWarning is ignored while fitting; only the attribute is checked.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        model.fit(data.x, data.y)

    assert model.optimizer == 'rbtree'
def test_compare_clinical_kernel(self):
    """Compare the clinical-kernel SVM with a linear SVM on KernelPCA features.

    Both sets of predictions are scored with ``concordance_index_censored``;
    the first entry must agree approximately, the remaining entries exactly.
    """
    x_full, y = load_whas500()
    clinical = ClinicalKernelTransform()
    clinical.fit(x_full)

    shared_options = dict(optimizer='rbtree', tol=1e-8, max_iter=500, random_state=0)

    # Reference model: explicit KernelPCA embedding followed by the linear SVM.
    kpca = KernelPCA(kernel=clinical.pairwise_kernel, copy_X=True)
    embedded = kpca.fit_transform(self.x)
    linear_svm = FastSurvivalSVM(**shared_options)
    linear_svm.fit(embedded, y)

    kernel_svm = FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, **shared_options)
    kernel_svm.fit(self.x.values, y)

    pred_linear = linear_svm.predict(kpca.transform(self.x))
    pred_kernel = kernel_svm.predict(self.x.values)
    self.assertEqual(len(pred_linear), len(pred_kernel))

    cindex_linear = concordance_index_censored(y['fstat'], y['lenfol'], pred_linear)
    cindex_kernel = concordance_index_censored(y['fstat'], y['lenfol'], pred_kernel)

    self.assertAlmostEqual(cindex_linear[0], cindex_kernel[0])
    self.assertTupleEqual(cindex_linear[1:], cindex_kernel[1:])
def test_compare_clinical_kernel(make_whas500):
    """Compare the clinical-kernel SVM with a linear SVM on KernelPCA features.

    The linear model's concordance result is the expected value for the
    kernel model's predictions.
    """
    data = make_whas500(to_numeric=True)

    clinical = ClinicalKernelTransform()
    clinical.fit(data.x_data_frame)

    shared_options = dict(optimizer='rbtree', tol=1e-8, max_iter=500, random_state=0)

    # Reference model: explicit KernelPCA embedding followed by the linear SVM.
    kpca = KernelPCA(kernel=clinical.pairwise_kernel, copy_X=True)
    embedded = kpca.fit_transform(data.x)
    linear_svm = FastSurvivalSVM(**shared_options)
    linear_svm.fit(embedded, data.y)

    kernel_svm = FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, **shared_options)
    kernel_svm.fit(data.x, data.y)

    pred_linear = linear_svm.predict(kpca.transform(data.x))
    pred_kernel = kernel_svm.predict(data.x)
    assert len(pred_linear) == len(pred_kernel)

    expected_cindex = concordance_index_censored(
        data.y['fstat'], data.y['lenfol'], pred_linear
    )
    assert_cindex_almost_equal(
        data.y['fstat'], data.y['lenfol'], pred_kernel, expected_cindex
    )
def test_compare_builtin_kernel(make_whas500):
    """Compare the polynomial-kernel SVM with a linear SVM on KernelPCA features.

    The linear model's concordance result is the expected value for the
    kernel model's predictions.
    """
    data = make_whas500(to_numeric=True)
    features = normalize(data.x)

    seed = 0xf38
    kernel_options = dict(kernel="polynomial", gamma=0.5, degree=2)
    solver_options = dict(optimizer='rbtree', tol=1e-8, max_iter=100, random_state=seed)

    kernel_svm = FastKernelSurvivalSVM(**kernel_options, **solver_options)
    kernel_svm.fit(features, data.y)
    pred_kernel = kernel_svm.predict(features)

    # Reference model: explicit KernelPCA embedding followed by the linear SVM.
    kpca = KernelPCA(copy_X=True, random_state=seed, **kernel_options)
    embedded = kpca.fit_transform(features)
    linear_svm = FastSurvivalSVM(**solver_options)
    linear_svm.fit(embedded, data.y)
    pred_linear = linear_svm.predict(embedded)

    assert len(pred_linear) == len(pred_kernel)

    expected_cindex = concordance_index_censored(
        data.y['fstat'], data.y['lenfol'], pred_linear
    )
    assert_cindex_almost_equal(
        data.y['fstat'], data.y['lenfol'], pred_kernel, expected_cindex
    )
def test_unknown_optimizer(fake_data):
    """Fitting with an unrecognised optimizer name raises ``ValueError``."""
    features, target = fake_data
    model = FastKernelSurvivalSVM(optimizer='random stuff')

    expected_message = "unknown optimizer: random stuff"
    with pytest.raises(ValueError, match=expected_message):
        model.fit(features, target)
def test_compare_rbf(self):
    """Compare the RBF-kernel SVM with a linear SVM on RBF KernelPCA features.

    Both sets of predictions are scored with ``concordance_index_censored``;
    the first entry must agree approximately, the remaining entries exactly.
    """
    raw_x, y = load_whas500()
    features = encode_categorical(standardize(raw_x))

    shared_options = dict(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)

    # Reference model: explicit KernelPCA embedding followed by the linear SVM.
    kpca = KernelPCA(kernel="rbf")
    embedded = kpca.fit_transform(features)
    linear_svm = FastSurvivalSVM(**shared_options)
    linear_svm.fit(embedded, y)

    kernel_svm = FastKernelSurvivalSVM(kernel="rbf", **shared_options)
    kernel_svm.fit(features, y)

    pred_linear = linear_svm.predict(kpca.transform(features))
    pred_kernel = kernel_svm.predict(features)
    self.assertEqual(len(pred_linear), len(pred_kernel))

    cindex_linear = concordance_index_censored(y['fstat'], y['lenfol'], pred_linear)
    cindex_kernel = concordance_index_censored(y['fstat'], y['lenfol'], pred_kernel)

    self.assertAlmostEqual(cindex_linear[0], cindex_kernel[0])
    self.assertTupleEqual(cindex_linear[1:], cindex_kernel[1:])
def test_compare_builtin_kernel(self):
    """Compare the polynomial-kernel SVM with a linear SVM on KernelPCA features.

    Both sets of predictions are scored with ``concordance_index_censored``;
    the first entry must agree approximately, the remaining entries exactly.
    """
    features = normalize(self.x)
    target = self.y

    seed = 0xf38
    kernel_options = dict(kernel="polynomial", gamma=0.5, degree=2)
    solver_options = dict(optimizer='rbtree', tol=1e-8, max_iter=100, random_state=seed)

    kernel_svm = FastKernelSurvivalSVM(**kernel_options, **solver_options)
    kernel_svm.fit(features, target)
    pred_kernel = kernel_svm.predict(features)

    # Reference model: explicit KernelPCA embedding followed by the linear SVM.
    kpca = KernelPCA(copy_X=True, random_state=seed, **kernel_options)
    embedded = kpca.fit_transform(features)
    linear_svm = FastSurvivalSVM(**solver_options)
    linear_svm.fit(embedded, target)
    pred_linear = linear_svm.predict(embedded)

    self.assertEqual(len(pred_linear), len(pred_kernel))

    cindex_linear = concordance_index_censored(target['fstat'], target['lenfol'], pred_linear)
    cindex_kernel = concordance_index_censored(target['fstat'], target['lenfol'], pred_kernel)

    self.assertAlmostEqual(cindex_linear[0], cindex_kernel[0])
    self.assertTupleEqual(cindex_linear[1:], cindex_kernel[1:])
def test_fit_precomputed_kernel_invalid_shape(fake_data):
    """``fit`` with kernel='precomputed' raises ValueError for the fake data's shape."""
    features, target = fake_data
    model = FastKernelSurvivalSVM(optimizer="rbtree", kernel='precomputed', random_state=0)

    # The expected message is matched as a regular expression.
    expected_pattern = (
        r"Precomputed metric requires shape \(n_queries, n_indexed\)\. "
        r"Got \(100, 11\) for 100 indexed\."
    )
    with pytest.raises(ValueError, match=expected_pattern):
        model.fit(features, target)
def test_predict_precomputed_kernel_invalid_shape(self):
    """``predict`` raises ValueError when given a (100, 14) array after a precomputed fit."""
    model = FastKernelSurvivalSVM(optimizer="rbtree", kernel='precomputed', random_state=0)

    # Matrix of dot products between all rows of the training data.
    gram = numpy.dot(self.x.values, self.x.values.T)
    model.fit(gram, self.y)

    bad_input = numpy.random.randn(100, 14)
    expected_pattern = (
        r"Precomputed metric requires shape \(n_queries, n_indexed\)\. "
        r"Got \(100, 14\) for 500 indexed\."
    )
    with self.assertRaisesRegex(ValueError, expected_pattern):
        model.predict(bad_input)
def test_fit_and_predict_rbf_avltree(self):
    """Fit an RBF-kernel model with the 'avltree' optimizer and check its score.

    Verifies that the model is not flagged as pairwise, that it has one
    coefficient per training sample, and that the score on the training
    data is at least 0.965.
    """
    ssvm = FastKernelSurvivalSVM(optimizer='avltree', kernel='rbf',
                                 tol=2e-6, max_iter=75, random_state=0)
    ssvm.fit(self.x.values, self.y)

    self.assertFalse(ssvm._pairwise)
    # assertEqual instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

    c = ssvm.score(self.x.values, self.y)
    self.assertGreaterEqual(c, 0.965)
def test_fit_and_predict_linear_regression_no_intercept(self):
    """Regression objective with a linear kernel and ``fit_intercept=False``.

    The fitted model must not expose ``intercept_``, and the RMSE of its
    predictions on the training data is compared with a stored value.
    """
    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        rank_ratio=0.0,
        kernel="linear",
        max_iter=50,
        fit_intercept=False,
        random_state=0,
    )
    model.fit(self.x.values, self.y)

    self.assertFalse(hasattr(model, "intercept_"))

    predicted = model.predict(self.x.values)
    squared_error = mean_squared_error(self.y['lenfol'], predicted)
    rmse = numpy.sqrt(squared_error)
    self.assertAlmostEqual(rmse, 15837.658418546907, 4)
def test_fit_precomputed_kernel_not_symmetric():
    """``fit`` with kernel='precomputed' rejects a matrix that is not symmetric."""
    model = FastKernelSurvivalSVM(optimizer="rbtree", kernel='precomputed', random_state=0)

    kernel_matrix = numpy.random.randn(100, 100)
    # Make one mirrored pair of entries differ explicitly.
    kernel_matrix[10, 12] = -1
    kernel_matrix[12, 10] = 9

    all_true = numpy.ones(100).astype(bool)
    all_ones = numpy.ones(100)
    target = Surv.from_arrays(all_true, all_ones)

    with pytest.raises(ValueError, match="kernel matrix is not symmetric"):
        model.fit(kernel_matrix, target)
def test_fit_and_predict_rbf_avltree(self):
    """Fit an RBF-kernel model with the 'avltree' optimizer and check its score.

    Verifies that the model is not flagged as pairwise, that it has one
    coefficient per training sample, and that the score on the training
    data is within 1e-3 of the stored reference value.
    """
    ssvm = FastKernelSurvivalSVM(optimizer="avltree", kernel='rbf', random_state=0)
    ssvm.fit(self.x.values, self.y)

    self.assertFalse(ssvm._pairwise)
    # assertEqual instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

    c = ssvm.score(self.x.values, self.y)
    self.assertLessEqual(abs(0.92460312179802795 - c), 1e-3)
def test_fit_and_predict_hybrid_rbf(self):
    """RBF-kernel model with ``rank_ratio=0.5`` and a fitted intercept.

    The intercept and the RMSE on the training data are compared with
    stored values, within tolerances of 0.04 and 75 respectively.
    """
    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        rank_ratio=0.5,
        kernel="rbf",
        max_iter=50,
        fit_intercept=True,
        random_state=0,
    )
    model.fit(self.x.values, self.y)

    self.assertFalse(model._pairwise)

    intercept_gap = abs(5.0289145697617164 - model.intercept_)
    self.assertLessEqual(intercept_gap, 0.04)

    predicted = model.predict(self.x.values)
    rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'], predicted))
    rmse_gap = abs(880.20361811281487 - rmse)
    self.assertLessEqual(rmse_gap, 75)
def test_fit_and_predict_regression_rbf(self):
    """RBF-kernel model with ``rank_ratio=0.0`` and a fitted intercept.

    The intercept and the RMSE on the training data are compared with
    stored values.
    """
    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        rank_ratio=0.0,
        kernel="rbf",
        tol=1e-6,
        max_iter=50,
        fit_intercept=True,
        random_state=0,
    )
    model.fit(self.x.values, self.y)

    self.assertFalse(model._pairwise)
    self.assertAlmostEqual(model.intercept_, 4.9267218894089533)

    predicted = model.predict(self.x.values)
    squared_error = mean_squared_error(self.y['lenfol'], predicted)
    rmse = numpy.sqrt(squared_error)
    self.assertAlmostEqual(rmse, 783.525277, 6)
def test_fit_and_predict_linear(self):
    """Fit a linear-kernel model and score it on a shuffled index set.

    Verifies that the model is not flagged as pairwise, that it has one
    coefficient per training sample, and that the score on the rows
    selected by a seeded shuffle of ``arange(250)`` matches the stored value.
    """
    ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel='linear', random_state=0)
    ssvm.fit(self.x.values, self.y)

    self.assertFalse(ssvm._pairwise)
    # assertEqual instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

    # Seeded shuffle so the evaluated rows are reproducible.
    i = numpy.arange(250)
    numpy.random.RandomState(0).shuffle(i)

    c = ssvm.score(self.x.values[i], self.y[i])
    self.assertAlmostEqual(c, 0.76923445664157997, 6)
def test_predict_precomputed_kernel_invalid_shape(make_whas500):
    """``predict`` raises ValueError when given a (100, 14) array after a precomputed fit."""
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(optimizer="rbtree", kernel='precomputed', random_state=0)

    # Matrix of dot products between all rows of the training data.
    gram = numpy.dot(data.x, data.x.T)
    model.fit(gram, data.y)

    bad_input = numpy.random.randn(100, 14)
    expected_pattern = (
        r"Precomputed metric requires shape \(n_queries, n_indexed\)\. "
        r"Got \(100, 14\) for 500 indexed\."
    )
    with pytest.raises(ValueError, match=expected_pattern):
        model.predict(bad_input)
def test_fit_and_predict_linear_regression_no_intercept(make_whas500):
    """Regression objective with a linear kernel and ``fit_intercept=False``.

    The fitted model must not expose ``intercept_``, and the RMSE of its
    predictions on the training data is compared with a stored value.
    """
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        rank_ratio=0.0,
        kernel="linear",
        max_iter=50,
        fit_intercept=False,
        random_state=0,
    )
    model.fit(data.x, data.y)

    assert not hasattr(model, "intercept_")

    predicted = model.predict(data.x)
    rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'], predicted))
    deviation = abs(rmse - 15837.658418546907)
    assert round(deviation, 4) == 0
def test_fit_and_predict_rbf_rbtree(self):
    """Fit an RBF-kernel model with the 'rbtree' optimizer and check its score.

    Verifies that the model is not flagged as pairwise, that it has one
    coefficient per training sample, and that the score on the training
    data matches the stored value to 3 decimal places.
    """
    ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel='rbf', random_state=0)
    ssvm.fit(self.x.values, self.y)

    self.assertFalse(ssvm._pairwise)
    # assertEqual instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(self.x.shape[0], ssvm.coef_.shape[0])

    c = ssvm.score(self.x.values, self.y)
    self.assertAlmostEqual(0.92230102862313534, c, 3)
def test_fit_and_predict_rbf(make_whas500, optimizer):
    """Fit an RBF-kernel model with the given ``optimizer`` and check its score.

    The model must not be flagged as pairwise, must have one coefficient per
    training sample, and must score at least 0.965 on the training data.
    """
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(
        optimizer=optimizer,
        kernel='rbf',
        tol=2e-6,
        max_iter=75,
        random_state=0,
    )
    model.fit(data.x, data.y)

    assert not model._pairwise

    n_samples = data.x.shape[0]
    assert n_samples == model.coef_.shape[0]

    cindex = model.score(data.x, data.y)
    assert cindex >= 0.965
def test_fit_and_predict_hybrid_rbf(make_whas500):
    """RBF-kernel model with ``rank_ratio=0.5`` and a fitted intercept.

    The intercept and the RMSE on the training data are compared with
    stored values, within tolerances of 0.04 and 75 respectively.
    """
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        rank_ratio=0.5,
        kernel="rbf",
        max_iter=50,
        fit_intercept=True,
        random_state=0,
    )
    model.fit(data.x, data.y)

    assert not model._pairwise

    intercept_gap = abs(5.0289145697617164 - model.intercept_)
    assert intercept_gap <= 0.04

    predicted = model.predict(data.x)
    rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'], predicted))
    rmse_gap = abs(880.20361811281487 - rmse)
    assert rmse_gap <= 75
def test_fit_and_predict_regression_rbf(make_whas500):
    """RBF-kernel model with ``rank_ratio=0.0`` and a fitted intercept.

    The intercept and the RMSE on the training data are compared with
    stored values.
    """
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        rank_ratio=0.0,
        kernel="rbf",
        tol=1e-6,
        max_iter=50,
        fit_intercept=True,
        random_state=0,
    )
    model.fit(data.x, data.y)

    assert not model._pairwise

    intercept_deviation = abs(model.intercept_ - 4.9267218894089533)
    assert round(intercept_deviation, 7) == 0

    predicted = model.predict(data.x)
    rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'], predicted))
    rmse_deviation = abs(rmse - 783.525277)
    assert round(rmse_deviation, 6) == 0
def test_fit_and_predict_linear(make_whas500):
    """Fit a linear-kernel model and score it on a shuffled index set.

    The model must not be flagged as pairwise and must have one coefficient
    per training sample; the score on the rows selected by a seeded shuffle
    of ``arange(250)`` is compared with a stored value.
    """
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(optimizer="rbtree", kernel='linear', random_state=0)
    model.fit(data.x, data.y)

    assert not model._pairwise

    n_samples = data.x.shape[0]
    assert n_samples == model.coef_.shape[0]

    # Seeded shuffle so the evaluated rows are reproducible.
    subset = numpy.arange(250)
    numpy.random.RandomState(0).shuffle(subset)

    cindex = model.score(data.x[subset], data.y[subset])
    deviation = abs(cindex - 0.76923445664157997)
    assert round(deviation, 6) == 0
def test_fit_and_predict_linear_regression_precomputed(self):
    """Regression objective on a precomputed matrix of row dot products.

    The model must be flagged as pairwise; the intercept and the RMSE on a
    seeded shuffle of ``arange(250)`` rows are compared with stored values.
    """
    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        rank_ratio=0.0,
        kernel="precomputed",
        max_iter=50,
        fit_intercept=True,
        random_state=0,
    )
    gram = numpy.dot(self.x.values, self.x.values.T)
    model.fit(gram, self.y)

    self.assertTrue(model._pairwise)
    self.assertAlmostEqual(model.intercept_, 6.3979746625712295, 5)

    # Seeded shuffle so the evaluated rows are reproducible.
    subset = numpy.arange(250)
    numpy.random.RandomState(0).shuffle(subset)

    predicted = model.predict(gram[subset])
    rmse = numpy.sqrt(mean_squared_error(self.y['lenfol'][subset], predicted))
    rmse_gap = abs(1339.3006854574726 - rmse)
    self.assertLessEqual(rmse_gap, 0.25)
def test_fit_and_predict_clinical_kernel(make_whas500):
    """Fit a model using ``ClinicalKernelTransform.pairwise_kernel`` as the kernel.

    The model must not be flagged as pairwise, must have one coefficient per
    training sample, and must score at least 0.854 on the training data.
    """
    data = make_whas500(to_numeric=True)

    clinical = ClinicalKernelTransform()
    clinical.fit(data.x_data_frame)

    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        kernel=clinical.pairwise_kernel,
        tol=7e-7,
        max_iter=100,
        random_state=0,
    )
    model.fit(data.x, data.y)

    assert not model._pairwise

    n_samples = data.x.shape[0]
    assert n_samples == model.coef_.shape[0]

    cindex = model.score(data.x, data.y)
    assert cindex >= 0.854
def test_fit_and_predict_linear_regression_precomputed(make_whas500):
    """Regression objective on a precomputed matrix of row dot products.

    The model must be flagged as pairwise; the intercept is compared with a
    stored value and the RMSE on a seeded shuffle of ``arange(250)`` rows
    must not exceed a stored bound.
    """
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        rank_ratio=0.0,
        kernel="precomputed",
        max_iter=50,
        fit_intercept=True,
        random_state=0,
    )
    gram = numpy.dot(data.x, data.x.T)
    model.fit(gram, data.y)

    assert model._pairwise

    intercept_deviation = abs(model.intercept_ - 6.3979746625712295)
    assert round(intercept_deviation, 5) == 0

    # Seeded shuffle so the evaluated rows are reproducible.
    subset = numpy.arange(250)
    numpy.random.RandomState(0).shuffle(subset)

    predicted = model.predict(gram[subset])
    rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'][subset], predicted))
    assert rmse <= 1339.3006854574726 + 0.293
def test_fit_and_predict_clinical_kernel(self):
    """Fit a model using ``ClinicalKernelTransform.pairwise_kernel`` as the kernel.

    The transform is fitted on the data returned by ``load_whas500``; the
    model is fitted on ``self.x``. Verifies that the model is not flagged as
    pairwise, that it has one coefficient per training sample, and that the
    score on the training data is at least 0.854.
    """
    x_full, y = load_whas500()

    trans = ClinicalKernelTransform()
    trans.fit(x_full)

    x = self.x

    ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel=trans.pairwise_kernel,
                                 tol=7e-7, max_iter=100, random_state=0)
    ssvm.fit(x.values, y)

    self.assertFalse(ssvm._pairwise)
    # assertEqual instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(x.shape[0], ssvm.coef_.shape[0])

    c = ssvm.score(x.values, y)
    self.assertGreaterEqual(c, 0.854)
def test_fit_and_predict_linear_regression(make_whas500):
    """Regression objective with a linear kernel and a fitted intercept.

    The model must not be flagged as pairwise. The intercept is compared with
    a stored value; on a seeded shuffle of ``arange(250)`` rows the RMSE must
    not exceed a stored bound and the score must match a stored value.
    """
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        rank_ratio=0.0,
        kernel="linear",
        max_iter=50,
        tol=1e-8,
        fit_intercept=True,
        random_state=0,
    )
    model.fit(data.x, data.y)

    assert not model._pairwise

    intercept_deviation = abs(model.intercept_ - 6.416017539824949)
    assert round(intercept_deviation, 5) == 0

    # Seeded shuffle so the evaluated rows are reproducible.
    subset = numpy.arange(250)
    numpy.random.RandomState(0).shuffle(subset)

    predicted = model.predict(data.x[subset])
    rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'][subset], predicted))
    assert rmse <= 1342.274550652291 + 0.293

    cindex = model.score(data.x[subset], data.y[subset])
    cindex_deviation = abs(cindex - 0.7630027323714108)
    assert round(cindex_deviation, 6) == 0
def test_fit_and_predict_clinical_kernel(self):
    """Fit a model using ``ClinicalKernelTransform.pairwise_kernel`` as the kernel.

    The transform is fitted on the data returned by ``load_whas500``; the
    model is fitted on the standardized, categorically encoded version of
    that data. Verifies that the model is not flagged as pairwise, that it
    has one coefficient per training sample, and that the score on the
    training data is within 1e-3 of the stored reference value.
    """
    x_full, y = load_whas500()

    trans = ClinicalKernelTransform()
    trans.fit(x_full)

    x = encode_categorical(standardize(x_full))

    ssvm = FastKernelSurvivalSVM(optimizer="rbtree", kernel=trans.pairwise_kernel,
                                 max_iter=100, random_state=0)
    ssvm.fit(x.values, y)

    self.assertFalse(ssvm._pairwise)
    # assertEqual instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(x.shape[0], ssvm.coef_.shape[0])

    c = ssvm.score(x.values, y)
    self.assertLessEqual(abs(0.83699051218246412 - c), 1e-3)
def test_fit_and_predict_linear_regression_precomputed(make_whas500):
    """Regression objective on a precomputed matrix of row dot products.

    The estimator's ``pairwise`` tag must be truthy; the intercept is compared
    with a stored value and the RMSE on a seeded shuffle of ``arange(250)``
    rows must not exceed a stored bound.
    """
    data = make_whas500(to_numeric=True)
    model = FastKernelSurvivalSVM(
        optimizer="rbtree",
        rank_ratio=0.0,
        kernel="precomputed",
        max_iter=50,
        tol=1e-8,
        fit_intercept=True,
        random_state=0,
    )
    gram = numpy.dot(data.x, data.x.T)
    model.fit(gram, data.y)

    tags = model._get_tags()
    assert tags["pairwise"]

    intercept_deviation = abs(model.intercept_ - 6.416017539824949)
    assert round(intercept_deviation, 5) == 0

    # Seeded shuffle so the evaluated rows are reproducible.
    subset = numpy.arange(250)
    numpy.random.RandomState(0).shuffle(subset)

    predicted = model.predict(gram[subset])
    rmse = numpy.sqrt(mean_squared_error(data.y['lenfol'][subset], predicted))
    assert rmse <= 1342.274550652291 + 0.293