def test_feature_mismatch(make_data):
    """Check that ``clinical_kernel`` rejects ``x``/``y`` with mismatched features.

    Two cases are exercised, each expected to raise ``ValueError``:

    * two column slices of the same frame (first two columns vs. the rest),
    * the first two columns against a ``(10, 17)`` array of zeros.
    """
    frame, _ = make_data()
    first_two = frame.iloc[:, :2]
    remainder = frame.iloc[:, 2:]

    with pytest.raises(ValueError, match='columns do not match'):
        clinical_kernel(first_two, remainder)

    zeros = numpy.zeros((10, 17))
    with pytest.raises(ValueError, match='x and y have different number of features'):
        clinical_kernel(first_two, zeros)
def test_clinical_kernel_only_nominal(self):
    """Compare the kernel of the reduced data set against the reference matrix.

    The columns 'age', 'lymph node size' and 'lymph node spread' are dropped
    from ``self.data``; the reference matrix is requested with continuous and
    ordinal features disabled. Agreement is checked to 4 decimals.
    """
    dropped_columns = ['age', 'lymph node size', 'lymph node spread']
    reduced = self.data.drop(dropped_columns, axis=1)
    actual = clinical_kernel(reduced)

    reference = _get_expected_matrix(with_continuous=False, with_ordinal=False)
    assert_array_almost_equal(reference, actual, 4)
def test_clinical_kernel_no_continuous(self):
    """Kernel of ``self.data`` without the 'age' column matches the reference.

    The reference matrix is requested with continuous features disabled and
    compared to 4 decimals.
    """
    without_age = self.data.drop('age', axis=1)
    actual = clinical_kernel(without_age)

    reference = _get_expected_matrix(with_continuous=False)
    assert_array_almost_equal(reference, actual, 4)
def test_clinical_kernel_no_nominal(self):
    """Kernel of ``self.data`` without the 'metastasis' column matches the reference.

    The reference matrix is requested with nominal features disabled and
    compared to 4 decimals.
    """
    without_metastasis = self.data.drop('metastasis', axis=1)
    actual = clinical_kernel(without_metastasis)

    reference = _get_expected_matrix(with_nominal=False)
    assert_array_almost_equal(reference, actual, 4)
def test_clinical_kernel_1(self):
    """Kernel of the full ``self.data`` matches the default reference matrix.

    Agreement is checked to 4 decimals.
    """
    actual = clinical_kernel(self.data)
    reference = _get_expected_matrix()
    assert_array_almost_equal(reference, actual, 4)
def test_clinical_kernel_x_and_y(self):
    """Kernel between two row subsets equals the matching reference sub-block.

    The first three rows of ``self.data`` are passed as ``x`` and the
    remaining rows as ``y``; the result is compared (4 decimals) with rows
    0-2 and columns 3 onward of the default reference matrix.
    """
    head_rows = self.data.iloc[:3, :]
    tail_rows = self.data.iloc[3:, :]
    actual = clinical_kernel(head_rows, tail_rows)

    full_reference = _get_expected_matrix()
    reference_block = full_reference[:3, 3:]
    assert_array_almost_equal(reference_block, actual, 4)
def test_clinical_kernel_no_ordinal(make_data):
    """Kernel of data produced with ``with_ordinal=False`` matches its reference.

    ``make_data`` supplies both the input frame and the expected matrix;
    agreement is checked to 4 decimals.
    """
    frame, reference = make_data(with_ordinal=False)
    actual = clinical_kernel(frame)
    assert_array_almost_equal(reference, actual, 4)
def test_clinical_kernel_1(make_data):
    """Kernel of the default ``make_data`` frame matches its expected matrix.

    Agreement is checked to 4 decimals.
    """
    frame, reference = make_data()
    actual = clinical_kernel(frame)
    assert_array_almost_equal(reference, actual, 4)
def test_clinical_kernel_x_and_y(make_data):
    """Kernel between two row subsets equals the matching reference sub-block.

    The first three rows of the ``make_data`` frame are passed as ``x`` and
    the remaining rows as ``y``; the result is compared (4 decimals) with
    rows 0-2 and columns 3 onward of the full expected matrix.
    """
    frame, full_reference = make_data()

    head_rows = frame.iloc[:3, :]
    tail_rows = frame.iloc[3:, :]
    actual = clinical_kernel(head_rows, tail_rows)

    reference_block = full_reference[:3, 3:]
    assert_array_almost_equal(reference_block, actual, 4)
def test_clinical_kernel_no_continuous(make_data):
    """Kernel of data produced with ``with_continuous=False`` matches its reference.

    ``make_data`` supplies both the input frame and the expected matrix;
    agreement is checked to 4 decimals.
    """
    frame, reference = make_data(with_continuous=False)
    actual = clinical_kernel(frame)
    assert_array_almost_equal(reference, actual, 4)
## Clamp follow-up days that are less than 0 to 0.
# NOTE(review): element-wise assignment is kept on purpose; the container type
# of data_y is not visible here, so a vectorised rewrite could change behavior.
for position, follow_up in enumerate(data_y['time_to_event']):
    if follow_up < 0:
        data_y['time_to_event'][position] = 0

# data_y
# df.groupby('status').count()

# Part 2: FastKernelSurvivalSVM
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sksurv.kernels import clinical_kernel
from sksurv.metrics import concordance_index_censored
from sksurv.svm import FastKernelSurvivalSVM

# The kernel matrix is computed up front; the estimator is configured with
# kernel="precomputed" accordingly.
kernel_matrix = clinical_kernel(data_x)
kssvm = FastKernelSurvivalSVM(
    optimizer="rbtree",
    kernel="precomputed",
    random_state=0,
)


def score_survival_model(model, X, y):
    """Score ``model`` with Harrell's concordance index (for grid search).

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    X
        Input passed unchanged to ``model.predict``.
    y
        Object indexable by ``'status'`` and ``'time_to_event'``.

    Returns
    -------
    The first element of the value returned by ``concordance_index_censored``.
    """
    predicted = model.predict(X)
    return concordance_index_censored(
        y['status'], y['time_to_event'], predicted
    )[0]


# Candidate values for the 'alpha' hyper-parameter.
param_grid = {
    'alpha': [0.001, 0.01, 0.1, 0.5, 1, 10, 100, 1000],
}

# 200 seeded random splits, each holding out 30% of the samples for testing.
cv = ShuffleSplit(n_splits=200, test_size=0.3, random_state=0)