def test_concordance_index_min_samples():
    """A single sample must be rejected with a "minimum of two samples" error."""
    single_event = numpy.array([False])
    single_time = numpy.array([10])
    single_estimate = numpy.array([12])

    expected_message = "Need a minimum of two samples"
    with pytest.raises(ValueError, match=expected_message):
        concordance_index_censored(single_event, single_time, single_estimate)
def analysis_c_index(self, verbose=False):
    """Compute the concordance index with a bootstrap confidence interval.

    Predictions, event indicators and times are read from ``self.data`` using
    the column names ``self.var_pred``, ``self.var_event`` and
    ``self.var_time``, restricted to the rows selected by ``self.data_mask``.

    Parameters
    ----------
    verbose : bool, default False
        If true, print the score of every accepted bootstrap replicate.

    Returns
    -------
    tuple
        ``(c_index, lower, upper)``. ``lower`` and ``upper`` are the 5th and
        95th percentile of the bootstrapped scores, i.e. a 90% interval.
        ``(0, 0, 0)`` is returned when at most one sample is selected. The
        bounds are NaN when no bootstrap replicate could be scored.
    """
    y_pred = self.data[self.var_pred][self.data_mask].values
    y_true = self.data[self.var_event][self.data_mask].values.astype(bool)
    y_time = self.data[self.var_time][self.data_mask].values

    n_samples = len(y_pred)
    if n_samples <= 1:
        return 0, 0, 0

    cidx_value_raw = concordance_index_censored(y_true, y_time, y_pred)[0]

    bootstrapped_scores = []
    rng = np.random.RandomState(self.random_seed)
    for i in range(self.n_bootstraps):
        # Bootstrap by sampling with replacement on the prediction indices.
        indices = rng.randint(0, n_samples, n_samples)
        if len(np.unique(y_true[indices])) < 2:
            # Replicates holding only events or only censored samples are
            # rejected (the all-censored case cannot be scored at all).
            continue

        score = concordance_index_censored(
            y_true[indices], y_time[indices], y_pred[indices]
        )[0]
        bootstrapped_scores.append(score)
        if verbose:
            print("Bootstrap #{} C-index: {:0.3f}".format(i + 1, score))

    if bootstrapped_scores:
        sorted_scores = np.sort(bootstrapped_scores)
        confidence_lower = sorted_scores[int(0.05 * len(sorted_scores))]
        confidence_upper = sorted_scores[int(0.95 * len(sorted_scores))]
    else:
        # Every replicate was rejected (or n_bootstraps is 0): there is no
        # distribution to take percentiles from.
        confidence_lower = confidence_upper = float("nan")

    # The 5th-95th percentile range is a 90% interval, so label it as such.
    print("C-Index value: %.3f (0.90 CI, %.3f-%.3f)"
          % (cidx_value_raw, confidence_lower, confidence_upper))

    return (cidx_value_raw, confidence_lower, confidence_upper)
def test_concordance_index_all_censored():
    """Input without any observed event raises "All samples are censored"."""
    no_events = numpy.array([False, False])
    follow_up = numpy.array([10, 12])
    predicted = numpy.array([12, 13])

    expected_message = "All samples are censored"
    with pytest.raises(ValueError, match=expected_message):
        concordance_index_censored(no_events, follow_up, predicted)
def test_compare_builtin_kernel(self):
    """Kernel SVM and linear SVM on KernelPCA features give the same c-index.

    Both paths use the same polynomial kernel settings; the concordance
    index must agree approximately and the pair counts exactly.
    """
    features = normalize(self.x)
    target = self.y

    kernel_params = dict(kernel="polynomial", gamma=0.5, degree=2)
    solver_params = dict(optimizer='rbtree', tol=1e-8, max_iter=100,
                         random_state=0xf38)

    # Kernel model fitted directly on the normalized features.
    kernel_svm = FastKernelSurvivalSVM(**solver_params, **kernel_params)
    kernel_svm.fit(features, target)
    kernel_pred = kernel_svm.predict(features)

    # Linear model fitted on the explicit kernel feature map.
    feature_map = KernelPCA(copy_X=True, random_state=0xf38, **kernel_params)
    mapped = feature_map.fit_transform(features)
    linear_svm = FastSurvivalSVM(**solver_params)
    linear_svm.fit(mapped, target)
    linear_pred = linear_svm.predict(mapped)

    self.assertEqual(len(linear_pred), len(kernel_pred))

    status, duration = target['fstat'], target['lenfol']
    linear_result = concordance_index_censored(status, duration, linear_pred)
    kernel_result = concordance_index_censored(status, duration, kernel_pred)

    self.assertAlmostEqual(linear_result[0], kernel_result[0])
    self.assertTupleEqual(linear_result[1:], kernel_result[1:])
def test_compare_rbf(self):
    """RBF kernel SVM matches a linear SVM trained on RBF KernelPCA features."""
    raw_x, target = load_whas500()
    features = encode_categorical(standardize(raw_x))

    solver_params = dict(optimizer='rbtree', tol=1e-8, max_iter=1000,
                         random_state=0)

    feature_map = KernelPCA(kernel="rbf")
    mapped = feature_map.fit_transform(features)

    linear_svm = FastSurvivalSVM(**solver_params)
    linear_svm.fit(mapped, target)

    kernel_svm = FastKernelSurvivalSVM(kernel="rbf", **solver_params)
    kernel_svm.fit(features, target)

    linear_pred = linear_svm.predict(feature_map.transform(features))
    kernel_pred = kernel_svm.predict(features)

    self.assertEqual(len(linear_pred), len(kernel_pred))

    status, duration = target['fstat'], target['lenfol']
    linear_result = concordance_index_censored(status, duration, linear_pred)
    kernel_result = concordance_index_censored(status, duration, kernel_pred)

    self.assertAlmostEqual(linear_result[0], kernel_result[0])
    self.assertTupleEqual(linear_result[1:], kernel_result[1:])
def test_survival_squared_hinge_loss(self):
    """Naive and fast survival SVM agree on coefficients and concordance."""
    features, target = self.get_data_without_ties()

    naive_svm = NaiveSurvivalSVM(loss='squared_hinge', dual=False, tol=8e-7,
                                 max_iter=1000, random_state=0)
    naive_svm.fit(features, target)

    fast_svm = FastSurvivalSVM(optimizer='avltree', tol=8e-7, max_iter=1000,
                               random_state=0)
    fast_svm.fit(features, target)

    # Coefficients must match to three decimals.
    assert_array_almost_equal(naive_svm.coef_.ravel(), fast_svm.coef_, 3)

    naive_pred = naive_svm.predict(features)
    fast_pred = fast_svm.predict(features)
    self.assertEqual(len(naive_pred), len(fast_pred))

    status, duration = target['fstat'], target['lenfol']
    naive_result = concordance_index_censored(status, duration, naive_pred)
    fast_result = concordance_index_censored(status, duration, fast_pred)

    self.assertAlmostEqual(naive_result[0], fast_result[0])
    self.assertTupleEqual(naive_result[1:], fast_result[1:])
def test_compare_clinical_kernel(self):
    """Clinical-kernel SVM matches a linear SVM on clinical KernelPCA features."""
    full_frame, target = load_whas500()

    clinical = ClinicalKernelTransform()
    clinical.fit(full_frame)
    kernel_fn = clinical.pairwise_kernel

    solver_params = dict(optimizer='rbtree', tol=1e-8, max_iter=500,
                         random_state=0)

    feature_map = KernelPCA(kernel=kernel_fn, copy_X=True)
    mapped = feature_map.fit_transform(self.x)

    linear_svm = FastSurvivalSVM(**solver_params)
    linear_svm.fit(mapped, target)

    kernel_svm = FastKernelSurvivalSVM(kernel=kernel_fn, **solver_params)
    kernel_svm.fit(self.x.values, target)

    linear_pred = linear_svm.predict(feature_map.transform(self.x))
    kernel_pred = kernel_svm.predict(self.x.values)

    self.assertEqual(len(linear_pred), len(kernel_pred))

    status, duration = target['fstat'], target['lenfol']
    linear_result = concordance_index_censored(status, duration, linear_pred)
    kernel_result = concordance_index_censored(status, duration, kernel_pred)

    self.assertAlmostEqual(linear_result[0], kernel_result[0])
    self.assertTupleEqual(linear_result[1:], kernel_result[1:])
def test_concordance_index_not_1d(whas500_pred, dim):
    """A two-dimensional estimate with ``dim`` columns raises ValueError."""
    event, time, risk = whas500_pred

    # Turn the risk vector into a column and repeat it ``dim`` times.
    risk_column = risk[:, numpy.newaxis]
    risk_2d = numpy.tile(risk_column, (1, dim))

    expected_message = "Expected 1D array, got 2D array instead:"
    with pytest.raises(ValueError, match=expected_message):
        concordance_index_censored(event, time, risk_2d)
def test_concordance_index_boolean_event():
    """An integer (non-boolean) event indicator is rejected with ValueError."""
    integer_event = numpy.array([1, 0, 0, 1, 1, 0])
    follow_up = numpy.array([1, 5, 10, 12, 7, 65])
    predicted = numpy.array([12, 8, 1, 89, 56, 13])

    expected_pattern = (
        "only boolean arrays are supported as class labels for survival analysis.+"
    )
    with pytest.raises(ValueError, match=expected_pattern):
        concordance_index_censored(integer_event, follow_up, predicted)
def stratified_concordance_index(output, event, time, strata=None):
    """Calculate the stratified concordance index.

    Concordant and discordant pair counts are computed separately within each
    stratum and then summed, so only individuals from the same stratum are
    compared with each other. If ``strata`` is None, or every individual
    belongs to the same stratum, all individuals form a single stratum.

    Parameters
    ----------
    output : array-like
        Predicted risk score per individual.
    event : array-like
        Event indicator per individual, passed on as ``event_indicator``.
    time : array-like
        Event or censoring time per individual.
    strata : array-like, optional
        Stratum label per individual.

    Returns
    -------
    float
        ``concordant / (concordant + discordant)`` over all strata. Pairs tied
        in predicted risk are not part of either count.
    """
    # Convert up front so positional masks work for lists and for pandas
    # Series whose index is not 0..n-1 (where obj[idx] would be label-based).
    output = np.asarray(output)
    event = np.asarray(event)
    time = np.asarray(time)
    if strata is None:
        strata = np.full(len(time), 1)
    else:
        strata = np.asarray(strata)

    concordant = 0
    discordant = 0
    for strat in np.unique(strata):
        # Select the individuals of this stratum.
        in_stratum = strata == strat

        c_index = concordance_index_censored(
            event_indicator=event[in_stratum],
            event_time=time[in_stratum],
            estimate=output[in_stratum],
        )

        # Result layout: (c-index, concordant, discordant, ...).
        concordant += c_index[1]
        discordant += c_index[2]

    return concordant / (concordant + discordant)
def test_survival_squared_hinge_loss(whas500_without_ties):
    """Naive and fast survival SVM agree on coefficients and concordance."""
    features, target = whas500_without_ties

    naive_svm = NaiveSurvivalSVM(loss='squared_hinge', dual=False, tol=8e-7,
                                 max_iter=1000, random_state=0)
    naive_svm.fit(features, target)

    fast_svm = FastSurvivalSVM(optimizer='avltree', tol=8e-7, max_iter=1000,
                               random_state=0)
    fast_svm.fit(features, target)

    # Coefficients must match to three decimals.
    assert_array_almost_equal(naive_svm.coef_.ravel(), fast_svm.coef_, 3)

    naive_pred = naive_svm.predict(features)
    fast_pred = fast_svm.predict(features)
    assert len(naive_pred) == len(fast_pred)

    status, duration = target['fstat'], target['lenfol']
    reference = concordance_index_censored(status, duration, naive_pred)
    assert_cindex_almost_equal(status, duration, fast_pred, reference)
def test_compare_clinical_kernel(make_whas500):
    """Clinical-kernel SVM matches a linear SVM on clinical KernelPCA features."""
    dataset = make_whas500(to_numeric=True)
    features, target = dataset.x, dataset.y

    clinical = ClinicalKernelTransform()
    clinical.fit(dataset.x_data_frame)
    kernel_fn = clinical.pairwise_kernel

    solver_params = dict(optimizer='rbtree', tol=1e-8, max_iter=500,
                         random_state=0)

    feature_map = KernelPCA(kernel=kernel_fn, copy_X=True)
    mapped = feature_map.fit_transform(features)

    linear_svm = FastSurvivalSVM(**solver_params)
    linear_svm.fit(mapped, target)

    kernel_svm = FastKernelSurvivalSVM(kernel=kernel_fn, **solver_params)
    kernel_svm.fit(features, target)

    linear_pred = linear_svm.predict(feature_map.transform(features))
    kernel_pred = kernel_svm.predict(features)
    assert len(linear_pred) == len(kernel_pred)

    status, duration = target['fstat'], target['lenfol']
    reference = concordance_index_censored(status, duration, linear_pred)
    assert_cindex_almost_equal(status, duration, kernel_pred, reference)
def test_compare_builtin_kernel(make_whas500):
    """Kernel SVM and linear SVM on KernelPCA features give the same c-index."""
    dataset = make_whas500(to_numeric=True)
    features = normalize(dataset.x)
    target = dataset.y

    kernel_params = dict(kernel="polynomial", gamma=0.5, degree=2)
    solver_params = dict(optimizer='rbtree', tol=1e-8, max_iter=100,
                         random_state=0xf38)

    # Kernel model fitted directly on the normalized features.
    kernel_svm = FastKernelSurvivalSVM(**solver_params, **kernel_params)
    kernel_svm.fit(features, target)
    kernel_pred = kernel_svm.predict(features)

    # Linear model fitted on the explicit kernel feature map.
    feature_map = KernelPCA(copy_X=True, random_state=0xf38, **kernel_params)
    mapped = feature_map.fit_transform(features)
    linear_svm = FastSurvivalSVM(**solver_params)
    linear_svm.fit(mapped, target)
    linear_pred = linear_svm.predict(mapped)

    assert len(linear_pred) == len(kernel_pred)

    status, duration = target['fstat'], target['lenfol']
    reference = concordance_index_censored(status, duration, linear_pred)
    assert_cindex_almost_equal(status, duration, kernel_pred, reference)
def test_fit_subsample(self):
    """Component-wise boosting with ``subsample=0.6`` reproduces reference values."""
    booster = ComponentwiseGradientBoostingSurvivalAnalysis(
        n_estimators=100, subsample=0.6, random_state=0)
    booster.fit(self.x, self.y)

    prediction = booster.predict(self.x)
    actual_cindex = concordance_index_censored(
        self.y['fstat'], self.y['lenfol'], prediction)
    reference_cindex = numpy.array([0.7750602, 58245, 16904, 0, 119])
    assert_array_almost_equal(reference_cindex, numpy.array(actual_cindex))

    # All coefficients are expected to be zero except the ones listed here.
    nonzero_coefficients = {
        'age': 0.041299,
        'hr': 0.00487,
        'diasbp': -0.003381,
        'bmi': -0.017018,
        'sho': 0.433685,
        'chf': 0.510277,
    }
    labels = ['(Intercept)'] + self.columns
    reference_coef = pandas.Series(numpy.zeros(15, dtype=float), index=labels)
    for column, value in nonzero_coefficients.items():
        reference_coef[column] = value
    assert_array_almost_equal(reference_coef.values, booster.coef_)

    self.assertTupleEqual((100,), booster.train_score_.shape)
    self.assertTupleEqual((100,), booster.oob_improvement_.shape)

    # Predicting with too few feature columns must fail.
    self.assertRaisesRegex(
        ValueError,
        'Dimensions of X are inconsistent with training data: '
        'expected 14 features, but got 2',
        booster.predict, self.x[:, :2])
def test_fit(self):
    """Component-wise boosting with 100 estimators reproduces reference values."""
    booster = ComponentwiseGradientBoostingSurvivalAnalysis(n_estimators=100)
    booster.fit(self.x, self.y)

    prediction = booster.predict(self.x)
    actual_cindex = concordance_index_censored(
        self.y['fstat'], self.y['lenfol'], prediction)
    reference_cindex = numpy.array([0.7755659, 58283, 16866, 0, 119])
    assert_array_almost_equal(reference_cindex, numpy.array(actual_cindex))

    # All coefficients are expected to be zero except the ones listed here.
    nonzero_coefficients = {
        'age': 0.040919,
        'hr': 0.004977,
        'diasbp': -0.003407,
        'bmi': -0.017938,
        'sho': 0.429904,
        'chf': 0.508211,
    }
    labels = ['(Intercept)'] + self.columns
    reference_coef = pandas.Series(numpy.zeros(15, dtype=float), index=labels)
    for column, value in nonzero_coefficients.items():
        reference_coef[column] = value
    assert_array_almost_equal(reference_coef.values, booster.coef_)

    self.assertTupleEqual((100,), booster.train_score_.shape)

    # Predicting with too few feature columns must fail.
    self.assertRaisesRegex(
        ValueError,
        'Dimensions of X are inconsistent with training data: '
        'expected 14 features, but got 2',
        booster.predict, self.x[:, :2])
def test_fit_custom_kernel(self):
    """Ensemble selection over linear and clinical-kernel SVMs hits the reference score."""
    # Five alpha values, log-spaced between 0.001 and 0.5.
    alpha_values = numpy.exp(
        numpy.linspace(numpy.log(0.001), numpy.log(0.5), 5))
    param_grid = ParameterGrid({"alpha": alpha_values})

    clinical = ClinicalKernelTransform(fit_once=True)
    clinical.prepare(self.x)

    linear_members = [
        ("svm_linear_%d" % idx,
         FastSurvivalSVM(max_iter=100, random_state=0, **grid_point))
        for idx, grid_point in enumerate(param_grid)
    ]
    kernel_members = [
        ("svm_kernel_%d" % idx,
         FastKernelSurvivalSVM(kernel=clinical.pairwise_kernel, max_iter=45,
                               tol=1e-5, random_state=0, **grid_point))
        for idx, grid_point in enumerate(param_grid)
    ]
    candidates = linear_members + kernel_members

    folds = KFold(n_splits=3, shuffle=True, random_state=0)
    ensemble = EnsembleSelection(candidates, n_estimators=0.4,
                                 scorer=score_cindex, cv=folds, n_jobs=4)
    ensemble.fit(self.x.values, self.y)

    self.assertEqual(len(ensemble), 10)
    self.assertTupleEqual(ensemble.scores_.shape, (10,))

    prediction = ensemble.predict(self.x.values)
    actual = concordance_index_censored(
        self.y['fstat'], self.y['lenfol'], prediction)
    reference = numpy.array([0.7978084, 59938, 15178, 33, 119])
    assert_array_almost_equal(actual, reference)
def test_fit_subsample(self):
    """Gradient boosting with subsampling reproduces reference values.

    Fits 100 estimators with ``max_features=8`` and ``subsample=0.6``, then
    scores the model on the data with seven fixed rows excluded.
    """
    model = GradientBoostingSurvivalAnalysis(n_estimators=100, max_features=8,
                                             subsample=0.6, random_state=0)
    model.fit(self.x, self.y)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(model.max_features_, 8)
    self.assertTrue(hasattr(model, "oob_improvement_"))

    # Evaluate on all rows except a fixed set of seven indices.
    incl_mask = numpy.ones(self.x.shape[0], dtype=bool)
    incl_mask[[35, 111, 174, 206, 236, 268, 497]] = False
    x_test = self.x[incl_mask]
    y_test = self.y[incl_mask]

    p = model.predict(x_test)

    expected_cindex = numpy.array([0.8592640, 62905, 10303, 0, 110])
    result = concordance_index_censored(y_test['fstat'], y_test['lenfol'], p)
    assert_array_almost_equal(expected_cindex, numpy.array(result))

    self.assertTupleEqual((100, ), model.train_score_.shape)
    self.assertTupleEqual((100, ), model.oob_improvement_.shape)

    # Predicting with too few feature columns must fail.
    self.assertRaisesRegex(
        ValueError,
        "Number of features of the model must match the input. "
        "Model n_features is 14 and input n_features is 2 ",
        model.predict, self.x[:, :2])
def test_fit_dropout(self):
    """Component-wise boosting with dropout reproduces reference values."""
    booster = ComponentwiseGradientBoostingSurvivalAnalysis(
        n_estimators=100, learning_rate=1.0, dropout_rate=0.03,
        random_state=0)
    booster.fit(self.x, self.y)

    prediction = booster.predict(self.x)
    actual_cindex = concordance_index_censored(
        self.y['fstat'], self.y['lenfol'], prediction)
    reference_cindex = numpy.array([0.7772425, 58409, 16740, 0, 119])
    assert_array_almost_equal(reference_cindex, numpy.array(actual_cindex))

    # All coefficients are expected to be zero except the ones listed here.
    nonzero_coefficients = {
        'age': 0.275537,
        'hr': 0.040048,
        'diasbp': -0.029998,
        'bmi': -0.138909,
        'sho': 3.318941,
        'chf': 2.851386,
        'mitype': -0.075817,
    }
    labels = ['(Intercept)'] + self.columns
    reference_coef = pandas.Series(numpy.zeros(15, dtype=float), index=labels)
    for column, value in nonzero_coefficients.items():
        reference_coef[column] = value
    assert_array_almost_equal(reference_coef.values, booster.coef_)
def test_fit(self):
    """Gradient boosting without subsampling reproduces reference values.

    Fits 100 estimators of depth 3 and scores the model on the training
    data. No ``oob_improvement_`` attribute is expected on the fitted model.
    """
    model = GradientBoostingSurvivalAnalysis(n_estimators=100, max_depth=3,
                                             min_samples_split=10,
                                             random_state=0)
    model.fit(self.x, self.y)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(model.max_features_, 14)
    self.assertFalse(hasattr(model, "oob_improvement_"))

    p = model.predict(self.x)

    expected_cindex = numpy.array(
        [0.86272605091218779, 64826, 10309, 14, 119])
    result = concordance_index_censored(self.y['fstat'], self.y['lenfol'], p)
    assert_array_almost_equal(expected_cindex, numpy.array(result))

    self.assertTupleEqual((100, ), model.train_score_.shape)

    # Predicting with too few feature columns must fail.
    self.assertRaisesRegex(
        ValueError,
        "Number of features of the model must match the input. "
        "Model n_features is 14 and input n_features is 2 ",
        model.predict, self.x[:, :2])
def assert_cindex_almost_equal(event_indicator, event_time, estimate, expected):
    """Assert that the concordance result for ``estimate`` matches ``expected``.

    All entries after the first must match ``expected`` exactly. The first
    entry must be close both to ``expected[0]`` and to the value recomputed
    from the concordant, discordant and tied-risk counts.
    """
    actual = concordance_index_censored(event_indicator, event_time, estimate)
    assert_array_equal(actual[1:], expected[1:])

    # Recompute the index from the pair counts; risk ties count as half.
    n_concordant, n_discordant, n_tied_risk = actual[1:4]
    numerator = n_concordant + 0.5 * n_tied_risk
    denominator = n_concordant + n_discordant + n_tied_risk
    recomputed = numerator / denominator

    assert_almost_equal(actual[0], recomputed)
    assert_almost_equal(actual[0], expected[0])
def test_concordance_index(whas500_pred):
    """Pair counts and c-index for the ``whas500_pred`` data match reference values."""
    event, time, risk = whas500_pred

    result = concordance_index_censored(event, time, risk)
    cindex, concordant, discordant, tied_risk, tied_time = result

    assert (concordant, discordant, tied_risk, tied_time) == (57849, 17300, 0, 14)
    assert round(abs(0.7697907 - cindex), 6) == 0
def test_concordance_index_no_censoring_all_correct():
    """With no censoring and risk decreasing in time, every pair is concordant."""
    time = [1, 5, 6, 11, 34, 45, 46, 50]
    n_samples = len(time)
    event = numpy.full(n_samples, True)
    # Highest risk for the earliest time.
    estimate = numpy.arange(n_samples)[::-1]

    result = concordance_index_censored(event, time, estimate)
    cindex, concordant, discordant, tied_risk, tied_time = result

    assert (concordant, discordant, tied_risk, tied_time) == (28, 0, 0, 0)
    assert cindex == 1.0
def test_breast_cancer_cvxpy(self):
    """Minlip model solved with cvxpy reproduces the reference concordance."""
    features = self.x.values

    minlip = MinlipSurvivalAnalysis(solver="cvxpy", alpha=1, pairs="next")
    minlip.fit(features, self.y)

    # One coefficient per training sample.
    self.assertTupleEqual((1, self.x.shape[0]), minlip.coef_.shape)

    prediction = minlip.predict(features)
    actual = concordance_index_censored(
        self.y['cens'], self.y['time'], prediction)

    reference = numpy.array([0.59576770470121443, 79280, 53792, 0, 32])
    assert_array_almost_equal(reference, actual)
def evaluate_model(self, event, duration, pred):
    """Return the concordance index of predictions against test data.

    :param event: Pandas Series - 0/1 flags for whether patient died
    :param duration: Pandas Series - Time to death/censorship as integers
    :param pred: array, shape=(n_observed, ) - predictions from trained model
    :return c_index: float - Concordance Index of Model against test data
    """
    # Map each flag to a boolean: 0 becomes False, anything else True.
    observed = event.apply(lambda flag: not (flag == 0))

    result = concordance_index_censored(observed, duration, pred)
    # Only the index itself is returned; the four pair counts are discarded.
    c_index, _concordant, _discordant, _tied_risk, _tied_time = result
    return c_index
def summary_survival_with_shap_mm(model, loader, n_classes):
    """Evaluate a multimodal survival model and collect per-slide attributions.

    For every batch from ``loader`` the model is run without gradients to get
    the predicted survival values, and Integrated Gradients attributions are
    computed for the WSI and omic inputs through ``interpret_patient_mm``.

    Parameters
    ----------
    model
        Called as ``model(x_path=..., x_omic=...)``; must return five values,
        the second being the survival tensor.
    loader
        Sized iterable yielding ``(data_WSI, data_omic, label, event_time,
        c)``. ``loader.dataset.slide_data['slide_id']`` supplies the slide id
        for each batch position. Each batch is reduced to scalars, so it is
        assumed to hold one sample — TODO confirm.
    n_classes
        Unused; kept for interface compatibility.

    Returns
    -------
    tuple
        ``(patient_results, c_index)``: a dict keyed by slide id with risk,
        label, survival time, censorship and omic attributions, and the
        concordance index over all batches.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()

    n_batches = len(loader)
    all_risk_scores = np.zeros(n_batches)
    all_censorships = np.zeros(n_batches)
    all_event_times = np.zeros(n_batches)

    slide_ids = loader.dataset.slide_data['slide_id']
    patient_results = {}

    for batch_idx, (data_WSI, data_omic, label, event_time, c) in enumerate(loader):
        data_WSI, data_omic = data_WSI.to(device), data_omic.to(device)
        label = label.to(device)
        slide_id = slide_ids.iloc[batch_idx]

        with torch.no_grad():
            _, survival, _, _, _ = model(x_path=data_WSI, x_omic=data_omic)

        # Attribution of the prediction to both input modalities.
        ig = IntegratedGradients(interpret_patient_mm)
        data_WSI = data_WSI.unsqueeze(0)
        data_omic = data_omic.unsqueeze(0)
        data_WSI.requires_grad_()
        data_omic.requires_grad_()
        # NOTE(review): n_steps is not defined in this function; it has to be
        # available at module level — confirm.
        ig_attr = ig.attribute((data_WSI, data_omic), n_steps=n_steps)
        ig_attr_omic = ig_attr[1].detach().cpu().data.numpy()

        # np.asscalar was removed in NumPy 1.23; .item() is its documented
        # replacement and yields the same Python scalar.
        risk = -torch.sum(survival, dim=1).item()
        event_time = event_time.item()
        c = c.item()

        all_risk_scores[batch_idx] = risk
        all_censorships[batch_idx] = c
        all_event_times[batch_idx] = event_time
        patient_results.update({
            slide_id: {
                'slide_id': np.array(slide_id),
                'risk': risk,
                'disc_label': label.item(),
                'survival': event_time,
                'censorship': c,
                'attr': ig_attr_omic,
            }
        })

    # The event indicator is the complement of the censorship flag.
    c_index = concordance_index_censored(
        (1 - all_censorships).astype(bool),
        all_event_times,
        all_risk_scores,
        tied_tol=1e-08,
    )[0]
    return patient_results, c_index
def test_fit_spearman_correlation(self):
    """Ensemble built with Spearman correlation reproduces the reference score."""
    ensemble = self._create_ensemble(correlation="spearman")
    # The ensemble has no members before fitting.
    self.assertEqual(len(ensemble), 0)

    features = self.x.values
    ensemble.fit(features, self.y)
    prediction = ensemble.predict(features)

    actual = concordance_index_censored(
        self.y['fstat'], self.y['lenfol'], prediction)
    reference = numpy.array([0.7863312, 59088, 16053, 8, 119])
    assert_array_almost_equal(actual, reference)
def test_fit_kendall_correlation(self):
    """Ensemble built with Kendall correlation reproduces the reference score."""
    ensemble = self._create_ensemble(correlation="kendall")
    # The ensemble has no members before fitting.
    self.assertEqual(len(ensemble), 0)

    features = self.x.values
    ensemble.fit(features, self.y)
    prediction = ensemble.predict(features)

    actual = concordance_index_censored(
        self.y['fstat'], self.y['lenfol'], prediction)
    reference = numpy.array([0.7663043, 57570, 17545, 34, 119])
    assert_array_almost_equal(actual, reference)
def test_concordance_index_with_tied_time():
    """Data with repeated time points (11 and 45) yields two time ties."""
    event = [False, True, True, False, True, False, True, False, False]
    time = [1, 5, 6, 11, 11, 34, 45, 45, 50]
    estimate = [5, 8, 11, 19, 34, 12, 3, 9, 12]

    result = concordance_index_censored(event, time, estimate)
    cindex, concordant, discordant, tied_risk, tied_time = result

    assert (concordant, discordant, tied_risk, tied_time) == (8, 12, 0, 2)
    assert round(abs(0.4 - cindex), 6) == 0
def test_concordance_index_no_ties():
    """Censored data with distinct time points gives the reference counts."""
    event = [False, True, True, False, False, True, False, False]
    time = [1, 5, 6, 11, 34, 45, 46, 50]
    estimate = [5, 8, 11, 34, 12, 3, 9, 12]

    result = concordance_index_censored(event, time, estimate)
    cindex, concordant, discordant, tied_risk, tied_time = result

    assert (concordant, discordant, tied_risk, tied_time) == (3, 10, 0, 0)
    assert round(abs(0.2307692 - cindex), 6) == 0
def test_concordance_index_no_censoring_all_wrong():
    """With no censoring and risk increasing in time, every pair is discordant."""
    time = [1, 5, 6, 11, 34, 45, 46, 50]
    n_samples = len(time)
    event = numpy.full(n_samples, True)
    # Risk order is exactly reversed: lowest risk for the earliest time.
    estimate = numpy.arange(n_samples)

    result = concordance_index_censored(event, time, estimate)
    cindex, concordant, discordant, tied_risk, tied_time = result

    assert (concordant, discordant, tied_risk, tied_time) == (0, 28, 0, 0)
    assert cindex == 0.0