def test_quality(t_true, y_true, pred, time_grid=None, concordance_at_t=None, plot=False):
    """Evaluate Weibull survival predictions against observed outcomes.

    Parameters
    ----------
    t_true : array-like
        Observed durations (time of event or censoring).
    y_true : array-like
        Event indicators (1 = event, 0 = censored).
    pred : ndarray of shape (n_samples, 2)
        Per-sample Weibull parameters: column 0 is the scale, column 1
        the shape.  # assumes this (scale, shape) layout — TODO confirm
    time_grid : array-like, optional
        Times at which survival probabilities are evaluated.
        Defaults to 30 integer points on [0, 300].
    concordance_at_t : scalar, optional
        If given, also compute Harrell's C-index from the survival
        probabilities at this grid time (must be a value in time_grid).
    plot : bool, default False
        If True, show sample survival curves, NBLL and Brier score.

    Returns
    -------
    pd.DataFrame
        One-row frame with 'dt_c_index', 'int_brier_score', 'int_nbill'
        (plus 'harell_c_index' when concordance_at_t is given).
    """
    # BUG FIX: the original default was np.linspace(..., dtype=np.int);
    # np.int was removed in NumPy 1.24, so the def line itself raised at
    # import time. Build the default lazily with the builtin int instead.
    if time_grid is None:
        time_grid = np.linspace(0, 300, 30, dtype=int)

    # Weibull survival S(t) = exp(-(t / scale) ** shape); the 1e-6 guards
    # against division by zero for degenerate scale predictions.
    # Build all rows first, then construct the frame once — the original
    # pd.concat inside the loop was O(n^2) in the number of grid points.
    rows = [
        np.exp(-1 * np.power(t / (pred[:, 0] + 1e-6), pred[:, 1]))
        for t in time_grid
    ]
    # Rows are time points, columns are samples (as EvalSurv expects).
    all_surv_time = pd.DataFrame(rows, index=time_grid)

    ev = EvalSurv(surv=all_surv_time, durations=t_true, events=y_true, censor_surv='km')
    dt_c_index = ev.concordance_td('antolini')
    int_brier_score = ev.integrated_brier_score(time_grid)
    int_nbill = ev.integrated_nbll(time_grid)

    if plot:
        fig, ax = plt.subplots(1, 3, figsize=(20, 7))
        # Five random individuals' survival curves (t = 0 row dropped).
        d = all_surv_time.sample(5, axis=1).loc[1:]
        obs = d.columns
        for o in obs:
            ax[0].plot(d.index, d[o])
        ax[0].set_xlabel('Time')
        ax[0].set_title("Sample survival curves")
        nb = ev.nbll(time_grid)
        ax[1].plot(time_grid, nb)
        ax[1].set_title('NBLL')
        ax[1].set_xlabel('Time')
        br = ev.brier_score(time_grid)
        ax[2].plot(time_grid, br)
        ax[2].set_title('Brier score')
        ax[2].set_xlabel('Time')
        plt.show()

    metrics = {
        'dt_c_index': dt_c_index,
        'int_brier_score': int_brier_score,
        'int_nbill': int_nbill,
    }
    if concordance_at_t is not None:
        # Harrell's C at a single time point, using the survival
        # probability there as the (anti-)risk score.
        harell_c_index = concordance_index(
            predicted_scores=all_surv_time.loc[concordance_at_t, :].values,
            event_times=t_true,
            event_observed=y_true)
        metrics = {'harell_c_index': harell_c_index, **metrics}
    return pd.DataFrame([metrics])
def get_metrics(val_data, surv_pred, time_grid):
    """Return a one-row DataFrame of survival metrics for ``surv_pred``.

    Computes the Antolini time-dependent concordance, the integrated
    Brier score and the integrated negative binomial log-likelihood,
    using Kaplan-Meier censoring estimates.
    """
    evaluator = EvalSurv(surv_pred, val_data['t'], val_data['y'], censor_surv='km')
    metrics = {
        'dt_c_index': evaluator.concordance_td('antolini'),
        'int_brier_score': evaluator.integrated_brier_score(time_grid),
        'int_nbill': evaluator.integrated_nbll(time_grid),
    }
    return pd.DataFrame([metrics])
def Bootstrap(self, surv, event: list, duration: list):
    """Bootstrap survival-evaluation metrics.

    Resamples the columns of ``surv`` (individuals) with replacement
    ``self.bootstrap_n`` times and computes the Antolini time-dependent
    concordance, integrated Brier score and integrated NBLL on each
    resample.

    Parameters
    ----------
    surv : pd.DataFrame
        Survival curves; rows are time points, columns are individuals.
    event : list
        Per-individual event indicators.
    duration : list
        Per-individual observed durations.

    Returns
    -------
    tuple of (list, list, list)
        Per-resample c-index, integrated Brier score, and NBLL values.
    """
    # BUG FIX: the original called np.random.seed(42) "to control
    # reproducibility" but then sampled with random.choices(), which the
    # NumPy seed does not control — runs were NOT reproducible. Use a
    # seeded NumPy generator for the resampling indices instead.
    rng = np.random.default_rng(42)
    n = surv.shape[1]
    cindex, brier, nbll = [], [], []
    for _ in range(self.bootstrap_n):
        # Sample n column indices with replacement (a bootstrap sample).
        sampled_index = rng.integers(0, n, size=n)
        sampled_surv = surv.iloc[:, sampled_index]
        sampled_event = [event[i] for i in sampled_index]
        sampled_duration = [duration[i] for i in sampled_index]
        ev = EvalSurv(sampled_surv,
                      np.array(sampled_duration),
                      np.array(sampled_event).astype(int),
                      censor_surv='km')
        # Integrate the scores over the observed duration range.
        time_grid = np.linspace(min(sampled_duration), max(sampled_duration), 100)
        cindex.append(ev.concordance_td('antolini'))
        brier.append(ev.integrated_brier_score(time_grid))
        nbll.append(ev.integrated_nbll(time_grid))
    return cindex, brier, nbll
# Training ====================================================================== epochs = args.epochs callbacks = [tt.callbacks.EarlyStopping()] verbose = True log = model.fit(x_train, y_train, batch_size, epochs, callbacks, verbose, val_data=val, val_batch_size=batch_size) # log = model.fit(x_train, y_train_transformed, batch_size, epochs, callbacks, verbose, val_data = val_transformed, val_batch_size = batch_size) # Evaluation =================================================================== _ = model.compute_baseline_hazards() surv = model.predict_surv_df(x_test) ev = EvalSurv(surv, durations_test, events_test, censor_surv='km') # ctd = ev.concordance_td() ctd = ev.concordance_td() time_grid = np.linspace(durations_test.min(), durations_test.max(), 100) ibs = ev.integrated_brier_score(time_grid) nbll = ev.integrated_nbll(time_grid) val_loss = min(log.monitors['val_'].scores['loss']['score']) wandb.log({'val_loss': val_loss, 'ctd': ctd, 'ibs': ibs, 'nbll': nbll}) wandb.finish()
# Fit with early stopping, then evaluate on the test split and append a
# one-row summary of this run's hyper-parameters and metrics to `results`.
callbacks = [tt.callbacks.EarlyStopping()]
log = model.fit(x_train, y_train, batch_size, epochs, callbacks, val_data=val)

surv = model.predict_surv_df(x_test)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

# One row per run: hyper-parameters plus the three evaluation metrics.
result = pd.DataFrame([[0] * 8],
                      columns=["random", "model", "hiddens", "lr",
                               "scalers", "c-index", "brier", "nll"])

# Compute each metric once (the original recomputed them just to print).
c_index = ev.concordance_td('antolini')
print(c_index)
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)
brier = ev.integrated_brier_score(time_grid)
print(brier)

result["c-index"] = c_index
result["brier"] = brier
result["nll"] = ev.integrated_nbll(time_grid)
result["lr"] = lr
result["model"] = mod
# BUG FIX: the frame's column is named "scalers"; the original assigned to
# result["scaler"], silently adding a stray ninth column and leaving the
# "scalers" column stuck at 0 in the saved results.
result["scalers"] = scale
result["random"] = seed
result["hiddens"] = dim

results = pd.concat([results, result], ignore_index=True)
# NOTE(review): output file has no extension — presumably CSV; kept as-is
# so downstream readers of "results" are not broken.
results.to_csv(os.path.join(outpath, "results"))

# Survival-curve plots over the (presumably PCA-transformed) training
# features — column names are synthetic PC0..PCn labels.
pc_col = ['PC' + str(i) for i in range(x_train.shape[1])]
pc_train = pd.DataFrame(x_train, columns=pc_col)
plot_survival(pc_train, pc_col, model, outpath, duration=100)