Ejemplo n.º 1
0
def test_quality(t_true,
                 y_true,
                 pred,
                 time_grid=np.linspace(0, 300, 30, dtype=int),
                 concordance_at_t=None,
                 plot=False):
    """Score Weibull-form survival predictions with ``EvalSurv`` metrics.

    For every time ``t`` in ``time_grid`` the survival probability of each
    observation is computed as ``exp(-(t / (pred[:, 0] + 1e-6)) ** pred[:, 1])``
    and the resulting table (rows = times, columns = observations) is handed
    to ``EvalSurv`` with Kaplan-Meier censoring (``censor_surv='km'``).

    Args:
        t_true: Observed durations, passed to ``EvalSurv`` as ``durations``.
        y_true: Event indicators, passed to ``EvalSurv`` as ``events``.
        pred: 2-D array-like indexed as ``pred[:, 0]`` (divisor of ``t``) and
            ``pred[:, 1]`` (exponent).
        time_grid: Times at which the survival curves and the integrated
            scores are evaluated.
        concordance_at_t: Optional label of a row in the survival table (so it
            must be one of the ``time_grid`` values); when given,
            ``concordance_index`` is additionally computed from the survival
            probabilities at that time.
        plot: When true, show sample survival curves, NBLL and Brier score.

    Returns:
        A one-row ``DataFrame`` with columns ``dt_c_index``,
        ``int_brier_score`` and ``int_nbill``, preceded by ``harell_c_index``
        when ``concordance_at_t`` is given.
    """
    # The small epsilon keeps the division finite when the predicted
    # divisor is exactly zero.
    scale = pred[:, 0] + 1e-6
    shape = pred[:, 1]

    # One row of survival probabilities per grid time, assembled in a single
    # DataFrame construction instead of repeated (quadratic) pd.concat calls.
    surv_rows = [np.exp(-1 * np.power(t / scale, shape)) for t in time_grid]
    all_surv_time = pd.DataFrame(surv_rows)
    all_surv_time.index = time_grid

    ev = EvalSurv(surv=all_surv_time,
                  durations=t_true,
                  events=y_true,
                  censor_surv='km')
    metrics = {
        'dt_c_index': ev.concordance_td('antolini'),
        'int_brier_score': ev.integrated_brier_score(time_grid),
        'int_nbill': ev.integrated_nbll(time_grid),
    }

    if plot:
        fig, ax = plt.subplots(1, 3, figsize=(20, 7))
        # Show up to five random curves; capping the count avoids a
        # ValueError when fewer than five observations are available.
        n_curves = min(5, all_surv_time.shape[1])
        curves = all_surv_time.sample(n_curves, axis=1).loc[1:]
        for obs in curves.columns:
            ax[0].plot(curves.index, curves[obs])
        ax[0].set_xlabel('Time')
        ax[0].set_title("Sample survival curves")
        ax[1].plot(time_grid, ev.nbll(time_grid))
        ax[1].set_title('NBLL')
        ax[1].set_xlabel('Time')
        ax[2].plot(time_grid, ev.brier_score(time_grid))
        ax[2].set_title('Brier score')
        ax[2].set_xlabel('Time')
        plt.show()

    if concordance_at_t is not None:
        harell_c_index = concordance_index(
            predicted_scores=all_surv_time.loc[concordance_at_t, :].values,
            event_times=t_true,
            event_observed=y_true)
        # Keep 'harell_c_index' as the first column, as callers expect.
        metrics = {'harell_c_index': harell_c_index, **metrics}

    return pd.DataFrame([metrics])
def get_metrics(val_data, surv_pred, time_grid):
    """Return a one-row DataFrame of ``EvalSurv`` scores for ``surv_pred``.

    Args:
        val_data: Mapping-like object; ``val_data['t']`` is passed to
            ``EvalSurv`` as durations and ``val_data['y']`` as events.
        surv_pred: Predicted survival table handed to ``EvalSurv``.
        time_grid: Times over which the Brier score and NBLL are integrated.

    Returns:
        ``DataFrame`` with columns ``dt_c_index``, ``int_brier_score`` and
        ``int_nbill``.
    """
    evaluator = EvalSurv(surv_pred,
                         val_data['t'],
                         val_data['y'],
                         censor_surv='km')

    scores = {}
    scores['dt_c_index'] = evaluator.concordance_td('antolini')
    scores['int_brier_score'] = evaluator.integrated_brier_score(time_grid)
    scores['int_nbill'] = evaluator.integrated_nbll(time_grid)
    return pd.DataFrame([scores])
Ejemplo n.º 3
0
    def Bootstrap(self, surv, event: list, duration: list):
        """Bootstrap the evaluation metrics over resampled observations.

        Runs ``self.bootstrap_n`` rounds. Each round draws, with replacement,
        as many observations as ``surv`` has columns, evaluates the resampled
        data with ``EvalSurv`` (``censor_surv='km'``) and records the
        'antolini' time-dependent concordance, the integrated Brier score and
        the integrated NBLL over 100 evenly spaced times between the sampled
        minimum and maximum duration.

        Args:
            surv: Survival table with one column per observation (columns are
                selected positionally with ``iloc``).
            event: Event indicator per observation, in column order.
            duration: Observed duration per observation, in column order.

        Returns:
            Tuple ``(cindex, brier, nbll)`` of lists, one entry per round.
        """
        # Fixed seed so the resampling below is repeatable. The indices are
        # drawn from NumPy's global RNG on purpose: seeding NumPy has no
        # effect on the stdlib ``random.choices`` used previously.
        np.random.seed(42)

        n_obs = surv.shape[1]
        events = np.asarray(event)
        durations = np.asarray(duration)

        cindex, brier, nbll = [], [], []
        for _ in range(self.bootstrap_n):
            sampled_index = np.random.randint(0, n_obs, size=n_obs)

            sampled_surv = surv.iloc[:, sampled_index]
            sampled_event = events[sampled_index].astype(int)
            sampled_duration = durations[sampled_index]

            ev = EvalSurv(sampled_surv, sampled_duration, sampled_event,
                          censor_surv='km')
            time_grid = np.linspace(sampled_duration.min(),
                                    sampled_duration.max(), 100)

            cindex.append(ev.concordance_td('antolini'))
            brier.append(ev.integrated_brier_score(time_grid))
            nbll.append(ev.integrated_nbll(time_grid))

        return cindex, brier, nbll
Ejemplo n.º 4
0
    # Training ======================================================================
    # Fit the model with an EarlyStopping callback (default settings), using
    # the same batch size for the training and the validation batches.

    epochs = args.epochs
    callbacks = [tt.callbacks.EarlyStopping()]
    verbose = True
    log = model.fit(x_train,
                    y_train,
                    batch_size,
                    epochs,
                    callbacks,
                    verbose,
                    val_data=val,
                    val_batch_size=batch_size)
    # Alternative call kept for reference (transformed targets / validation set):
    # log = model.fit(x_train, y_train_transformed, batch_size, epochs, callbacks, verbose, val_data = val_transformed, val_batch_size = batch_size)

    # Evaluation ===================================================================
    # Predict survival curves for the test set and score them with EvalSurv
    # (censor_surv='km').

    # Baseline hazards are computed before predicting; the return value is
    # not needed here and is discarded.
    _ = model.compute_baseline_hazards()
    surv = model.predict_surv_df(x_test)
    ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

    # Concordance computed with the method's default arguments.
    ctd = ev.concordance_td()
    # 100 evenly spaced times spanning the observed test durations.
    time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)

    ibs = ev.integrated_brier_score(time_grid)
    nbll = ev.integrated_nbll(time_grid)
    # Smallest 'loss' score recorded by the 'val_' monitor of the training log
    # (presumably the best validation loss over epochs; verify against the
    # training-log API).
    val_loss = min(log.monitors['val_'].scores['loss']['score'])
    wandb.log({'val_loss': val_loss, 'ctd': ctd, 'ibs': ibs, 'nbll': nbll})
    wandb.finish()
    
                    # Fit one configuration with early stopping, then score it
                    # on the test set.
                    callbacks = [tt.callbacks.EarlyStopping()]
                    log = model.fit(x_train, y_train, batch_size, epochs, callbacks, val_data=val)

                    surv = model.predict_surv_df(x_test)
                    ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

                    # Compute each metric exactly once and reuse the value for
                    # both the console output and the results row (previously
                    # the concordance and Brier score were evaluated twice).
                    c_index_td = ev.concordance_td('antolini')
                    print(c_index_td)
                    # 100 evenly spaced times spanning the observed test durations.
                    time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)
                    ibs_score = ev.integrated_brier_score(time_grid)
                    print(ibs_score)
                    nbll_score = ev.integrated_nbll(time_grid)

                    # One-row frame describing this configuration and its scores.
                    result = pd.DataFrame([[0]*8],columns=["random","model","hiddens",
                                                   "lr","scalers","c-index","brier","nll"])

                    result["c-index"] = c_index_td
                    result["brier"] = ibs_score
                    result["nll"] = nbll_score
                    result["lr"] = lr
                    result["model"] = mod
                    # Write into the declared "scalers" column; assigning to
                    # "scaler" left "scalers" at 0 and appended a stray ninth
                    # column to every row.
                    result["scalers"] = scale
                    result["random"] = seed
                    result["hiddens"] = dim

                    # Append the row and rewrite the CSV after every
                    # configuration so partial results are on disk.
                    results = pd.concat([results,result],ignore_index=True)
                    results.to_csv(os.path.join(outpath,"results"))

                    # Name the input features PC0..PC{n-1} and plot survival
                    # curves for the training data.
                    pc_col = ['PC'+str(i) for i in range(x_train.shape[1])]
                    pc_train = pd.DataFrame(x_train,columns = pc_col)
                    plot_survival(pc_train,pc_col,model,outpath,duration=100)