Esempio n. 1
0
def test_quality(t_true,
                 y_true,
                 pred,
                 time_grid=np.linspace(0, 300, 30, dtype=int),
                 concordance_at_t=None,
                 plot=False):
    """Score predicted survival curves against observed outcomes.

    For every time ``t`` in ``time_grid`` a survival probability is computed
    per observation as ``exp(-(t / (pred[:, 0] + 1e-6)) ** pred[:, 1])``
    (the Weibull survival-function form; ``pred[:, 0]`` presumably holds the
    scale and ``pred[:, 1]`` the shape -- confirm against the model that
    produces ``pred``). The resulting table (rows = times, columns =
    observations) is handed to ``EvalSurv`` to obtain the metrics.

    Args:
        t_true: Observed durations, passed to ``EvalSurv`` as ``durations``.
        y_true: Event indicators, passed to ``EvalSurv`` as ``events``.
        pred: 2-D array-like indexed as ``pred[:, 0]`` and ``pred[:, 1]``.
        time_grid: Times at which the survival curves are evaluated and over
            which the integrated scores are computed.
        concordance_at_t: If given, must be a label of ``time_grid``; the
            survival probabilities at that time are additionally scored with
            ``concordance_index``.
        plot: If true, show sampled survival curves, the NBLL curve and the
            Brier-score curve.

    Returns:
        A one-row ``pandas.DataFrame`` with columns ``dt_c_index``,
        ``int_brier_score`` and ``int_nbill``, preceded by ``harell_c_index``
        when ``concordance_at_t`` is not ``None``.
    """
    # Loop-invariant pieces of the survival formula; the epsilon guards
    # against division by zero.
    scale = pred[:, 0] + 1e-6
    shape = pred[:, 1]

    # Collect one row per grid time and build the frame once, instead of
    # re-concatenating a growing DataFrame on every iteration.
    surv_rows = [np.exp(-1 * np.power(t / scale, shape)) for t in time_grid]
    all_surv_time = pd.DataFrame(surv_rows, index=time_grid)

    ev = EvalSurv(surv=all_surv_time,
                  durations=t_true,
                  events=y_true,
                  censor_surv='km')
    dt_c_index = ev.concordance_td('antolini')
    int_brier_score = ev.integrated_brier_score(time_grid)
    int_nbill = ev.integrated_nbll(time_grid)

    if plot:
        fig, ax = plt.subplots(1, 3, figsize=(20, 7))
        # Never request more curves than there are observations, otherwise
        # DataFrame.sample raises ValueError.
        n_curves = min(5, all_surv_time.shape[1])
        # .loc[1:] keeps only rows whose time label is >= 1.
        d = all_surv_time.sample(n_curves, axis=1).loc[1:]
        for o in d.columns:
            ax[0].plot(d.index, d[o])
        ax[0].set_xlabel('Time')
        ax[0].set_title("Sample survival curves")
        nb = ev.nbll(time_grid)
        ax[1].plot(time_grid, nb)
        ax[1].set_title('NBLL')
        ax[1].set_xlabel('Time')
        br = ev.brier_score(time_grid)
        ax[2].plot(time_grid, br)
        ax[2].set_title('Brier score')
        ax[2].set_xlabel('Time')
        plt.show()

    metrics = {}
    if concordance_at_t is not None:
        # Inserted first so the column order matches what callers already get.
        metrics['harell_c_index'] = concordance_index(
            predicted_scores=all_surv_time.loc[concordance_at_t, :].values,
            event_times=t_true,
            event_observed=y_true)
    metrics['dt_c_index'] = dt_c_index
    metrics['int_brier_score'] = int_brier_score
    metrics['int_nbill'] = int_nbill

    return pd.DataFrame([metrics])
Esempio n. 2
0
# The EvalSurv class contains some useful evaluation criteria for time-to-event prediction.
# We set censor_surv='km' to state that we want to use Kaplan-Meier for estimating the
# censoring distribution.
# NOTE(review): surv, durations_test and events_test are defined outside this snippet.

ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')

# Time-dependent concordance using the 'antolini' method. The value is not stored; as a
# bare expression it is only displayed when run interactively (e.g. in a notebook cell).
ev.concordance_td('antolini')

# Brier score
# We can plot the IPCW Brier score for a given set of times.
# Here we just use 100 time points between the min and max duration in the test set.
# Note that the score becomes unstable for the highest times.
# It is therefore common to disregard the rightmost part of the graph.

time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)
ev.brier_score(time_grid).plot()
plt.ylabel('Brier score')
# Assigning to `_` discards the return value so it is not echoed as cell output.
_ = plt.xlabel('Time')

# Negative binomial log-likelihood
# In a similar manner, we can plot the IPCW negative binomial log-likelihood.

ev.nbll(time_grid).plot()
plt.ylabel('NBLL')
_ = plt.xlabel('Time')

# Integrated scores
# The two time-dependent scores above can be integrated over time to produce a single score
# (Graf et al. 1999). In practice this is done by numerical integration over a defined time_grid.

ev.integrated_brier_score(time_grid)