Esempio n. 1
0
 def add_km_censor(self, steps='post'):
     """Register a Kaplan-Meier estimate of the censoring distribution.

     The estimate is obtained by Kaplan-Meier on the test set with the
     event indicator flipped, i.e. on (durations, 1 - events). The single
     resulting curve is replicated into one column per entry of
     `self.durations` before being handed to `add_censor_est`.

     Arguments:
         steps -- Forwarded unchanged to `add_censor_est` (default 'post').

     Returns:
         Whatever `add_censor_est` returns.
     """
     censor_km = utils.kaplan_meier(self.durations, 1 - self.events)
     # Column vector of the KM values; every individual shares this curve.
     km_column = censor_km.values.reshape(-1, 1)
     n_individuals = len(self.durations)
     replicated = np.repeat(km_column, n_individuals, axis=1)
     censor_surv = pd.DataFrame(replicated, index=censor_km.index)
     return self.add_censor_est(censor_surv, steps)
Esempio n. 2
0
def test_kaplan_meier_vs_lifelines(n, p_cens):
    """Check `utils.kaplan_meier` against `KaplanMeierFitter` on random data.

    Arguments:
        n -- Number of random durations/events to draw.
        p_cens -- Censoring probability; events are drawn as
            Bernoulli(1 - p_cens).

    The two estimates must have the same shape, agree to within 1e-14,
    and share the same index.
    """
    # Fixed seed so the generated data set is reproducible.
    np.random.seed(0)
    durations = np.random.uniform(0, 100, n)
    events = np.random.binomial(1, 1 - p_cens, n).astype('float')

    km = utils.kaplan_meier(durations, events)

    fitted = KaplanMeierFitter().fit(durations, events)
    kmf = fitted.survival_function_['KM_estimate']

    assert km.shape == kmf.shape
    max_abs_diff = (km - kmf).abs().max()
    assert max_abs_diff < 1e-14
    same_index = km.index == kmf.index
    assert same_index.all()
Esempio n. 3
0
def cuts_quantiles(durations, events, num, min_=0., dtype='float64'):
    """Pick cut points at equally spaced levels of the Kaplan-Meier estimate.

    `num` survival levels are spaced linearly between the minimum and the
    maximum of the Kaplan-Meier estimate, and each level is mapped back to a
    duration from the estimate's index. Duplicate durations are dropped, so
    fewer than `num` cuts may be returned (a warning is issued in that case).

    Arguments:
        durations -- Array of durations; must support `.min()` and `.max()`.
        events -- Event indicators passed to `utils.kaplan_meier`.
        num -- Number of cut points requested.
        min_ -- Value written into the first cut. If min_ = None, we will use
            durations.min() for the first cut.
        dtype -- dtype the returned array is cast to.

    Returns:
        Sorted array of unique cut points, cast to `dtype`.
    """
    km = utils.kaplan_meier(durations, events)
    # Work on reversed arrays: np.searchsorted needs ascending input
    # (assumes the KM estimate is non-increasing along its index).
    surv_reversed = km.values[::-1]
    durations_reversed = km.index.values[::-1]

    surv_levels = np.linspace(km.values.min(), km.values.max(), num)
    positions = np.searchsorted(surv_reversed, surv_levels)[::-1]
    cuts = np.unique(durations_reversed[positions])

    if len(cuts) != num:
        warnings.warn(f"cuts are not unique, continue with {len(cuts)} cuts instead of {num}")

    # The first cut is overwritten before the final cast to `dtype`.
    if min_ is None:
        cuts[0] = durations.min()
    else:
        cuts[0] = min_

    # Sanity check: the last cut is expected to be the largest duration.
    assert cuts[-1] == durations.max(), 'something wrong...'
    return cuts.astype(dtype)
Esempio n. 4
0
def test_kaplan_meier():
    """Check `utils.kaplan_meier` on a tiny hand-computed example.

    Four subjects: two events at t=1, one event at t=2 and one censored
    observation at t=3. The expected estimate is indexed by 0., 1., 2., 3.
    with values 1, 0.5, 0.25, 0.25.
    """
    surv = utils.kaplan_meier(
        np.array([1., 1., 2., 3.]),
        np.array([1, 1, 1, 0]),
    )

    expected_index = np.arange(4, dtype=float)
    expected_values = np.array([1., 0.5, 0.25, 0.25])

    index_matches = surv.index.values == expected_index
    assert index_matches.all()
    values_match = surv.values == expected_values
    assert values_match.all()