def test_rmst_exactely_with_known_solution():
    """RMST of a fitted exponential model agrees with its closed-form values."""
    durations = np.random.exponential(2, 100)
    fitter = ExponentialFitter().fit(durations)
    scale = fitter.lambda_

    # Without an explicit limit, the RMST is compared against lambda_ itself.
    unrestricted = utils.restricted_mean_survival_time(fitter)
    assert abs(unrestricted - scale) < 0.001

    # Restricted at t = lambda_, the expected value is lambda_ * (e - 1) / e.
    expected_at_scale = scale * (np.e - 1) / np.e
    restricted = utils.restricted_mean_survival_time(fitter, t=scale)
    assert abs(restricted - expected_at_scale) < 0.001
def test_rmst_approximate_solution():
    """RMST from the fitted model and from its survival-function frame agree.

    Both values are computed inside ``pytest.warns`` so the test also requires
    that an ``ApproximationWarning`` is emitted along the way.
    """
    durations = np.random.exponential(2, 4000)
    grid = np.linspace(0, durations.max(), 10000)
    fitter = ExponentialFitter().fit(durations, timeline=grid)
    limit = fitter.lambda_

    with pytest.warns(exceptions.ApproximationWarning):
        from_model = utils.restricted_mean_survival_time(fitter, t=limit)
        from_curve = utils.restricted_mean_survival_time(fitter.survival_function_, t=limit)
        assert abs(from_model - from_curve) < 0.001
def test_rmst_variance():
    """Mean and variance returned with ``return_variance=True`` match the
    closed-form expressions derived from the fitted exponential hazard."""
    durations = np.random.exponential(2, 1000)
    expf = ExponentialFitter().fit(durations)
    rate = 1 / expf.lambda_
    limit = 1

    # Closed-form reference values for the restricted mean and its variance.
    expected_mean = 1 / rate * (1 - np.exp(-rate * limit))
    second_moment = 2 / rate ** 2 * (1 - np.exp(-rate * limit) * (1 + rate * limit))
    expected_var = second_moment - expected_mean ** 2

    mean_estimate = utils.restricted_mean_survival_time(expf, t=limit, return_variance=True)[0]
    assert abs(mean_estimate - expected_mean) < 0.001

    var_estimate = utils.restricted_mean_survival_time(expf, t=limit, return_variance=True)[1]
    assert abs(var_estimate - expected_var) < 0.001
def test_rmst_works_at_kaplan_meier_with_left_censoring():
    """A left-censored Kaplan-Meier fit on a single duration of 5 yields an
    RMST of 5 with zero variance when restricted at t=10."""
    observations = [5]
    kmf = KaplanMeierFitter().fit_left_censoring(observations)

    estimate = utils.restricted_mean_survival_time(kmf, t=10, return_variance=True)
    mean_part, variance_part = estimate[0], estimate[1]

    assert abs(mean_part - 5) < 0.0001
    assert abs(variance_part - 0) < 0.0001
def surv_mean_from_python(isdead, nbdays):
    """
    Fit a Kaplan-Meier estimator and return its restricted mean survival time.

    Parameters
    ----------
    isdead:
        event indicators, forwarded to the fitter as ``event_observed``
    nbdays:
        durations, forwarded to the fitter as its first argument

    Returns
    -------
    The value of ``restricted_mean_survival_time`` for the fitted estimator,
    called without an explicit time limit.
    """
    from lifelines.utils import restricted_mean_survival_time

    fitter = KaplanMeierFitter()
    fitter.fit(nbdays, event_observed=isdead)
    return restricted_mean_survival_time(fitter)
def test_rmst_works_at_kaplan_meier_edge_case():
    """Check RMST of a small Kaplan-Meier fit at and around its step points."""
    durations = [1, 2, 3, 4, 10]
    kmf = KaplanMeierFitter().fit(durations)

    def rmst_at(limit):
        # Shorthand: RMST of the fitted model restricted to ``limit``.
        return utils.restricted_mean_survival_time(kmf, t=limit)

    # when S(t)=0, doesn't matter about extending past
    assert rmst_at(10) == rmst_at(10.001)

    assert rmst_at(9.9) <= rmst_at(10.0)

    # Area under the step function: one unit-width bar per survival level.
    assert abs((rmst_at(4) - (1.0 + 0.8 + 0.6 + 0.4))) < 0.0001
    assert abs((rmst_at(4 + 0.1) - (1.0 + 0.8 + 0.6 + 0.4 + 0.2 * 0.1))) < 0.0001
def rmst_plot(model, model2=None, t=np.inf, ax=None, text_position=None, **plot_kwargs):
    """
    Plot the survival function of ``model`` together with its area-under-the-curve (AUC)
    up to the point ``t``. The AUC is known as the restricted mean survival time (RMST).

    To compare two models' survival curves, supply an additional model in ``model2``;
    the region between the two curves is hatched and the difference of the two RMSTs
    is written on the axis instead of a single RMST.

    Parameters
    -----------
    model: lifelines.UnivariateFitter
    model2: lifelines.UnivariateFitter, optional
        used to compute the delta RMST of two models
    t: float
        the upper bound of the expectation
    ax: axis
        axis to draw on; when ``None`` the current axis (``plt.gca()``) is used
    text_position: tuple
        move the text position of the RMST. Defaults to
        ``(10th percentile of model.timeline, 0.15)``.
    plot_kwargs:
        forwarded to ``plot_survival_function`` of each model

    Returns
    -------
    ax:
        the axis that was drawn on

    Examples
    ---------
    >>> from matplotlib import pyplot as plt
    >>> from lifelines import KaplanMeierFitter
    >>> from lifelines.utils import restricted_mean_survival_time
    >>> from lifelines.datasets import load_waltons
    >>> from lifelines.plotting import rmst_plot
    >>>
    >>> df = load_waltons()
    >>> ix = df['group'] == 'miR-137'
    >>> T, E = df['T'], df['E']
    >>> time_limit = 50
    >>>
    >>> kmf_exp = KaplanMeierFitter().fit(T[ix], E[ix], label='exp')
    >>> kmf_con = KaplanMeierFitter().fit(T[~ix], E[~ix], label='control')
    >>>
    >>> ax = plt.subplot(311)
    >>> rmst_plot(kmf_exp, t=time_limit, ax=ax)
    >>>
    >>> ax = plt.subplot(312)
    >>> rmst_plot(kmf_con, t=time_limit, ax=ax)
    >>>
    >>> ax = plt.subplot(313)
    >>> rmst_plot(kmf_exp, model2=kmf_con, t=time_limit, ax=ax)
    """
    from lifelines.utils import restricted_mean_survival_time

    if ax is None:
        ax = plt.gca()

    # Computed once here and reused by both branches below.
    rmst = restricted_mean_survival_time(model, t=t)

    # Take the next colour of the axis' cycle so the curve and its shading match.
    c = ax._get_lines.get_next_color()
    model.plot_survival_function(ax=ax, color=c, ci_show=False, **plot_kwargs)

    if text_position is None:
        text_position = (np.percentile(model.timeline, 10), 0.15)

    if model2 is not None:
        c2 = ax._get_lines.get_next_color()
        rmst2 = restricted_mean_survival_time(model2, t=t)
        model2.plot_survival_function(ax=ax, color=c2, ci_show=False, **plot_kwargs)

        # Evaluate both models on a shared grid (union of both timelines plus ``t``)
        # so the two curves can be compared point by point up to ``t``.
        timeline = np.unique(model.timeline.tolist() + model2.timeline.tolist() + [t])
        predict1 = model.predict(timeline).loc[:t]
        predict2 = model2.predict(timeline).loc[:t]

        # positive
        ax.fill_between(
            timeline[timeline <= t],
            predict1,
            predict2,
            where=predict1 > predict2,
            step="post",
            facecolor="w",
            hatch="|",
            edgecolor="grey",
        )

        # negative
        ax.fill_between(
            timeline[timeline <= t],
            predict1,
            predict2,
            where=predict1 < predict2,
            step="post",
            hatch="-",
            facecolor="w",
            edgecolor="grey",
        )

        # TODO: dynamically pick the text position.
        ax.text(
            text_position[0],
            text_position[1],
            "RMST(%s) -\n   RMST(%s)=%.3f" % (model._label, model2._label, rmst - rmst2),
        )
    else:
        # Add ``t`` to the evaluation points so the shaded area ends exactly at the limit.
        sf_exp_at_limit = model.predict(np.append(model.timeline, t)).sort_index().loc[:t]
        ax.fill_between(sf_exp_at_limit.index, sf_exp_at_limit.values, step="post", color=c, alpha=0.25)
        # TODO: dynamically pick the text position.
        ax.text(text_position[0], text_position[1], "RMST=%.3f" % rmst)

    ax.axvline(t, ls="--", color="k")
    ax.set_ylim(0, 1)
    return ax
def bug_survival_data(bd):
    """
    Fit one Kaplan-Meier model per (target, program, bug, fuzzer, metric) and
    derive restricted mean survival times from those models.

    Rows are added with ``pd.concat`` rather than ``DataFrame.append``, which
    was removed in pandas 2.0; the concat calls mirror what ``append`` did
    internally.

    Parameters
    ----------
    bd:
        object exposing ``frame`` and ``duration``. After ``reset_index()`` the
        frame is read through the columns ``Fuzzer``, ``Target``, ``Program``,
        ``Campaign``, ``BugID``, ``Metric`` and ``Time``. ``duration`` replaces
        missing times and is passed as the second positional argument to
        ``restricted_mean_survival_time``
        (presumably the campaign length -- TODO confirm against the caller).

    Returns
    -------
    (kmf, means):
        ``kmf`` is the result of the grouped ``apply`` holding the fitted
        ``KaplanMeierFitter`` objects; ``means`` holds the restricted mean
        survival times, re-arranged as described by the inline comments.
    """

    def fit_kmf_one(group, supergroup_name, N):
        # ``group.name`` is the (Fuzzer, Metric) groupby key; only the fuzzer is used.
        fuzzer = group.name[0]
        target, program = supergroup_name[:2]
        # Look up the campaign count for this combination; fall back to 1 when
        # the combination is absent from the index of ``N``.
        if (fuzzer, target, program) in N:
            N = N.loc[(fuzzer, target, program)]
        else:
            N = 1
        # Force exactly N rows. Positions without a recorded time become NaN,
        # are filled with ``bd.duration`` and flagged as not observed.
        records = group.reset_index(drop=True)['Time'].reindex(np.arange(N))
        T = records.fillna(bd.duration)
        E = records.notnull()
        kmf = KaplanMeierFitter()
        kmf.fit(T, E, label='%s' % (fuzzer))
        return kmf

    def fit_kmf_all(group, N):
        def fillmissing(group, supergroup_name):
            # Ensure every fuzzer has at least one row for each metric.
            target, program, bug = supergroup_name
            fuzzer = group.name
            metrics = set(['reached', 'triggered'])
            group_metrics = set(group['Metric'].unique())
            for metric in metrics.difference(group_metrics):
                new_row = pd.Series({
                    'Fuzzer': fuzzer,
                    'Target': target,
                    'Program': program,
                    'Campaign': 0,
                    'Metric': metric,
                    'BugID': bug
                })
                # Same single-row frame ``DataFrame.append`` built for a Series.
                row_frame = new_row.to_frame().T.infer_objects()
                group = pd.concat([group, row_frame], ignore_index=True)
            return group

        # Capture the (Target, Program, BugID) key now: the frames produced by
        # ``pd.concat`` below no longer carry the ``name`` attribute.
        name = group.name
        fuzzers = N.index.get_level_values('Fuzzer').unique()
        fuzzers_in_group = group['Fuzzer'].unique()
        for fuzzer in fuzzers:
            if fuzzer in fuzzers_in_group:
                continue
            # Placeholder rows for fuzzers with no row in this group; the
            # columns not set here are left missing by the concat.
            new_rows = [
                pd.Series({
                    'Fuzzer': fuzzer,
                    'Metric': 'reached'
                }),
                pd.Series({
                    'Fuzzer': fuzzer,
                    'Metric': 'triggered'
                }),
            ]
            group = pd.concat([group, pd.DataFrame(new_rows)], ignore_index=True)

        group = group.groupby('Fuzzer').apply(fillmissing, name).reset_index(drop=True)
        subgroups = group.groupby(['Fuzzer', 'Metric']).apply(fit_kmf_one, name, N)
        return subgroups

    df = bd.frame
    # Number of distinct campaigns per (fuzzer, target, program).
    N = df.reset_index().groupby(['Fuzzer', 'Target', 'Program'])['Campaign'].nunique()
    kmf = df.reset_index() \
        .groupby(['Target', 'Program', 'BugID']) \
        .apply(fit_kmf_all, N)

    # get the mean survival time for every (target, program, bug, fuzzer, metric) tuple
    # NOTE(review): ``applymap`` is deprecated in pandas >= 2.1 in favour of
    # ``DataFrame.map``; kept because ``map`` does not exist on older versions.
    means = kmf.applymap(
        lambda k: restricted_mean_survival_time(k, bd.duration))

    # re-arrange the dataframe such that the columns are the metrics
    means = means.stack(level=0)

    # for every (target, bug, fuzzer) tuple, select the row corresponding to
    # the program where the bug was triggered earliest
    earliest = means.groupby(['Target', 'BugID', 'Fuzzer'])[Metric.TRIGGERED.value].idxmin()
    means = means.loc[earliest]

    # re-arrange dataframe so that index is (target, bug) and columns are (fuzzer, metric)
    means = means.droplevel('Program').stack().unstack(-2).unstack()
    return kmf, means