Ejemplo n.º 1
0
def test_rmst_exactely_with_known_solution():
    """
    Check the RMST of a fitted exponential model against closed-form values.

    With no upper limit the RMST is compared to the fitted ``lambda_``; with
    the limit set to ``lambda_`` it is compared to ``lambda_ * (e - 1) / e``.
    """
    durations = np.random.exponential(2, 100)
    fitted = ExponentialFitter().fit(durations)
    scale = fitted.lambda_

    # Unrestricted case: the whole area under the curve.
    unrestricted_error = utils.restricted_mean_survival_time(fitted) - scale
    assert abs(unrestricted_error) < 0.001

    # Restricted case: area under the curve up to t = lambda_.
    restricted_error = utils.restricted_mean_survival_time(fitted, t=scale) - scale * (np.e - 1) / np.e
    assert abs(restricted_error) < 0.001
Ejemplo n.º 2
0
def test_rmst_approximate_solution():
    """
    Compare the RMST computed from a fitted model with the RMST computed from
    its tabulated ``survival_function_``.

    The test expects an ``ApproximationWarning`` to be raised inside the block
    and the two values to agree to within 0.001.
    """
    durations = np.random.exponential(2, 4000)
    grid = np.linspace(0, durations.max(), 10000)
    fitted = ExponentialFitter().fit(durations, timeline=grid)
    limit = fitted.lambda_

    with pytest.warns(exceptions.ApproximationWarning):
        from_model = utils.restricted_mean_survival_time(fitted, t=limit)
        from_curve = utils.restricted_mean_survival_time(fitted.survival_function_, t=limit)

        assert abs(from_model - from_curve) < 0.001
Ejemplo n.º 3
0
def test_rmst_variance():
    """
    Check the (mean, variance) pair returned with ``return_variance=True``
    against closed-form values for an exponential model restricted to t = 1.
    """
    durations = np.random.exponential(2, 1000)
    fitted = ExponentialFitter().fit(durations)
    rate = 1 / fitted.lambda_
    limit = 1

    # Closed-form restricted second moment, mean and variance.
    second_moment = 2 / rate ** 2 * (1 - np.exp(-rate * limit) * (1 + rate * limit))
    expected_mean = 1 / rate * (1 - np.exp(-rate * limit))
    expected_var = second_moment - expected_mean ** 2

    estimate = utils.restricted_mean_survival_time(fitted, t=limit, return_variance=True)

    assert abs(estimate[0] - expected_mean) < 0.001
    assert abs(estimate[1] - expected_var) < 0.001
Ejemplo n.º 4
0
def test_rmst_works_at_kaplan_meier_with_left_censoring():
    """
    Fit a Kaplan-Meier model with ``fit_left_censoring`` on the single
    duration 5 and expect an RMST (limit 10) of 5 with zero variance.
    """
    single_observation = [5]
    fitted = KaplanMeierFitter().fit_left_censoring(single_observation)

    outcome = utils.restricted_mean_survival_time(fitted, t=10, return_variance=True)
    rmst_value = outcome[0]
    rmst_variance = outcome[1]

    assert abs(rmst_value - 5) < 0.0001
    assert abs(rmst_variance - 0) < 0.0001
Ejemplo n.º 5
0
def surv_mean_from_python(isdead, nbdays):
    """
    Fit a Kaplan-Meier model and return its restricted mean survival time.

    Parameters
    ----------
    isdead:
        passed to the fitter as ``event_observed``.
    nbdays:
        passed to the fitter as the durations.

    Returns
    -------
    The value of ``restricted_mean_survival_time`` for the fitted model,
    called with its default time limit.
    """
    from lifelines.utils import restricted_mean_survival_time

    fitter = KaplanMeierFitter()
    fitter.fit(nbdays, event_observed=isdead)

    return restricted_mean_survival_time(fitter)
Ejemplo n.º 6
0
def test_rmst_works_at_kaplan_meier_edge_case():
    """
    Exercise the RMST of a Kaplan-Meier fit around its last observed time and
    at a limit that falls between two observations.
    """
    fitted = KaplanMeierFitter().fit([1, 2, 3, 4, 10])

    def rmst_up_to(limit):
        # Shorthand for the RMST of the fitted model restricted to ``limit``.
        return utils.restricted_mean_survival_time(fitted, t=limit)

    # Once S(t) has reached 0, pushing the limit further adds no area.
    assert rmst_up_to(10) == rmst_up_to(10.001)

    assert rmst_up_to(9.9) <= rmst_up_to(10.0)

    # Step heights of the curve summed over unit-width intervals.
    area_to_four = 1.0 + 0.8 + 0.6 + 0.4
    assert abs((rmst_up_to(4) - (area_to_four))) < 0.0001
    assert abs((rmst_up_to(4 + 0.1) - (1.0 + 0.8 + 0.6 + 0.4 + 0.2 * 0.1))) < 0.0001
Ejemplo n.º 7
0
def rmst_plot(model, model2=None, t=np.inf, ax=None, text_position=None, **plot_kwargs):
    """
    Plot the survival function of the model plus its area-under-the-curve (AUC) up
    until the point ``t``. The AUC is known as the restricted mean survival time (RMST).

    To compare the difference between two models' survival curves, you can supply an
    additional model in ``model2``.

    Parameters
    -----------
    model: lifelines.UnivariateFitter
    model2: lifelines.UnivariateFitter, optional
        used to compute the delta RMST of two models
    t: float
        the upper bound of the expectation
    ax: axis
        the axis to draw on; defaults to ``plt.gca()`` when omitted.
    text_position: tuple
        move the text position of the RMST. Defaults to the 10th percentile
        of ``model.timeline`` on the x-axis and 0.15 on the y-axis.
    plot_kwargs:
        forwarded to ``plot_survival_function`` of each model.

    Returns
    --------
    ax:
        the axis that was drawn on.

    Examples
    ---------

    >>> from matplotlib import pyplot as plt
    >>> from lifelines import KaplanMeierFitter
    >>> from lifelines.utils import restricted_mean_survival_time
    >>> from lifelines.datasets import load_waltons
    >>> from lifelines.plotting import rmst_plot
    >>>
    >>> df = load_waltons()
    >>> ix = df['group'] == 'miR-137'
    >>> T, E = df['T'], df['E']
    >>> time_limit = 50
    >>>
    >>> kmf_exp = KaplanMeierFitter().fit(T[ix], E[ix], label='exp')
    >>> kmf_con = KaplanMeierFitter().fit(T[~ix], E[~ix], label='control')
    >>>
    >>> ax = plt.subplot(311)
    >>> rmst_plot(kmf_exp, t=time_limit, ax=ax)
    >>>
    >>> ax = plt.subplot(312)
    >>> rmst_plot(kmf_con, t=time_limit, ax=ax)
    >>>
    >>> ax = plt.subplot(313)
    >>> rmst_plot(kmf_exp, model2=kmf_con, t=time_limit, ax=ax)

    """
    from lifelines.utils import restricted_mean_survival_time

    if ax is None:
        ax = plt.gca()

    # Computed once here and reused by whichever branch runs below.
    rmst = restricted_mean_survival_time(model, t=t)
    # NOTE: ``_get_lines`` is a private matplotlib attribute; it is used to pull
    # the next colour from the axis' colour cycle so curve and fill match.
    c = ax._get_lines.get_next_color()
    model.plot_survival_function(ax=ax, color=c, ci_show=False, **plot_kwargs)

    if text_position is None:
        text_position = (np.percentile(model.timeline, 10), 0.15)

    if model2 is not None:
        c2 = ax._get_lines.get_next_color()
        rmst2 = restricted_mean_survival_time(model2, t=t)
        model2.plot_survival_function(ax=ax, color=c2, ci_show=False, **plot_kwargs)

        # Evaluate both curves on the union of their timelines (plus the limit
        # itself) so they can be compared point by point up to ``t``.
        timeline = np.unique(model.timeline.tolist() + model2.timeline.tolist() + [t])
        predict1 = model.predict(timeline).loc[:t]
        predict2 = model2.predict(timeline).loc[:t]
        # positive: regions where ``model`` lies above ``model2``
        ax.fill_between(
            timeline[timeline <= t],
            predict1,
            predict2,
            where=predict1 > predict2,
            step="post",
            facecolor="w",
            hatch="|",
            edgecolor="grey",
        )

        # negative: regions where ``model`` lies below ``model2``
        ax.fill_between(
            timeline[timeline <= t],
            predict1,
            predict2,
            where=predict1 < predict2,
            step="post",
            hatch="-",
            facecolor="w",
            edgecolor="grey",
        )

        ax.text(
            text_position[0],
            text_position[1],
            "RMST(%s) -\n   RMST(%s)=%.3f" % (model._label, model2._label, rmst - rmst2),
        )  # dynamically pick this.
    else:
        # Add ``t`` to the evaluation points so the shaded area ends exactly at the limit.
        sf_exp_at_limit = model.predict(np.append(model.timeline, t)).sort_index().loc[:t]
        ax.fill_between(sf_exp_at_limit.index, sf_exp_at_limit.values, step="post", color=c, alpha=0.25)
        ax.text(text_position[0], text_position[1], "RMST=%.3f" % rmst)  # dynamically pick this.

    ax.axvline(t, ls="--", color="k")
    ax.set_ylim(0, 1)
    return ax
Ejemplo n.º 8
0
def bug_survival_data(bd):
    """
    Fit Kaplan-Meier models per (target, program, bug, fuzzer, metric) and
    summarise each model by its restricted mean survival time.

    Parameters
    ----------
    bd:
        object exposing ``frame`` (a DataFrame) and ``duration``. After
        ``reset_index()`` the frame is read through the columns ``Fuzzer``,
        ``Target``, ``Program``, ``Campaign``, ``Metric``, ``BugID`` and
        ``Time``. ``duration`` replaces missing times and is passed as the
        RMST time limit.

    Returns
    -------
    (kmf, means):
        ``kmf`` holds the fitted ``KaplanMeierFitter`` objects; ``means`` holds
        the restricted mean survival times, reshaped as described by the
        inline comments at the end of the function.
    """
    def append_rows(frame, rows):
        # ``DataFrame.append`` was removed in pandas 2.0; build the extra rows
        # as a frame and concatenate instead. Columns missing from ``rows``
        # are filled with NaN by the concatenation.
        extra = pd.DataFrame(rows).infer_objects()
        return pd.concat([frame, extra], ignore_index=True)

    def fit_kmf_one(group, supergroup_name, N):
        # ``group`` is keyed by (Fuzzer, Metric); ``supergroup_name`` by
        # (Target, Program, BugID).
        fuzzer = group.name[0]
        target, program = supergroup_name[:2]
        # Number of campaigns recorded for this fuzzer/target/program,
        # falling back to a single one when none are recorded.
        if (fuzzer, target, program) in N:
            N = N.loc[(fuzzer, target, program)]
        else:
            N = 1
        # Pad the times to one entry per campaign: entries without a time are
        # treated as unobserved events lasting the full duration.
        records = group.reset_index(drop=True)['Time'].reindex(np.arange(N))
        T = records.fillna(bd.duration)
        E = records.notnull()
        kmf = KaplanMeierFitter()
        kmf.fit(T, E, label='%s' % (fuzzer))
        return kmf

    def fit_kmf_all(group, N):
        def fillmissing(group, supergroup_name):
            # Make sure every fuzzer has at least one row for each metric.
            target, program, bug = supergroup_name
            fuzzer = group.name
            metrics = set(['reached', 'triggered'])
            group_metrics = set(group['Metric'].unique())
            new_rows = [
                pd.Series({
                    'Fuzzer': fuzzer,
                    'Target': target,
                    'Program': program,
                    'Campaign': 0,
                    'Metric': metric,
                    'BugID': bug
                })
                for metric in metrics.difference(group_metrics)
            ]
            if new_rows:
                group = append_rows(group, new_rows)
            return group

        name = group.name
        fuzzers = N.index.get_level_values('Fuzzer').unique()
        fuzzers_in_group = group['Fuzzer'].unique()
        # Fuzzers with no row for this bug get placeholder rows (without a
        # time) for both metrics.
        placeholder_rows = []
        for fuzzer in fuzzers:
            if fuzzer in fuzzers_in_group:
                continue
            placeholder_rows.extend([
                pd.Series({
                    'Fuzzer': fuzzer,
                    'Metric': 'reached'
                }),
                pd.Series({
                    'Fuzzer': fuzzer,
                    'Metric': 'triggered'
                }),
            ])
        if placeholder_rows:
            group = append_rows(group, placeholder_rows)

        group = group.groupby('Fuzzer').apply(fillmissing,
                                              name).reset_index(drop=True)

        subgroups = group.groupby(['Fuzzer',
                                   'Metric']).apply(fit_kmf_one, name, N)
        return subgroups

    df = bd.frame
    N = df.reset_index().groupby(['Fuzzer', 'Target',
                                  'Program'])['Campaign'].nunique()
    kmf = df.reset_index() \
            .groupby(['Target', 'Program', 'BugID']) \
            .apply(fit_kmf_all, N)

    # get the mean survival time for every (target, program, bug, fuzzer, metric) tuple
    # ``DataFrame.applymap`` is deprecated since pandas 2.1 in favour of
    # ``DataFrame.map``; use whichever the installed pandas provides.
    elementwise = kmf.map if hasattr(pd.DataFrame, 'map') else kmf.applymap
    means = elementwise(
        lambda k: restricted_mean_survival_time(k, bd.duration))
    # re-arrange the dataframe such that the columns are the metrics
    means = means.stack(level=0)
    # for every (target, bug, fuzzer) tuple, select the row corresponding to the program where the bug was triggered earliest
    means = means.loc[means.groupby(['Target', 'BugID', 'Fuzzer'
                                     ])[Metric.TRIGGERED.value].idxmin()]
    # re-arrange dataframe so that index is (target, bug) and columns are (fuzzer, metric)
    means = means.droplevel('Program').stack().unstack(-2).unstack()

    return kmf, means