예제 #1
0
def hazard_plot(df, save_path=None):
    """
    Plot smoothed Nelson-Aalen hazard curves for three bridge groups.

    Rows of `df` are split with the 'Steel' and 'P/S' indicator columns into
    concrete (neither flag equals 1), steel and P/S groups. A Nelson-Aalen
    fitter is fitted to each group and its hazard estimate (bandwidth 10) is
    drawn on one shared axis. The figure is then saved, shown and closed.

    Parameters
    ----------
    df : DataFrame
        Must provide the columns 'duration', 'degraded_obs', 'Steel' and
        'P/S'.
    save_path : str, optional
        Destination of the saved figure. When omitted, the historical
        hard-coded location is used so existing callers are unaffected.

    Returns
    -------
    None
    """
    if save_path is None:
        save_path = (
            '/Users/ian/Documents/exploratory/bridges/reports/figures/hazard_c_s_ps.png'
        )

    durations = df["duration"]
    observed = df["degraded_obs"]

    concrete = ((df['P/S'] != 1) & (df['Steel'] != 1))
    steel = (df['Steel'] == 1)
    ps = (df['P/S'] == 1)

    ax = plt.subplot(111)

    # (label, row mask, extra plot kwargs); figsize is only passed on the
    # first curve, exactly as before.
    groups = (
        ('Concrete Bridges', concrete, {'figsize': (12, 10)}),
        ('Steel Bridges', steel, {}),
        ('P/S Bridges', ps, {}),
    )
    for label, mask, extra_kwargs in groups:
        # A fresh fitter per group: no state is shared between the curves and
        # no throw-away fit on the full data set is needed.
        fitter = ll.NelsonAalenFitter()
        fitter.fit(durations=durations[mask],
                   event_observed=observed[mask],
                   label=label)
        fitter.plot_hazard(bandwidth=10, ax=ax, **extra_kwargs)

    plt.title('Hazard Functions: Concrete vs Steel vs P/S', fontsize=20)
    plt.savefig(save_path)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.clf()
    plt.close()
예제 #2
0
def nelson_plot(dataframe, group_col=None, event_col='TTE', observed_col='OBS',
                bandwidth=8, xlim=None, ax=None):
    """
    Creates a Nelson-Aalen hazard plot, one curve per group in `group_col`

    Parameters
    ----------
    dataframe : DataFrame
        Data to use for plots
    group_col : str, optional
        If provided, one curve is fitted and drawn per distinct non-null
        value of this column. Otherwise a single curve labelled
        'Overall Survival Trend' is drawn for all rows.
    event_col : str, optional
        Name of the time-to-event (duration) column
    observed_col : str, optional
        Name of the event observed column. 1 - observed, 0 otherwise.
    bandwidth : int
        Bandwidth to use for Hazard estimate
    xlim : int, optional
        Right limit of the x-axis; the left limit is fixed at 0
    ax : axis, optional
        If adding to an existing plot, set this to the existing ax value

    Returns
    -------
    None
        The curves, x-limits and title are applied to the axis as a side
        effect.
    """

    naf = lifelines.NelsonAalenFitter()

    if group_col is not None:
        title_add = ' by ' + group_col
        # NaN never compares equal to itself, so a NaN label would select no
        # rows and lead to a fit on empty data; skip missing labels.
        labels = dataframe[group_col].dropna().unique()
        selections = [(dataframe[group_col] == label, label)
                      for label in labels]
    else:
        title_add = ''
        selections = [(None, 'Overall Survival Trend')]

    for mask, label in selections:
        subset = dataframe if mask is None else dataframe[mask]
        naf.fit(subset[event_col],
                event_observed=subset[observed_col],
                label=label)
        # Only forward `ax` once one exists; the first call lets lifelines
        # pick the axis and every later curve is drawn onto that same axis.
        plot_kwargs = {'bandwidth': bandwidth}
        if ax is not None:
            plot_kwargs['ax'] = ax
        ax = naf.plot_hazard(**plot_kwargs)

    if ax is None:
        # Nothing was drawn (no groups); fall back to the current axes so the
        # x-limits and title below still have a target.
        ax = plt.gca()

    if xlim is not None:
        ax.set_xlim(left=0, right=xlim)

    # Title the axis that was actually drawn on. plt.title() would title the
    # *current* pyplot axes, which may differ from a caller-supplied `ax`.
    ax.set_title('Estimated Hazard Rate' + title_add)
예제 #3
0
def test_fleming_harrington_same_as_nelson_aalen_with_no_counts():
    """
    Compare surpyval's Fleming-Harrington cumulative hazard with lifelines'
    Nelson-Aalen estimate (smoothing disabled) on un-weighted samples.

    Runs 100 randomised trials. Each trial draws two Weibull parameters, a
    random sample of random size, and 100 evaluation points spanning half the
    sample minimum to twice the sample maximum.

    Raises
    ------
    AssertionError
        If the two estimates differ at any evaluation point.
    """
    naf = lifelines.NelsonAalenFitter(nelson_aalen_smoothing=False)
    param_bounds = ((1, 100), (0.5, 20))
    for _ in range(100):
        test_params = np.array(
            [np.random.uniform(low, high) for low, high in param_bounds]
        )
        # Draw a scalar rather than a length-1 array: int() on an array with
        # ndim > 0 is deprecated since NumPy 1.25.
        sample_size = int(np.random.uniform(2, 1000))
        x = surpyval.Weibull.random(sample_size, *test_params)
        x_test = np.random.uniform(x.min()/2, x.max()*2, 100)
        ll_est = naf.fit(x).predict(x_test).values
        surp_est = surpyval.FlemingHarrington.fit(x).Hf(x_test)
        # The third positional argument of np.allclose is rtol; spell it out.
        if not np.allclose(ll_est, surp_est, rtol=1e-15):
            raise AssertionError('Fleming-Harrington fails different to lifelines?!')
예제 #4
0
def test_nelson_aalen_against_lifelines():
    """
    Compare surpyval's Nelson-Aalen cumulative hazard with lifelines'
    Nelson-Aalen estimate (smoothing disabled) on weighted samples.

    Runs 100 randomised trials. Each trial draws two Weibull parameters, a
    random sample of random size, one integer count shared by every
    observation, and 100 evaluation points spanning half the sample minimum
    to twice the sample maximum.

    Raises
    ------
    AssertionError
        If the two estimates differ at any evaluation point.
    """
    naf = lifelines.NelsonAalenFitter(nelson_aalen_smoothing=False)
    param_bounds = ((1, 100), (0.5, 20))
    for _ in range(100):
        test_params = np.array(
            [np.random.uniform(low, high) for low, high in param_bounds]
        )
        # Draw a scalar rather than a length-1 array: int() on an array with
        # ndim > 0 is deprecated since NumPy 1.25.
        sample_size = int(np.random.uniform(2, 1000))
        x = surpyval.Weibull.random(sample_size, *test_params)
        # Same count for every observation in this trial.
        n = np.ones_like(x) * int(np.random.uniform(1, 5))
        x_test = np.random.uniform(x.min()/2, x.max()*2, 100)
        ll_est = naf.fit(x, weights=n).predict(x_test).values
        surp_est = surpyval.NelsonAalen.fit(x, n=n).Hf(x_test)
        # The third positional argument of np.allclose is rtol; spell it out.
        if not np.allclose(ll_est, surp_est, rtol=1e-15):
            # Name the estimator that is actually under test here.
            raise AssertionError('Nelson-Aalen different to lifelines?!')
예제 #5
0
def test_fleming_harrington_HF_less_than_or_equal_to_nelson_aalen_with_counts():
    """
    Compare surpyval's Fleming-Harrington cumulative hazard with lifelines'
    Nelson-Aalen estimate (smoothing disabled) when every observation carries
    a count of at least 2.

    Runs 100 randomised trials. Each trial draws two Weibull parameters, a
    random sample of random size, one integer count shared by every
    observation, and 100 evaluation points spanning half the sample minimum
    to twice the sample maximum.

    Raises
    ------
    AssertionError
        If the Fleming-Harrington value minus the Nelson-Aalen value is
        negative at every evaluation point.
    """
    naf = lifelines.NelsonAalenFitter(nelson_aalen_smoothing=False)
    param_bounds = ((1, 100), (0.5, 20))
    for _ in range(100):
        test_params = np.array(
            [np.random.uniform(low, high) for low, high in param_bounds]
        )

        # Draw a scalar rather than a length-1 array: int() on an array with
        # ndim > 0 is deprecated since NumPy 1.25.
        sample_size = int(np.random.uniform(2, 1000))
        x = surpyval.Weibull.random(sample_size, *test_params)
        # Same count for every observation in this trial.
        n = np.ones_like(x) * int(np.random.uniform(2, 5))

        x_test = np.random.uniform(x.min()/2, x.max()*2, 100)
        ll_na_est = naf.fit(x, weights=n).predict(x_test).values
        surp_est = surpyval.FlemingHarrington.fit(x, n=n).Hf(x_test)
        # Original intent as stated: FH cumulative hazard should be less than
        # NA Hf.
        # NOTE(review): the condition below only fails when FH is strictly
        # below NA at *every* point, which does not match the test name or
        # the failure message. Confirm the intended direction and whether
        # `.any()` was meant; the check is left unchanged until then.
        diff = surp_est - ll_na_est
        if (diff < 0).all():
            raise AssertionError('Fleming-Harrington not all below NelsonAalen')
예제 #6
0
import plotly
import lifelines
from datatable import data, T, E
from color import colors

# Single-cell subplot grid that will hold the Nelson-Aalen curve.
naefig = plotly.subplots.make_subplots(rows=1, cols=1, print_grid=False)

# Fit the Nelson-Aalen estimator on the imported durations (T) and event
# indicators (E).
nae = lifelines.NelsonAalenFitter()
nae.fit(T, event_observed=E)

# add_trace(..., row=, col=) replaces the deprecated append_trace; the trace
# still lands in cell (1, 1).
naefig.add_trace(
    plotly.graph_objs.Scatter(x=nae.cumulative_hazard_.index,
                              y=nae.cumulative_hazard_.values.flatten(),
                              name="Nelson Aalen"),
    row=1, col=1)

# Apply the shared colour theme and label the axes.
# NOTE(review): the y values plotted are the fitter's cumulative hazard, yet
# the axis is titled "Churning Probability" - confirm the label is intended.
naefig.update_layout(plot_bgcolor=colors['background'],
                     paper_bgcolor=colors['background'],
                     font_color=colors['text'],
                     xaxis=dict(title="Time Period"),
                     yaxis=dict(title="Churning Probability"))
예제 #7
0
# Keep only the rows whose final churn flag is present.
churn_months = churn_months[churn_months['is_churn_final'].notna()]

# Distribution of the month column.
sns.distplot(churn_months['month'])

# Durations and event indicators used by both estimators below.
T = churn_months['month']
E = churn_months['is_churn_final']

# --- Kaplan-Meier -----------------------------------------------------------
kmf = ll.KaplanMeierFitter()
kmf.fit(T, event_observed=E)

# Survival function as a flat frame (the timeline index becomes a column).
sf = kmf.survival_function_.reset_index()
# Bare expressions kept from the interactive session (notebook-style display).
kmf.cumulative_density_
kmf.median_
kmf.plot()
sf

# Step plot of the survival estimate scaled by 100, saved to disk.
plt.step(
    sf['timeline'],
    sf['KM_estimate'] * 100,
    where='post',
    color='#1e488f',
)
plt.xlim(0, 27.5)
plt.savefig(
    data_directory + 'data_viz/km_plot2.png',
    transparent=True,
    dpi=2000,
)

# --- Nelson-Aalen -----------------------------------------------------------
naf = ll.NelsonAalenFitter()
naf.fit(T, event_observed=E)

naf.plot()
# Cumulative hazard as a flat frame, then only the rows at timeline values
# 6, 12, 18 and 24.
ch = naf.cumulative_hazard_.reset_index()
ch_sub = ch[ch['timeline'].isin([6, 12, 18, 24])]
ch_sub
예제 #8
0
# Common evaluation timeline: 71 evenly spaced points from 0 to 70 inclusive.
t = np.linspace(0, 70, 71)
# Fit on the rows selected by `w_aid` and draw onto the existing axis.
# (`kmf_w_aid`, `kmf_no_aid`, `duration`, `arrested`, `w_aid` and `ax` are
# defined earlier, outside this fragment.)
kmf_w_aid.fit(duration[w_aid], event_observed=arrested[w_aid], 
              timeline=t, label="Received Financial Aid")
ax = kmf_w_aid.plot(ax=ax)
#print("Median survival time of democratic:", kmf.median_)

# Same fit for the complementary rows (`~w_aid`), drawn on the same axis.
kmf_no_aid.fit(duration[~w_aid], event_observed=arrested[~w_aid], 
               timeline=t, label="No Financial Aid")
ax = kmf_no_aid.plot(ax=ax)
#print("Median survival time of non-democratic:", kmf.median_)

# Fix the y-range to [0, 1] and title the comparison plot.
plt.ylim(0,1)
plt.title("Recidivism for Participants Who Received Financial Aid \nvs. Those Who Did Not");

# + {"id": "zdM4GOAOsgzN", "colab_type": "code", "outputId": "a85fa30c-4695-4e93-ffa0-a15f84e733e8", "colab": {"base_uri": "https://localhost:8080/", "height": 500}}
# Nelson-Aalen fit on all rows; print the first rows of the cumulative hazard
# table and plot the estimate.
naf = lifelines.NelsonAalenFitter()
naf.fit(duration, arrested)

print(naf.cumulative_hazard_.head())
naf.plot();

# + {"id": "uJVPVtmYsgzR", "colab_type": "code", "outputId": "e11867c7-9a27-4c1d-833c-d07e23ffcbd4", "colab": {"base_uri": "https://localhost:8080/", "height": 403}}
# Separate Nelson-Aalen fitters for the two groups, so both can be drawn on
# one axis.
naf_w_aid = lifelines.NelsonAalenFitter()
naf_no_aid = lifelines.NelsonAalenFitter()

# The first plot call creates the axis; the second reuses it via `ax=ax`.
# NOTE(review): `loc=slice(0, 50)` presumably restricts the drawn curve to
# timeline values 0 through 50 - confirm against the lifelines plotting docs.
naf_w_aid.fit(duration[w_aid], event_observed=arrested[w_aid], 
              timeline=t, label="Received Financial Aid")
ax = naf_w_aid.plot(loc=slice(0, 50))
naf_no_aid.fit(duration[~w_aid], event_observed=arrested[~w_aid], 
               timeline=t, label="No Financial Aid")
ax = naf_no_aid.plot(ax=ax, loc=slice(0, 50))