# Default output location used by hazard_plot when no save_path is given.
_HAZARD_PLOT_DEFAULT_PATH = (
    '/Users/ian/Documents/exploratory/bridges/reports/figures/hazard_c_s_ps.png'
)


def hazard_plot(df, save_path=_HAZARD_PLOT_DEFAULT_PATH):
    """Plot smoothed Nelson-Aalen hazard curves for three bridge groups.

    Rows of ``df`` are split into three groups using the ``'Steel'`` and
    ``'P/S'`` columns:

    * steel: ``df['Steel'] == 1``
    * P/S: ``df['P/S'] == 1``
    * concrete: neither of the two columns equals 1

    For each group a Nelson-Aalen fitter is fitted on ``df['duration']``
    with ``df['degraded_obs']`` as the event indicator, and its hazard is
    drawn (bandwidth 10) on one shared axes. The figure is then saved,
    shown, and closed.

    Parameters
    ----------
    df : DataFrame
        Must provide the columns ``'duration'``, ``'degraded_obs'``,
        ``'Steel'`` and ``'P/S'``.
    save_path : str or None, optional
        Where the figure is written with ``plt.savefig``. Defaults to the
        path that used to be hard-coded in this function. Pass ``None`` to
        skip saving.

    Returns
    -------
    None
    """
    T = df["duration"]
    E = df["degraded_obs"]

    is_steel = df['Steel'] == 1
    is_ps = df['P/S'] == 1
    is_concrete = (df['P/S'] != 1) & (df['Steel'] != 1)

    ax = plt.subplot(111)

    # (row mask, legend label, extra plot kwargs). `figsize` is forwarded
    # only on the first call, exactly as the original code did.
    series = (
        (is_concrete, 'Concrete Bridges', {'figsize': (12, 10)}),
        (is_steel, 'Steel Bridges', {}),
        (is_ps, 'P/S Bridges', {}),
    )
    for mask, label, extra_kwargs in series:
        # A fresh fitter per group: re-fitting one shared instance made every
        # "model" variable point at the same, last-fitted object.
        fitter = ll.NelsonAalenFitter()
        fitter.fit(durations=T[mask], event_observed=E[mask], label=label)
        fitter.plot_hazard(bandwidth=10, ax=ax, **extra_kwargs)

    plt.title('Hazard Functions: Concrete vs Steel vs P/S', fontsize=20)
    if save_path is not None:
        plt.savefig(save_path)
    plt.show()
    # Release the figure so later plots start from a clean state.
    plt.clf()
    plt.close()
def _plot_hazard_estimate(naf, durations, observed, label, bandwidth, ax):
    """Fit `naf` on one series, draw its hazard, and return the axes used."""
    naf.fit(durations, event_observed=observed, label=label)
    if ax is None:
        # Let the fitter pick/create the axes when none was supplied.
        return naf.plot_hazard(bandwidth=bandwidth)
    return naf.plot_hazard(ax=ax, bandwidth=bandwidth)


def nelson_plot(dataframe, group_col=None, event_col='TTE', observed_col='OBS',
                bandwidth=8, xlim=None, ax=None):
    """
    Creates a Nelson-Aalen hazard plot, one curve per group in `group_col`

    Parameters
    ----------
    dataframe : DataFrame
        Data to use for plots
    group_col : str, optional
        If provided, one curve is fitted and drawn for every unique value
        of this column; otherwise a single curve labelled
        'Overall Survival Trend' is drawn from all rows
    event_col : str, optional
        Name of the time to event column
    observed_col : str, optional
        Name of the event observed column. 1 - observed, 0 otherwise.
    bandwidth : int
        Bandwidth passed to the fitter's `plot_hazard`
    xlim : int, optional
        If provided, the x-axis is limited to the range 0..xlim
    ax : axis, optional
        If adding to an existing plot, set this to the existing ax value

    Returns
    -------
    None
        The curves are drawn on `ax` (or on the axes produced by the first
        `plot_hazard` call when `ax` is not given) and the axes is titled
        'Estimated Hazard Rate[ by <group_col>]'.
    """
    naf = lifelines.NelsonAalenFitter()

    if group_col is not None:
        title_add = ' by ' + group_col
        for group in dataframe[group_col].unique():
            grp = (dataframe[group_col] == group)
            # Every group is drawn on the same axes; the first call creates
            # it when the caller did not pass one.
            ax = _plot_hazard_estimate(
                naf, dataframe[event_col][grp], dataframe[observed_col][grp],
                group, bandwidth, ax)
    else:
        title_add = ''
        ax = _plot_hazard_estimate(
            naf, dataframe[event_col], dataframe[observed_col],
            'Overall Survival Trend', bandwidth, ax)

    if ax is None:
        # Nothing was plotted (group_col had no values) and no axes was
        # passed: fall back to pyplot's current axes, which is what
        # plt.title targeted before, instead of failing on ax.set_xlim.
        ax = plt.gca()

    if xlim is not None:
        ax.set_xlim(left=0, right=xlim)

    # Title the axes that was actually plotted on. plt.title targets pyplot's
    # *current* axes, which is not necessarily the `ax` supplied by the caller.
    ax.set_title('Estimated Hazard Rate' + title_add)
def test_fleming_harrington_same_as_nelson_aalen_with_no_counts():
    """Compare surpyval's Fleming-Harrington Hf with lifelines' Nelson-Aalen.

    Runs 100 rounds. Each round draws two random parameters, samples data
    with ``surpyval.Weibull.random`` (no counts/weights supplied), and checks
    that ``surpyval.FlemingHarrington.fit(x).Hf`` matches the lifelines
    ``NelsonAalenFitter`` (``nelson_aalen_smoothing=False``) prediction at
    100 random evaluation points.

    Raises
    ------
    AssertionError
        If the two estimates are not close according to ``np.allclose``.
    """
    naf = lifelines.NelsonAalenFitter(nelson_aalen_smoothing=False)
    param_bounds = ((1, 100), (0.5, 20))

    for _ in range(100):
        # One uniform draw per (low, high) pair, in the order listed above.
        test_params = np.array(
            [np.random.uniform(*bounds) for bounds in param_bounds])

        # Draw a scalar sample size. Calling int() on a size-1 array (the
        # previous `np.random.uniform(2, 1000, 1)`) is deprecated since
        # NumPy 1.25; a scalar draw consumes the same single random value.
        sample_size = int(np.random.uniform(2, 1000))
        x = surpyval.Weibull.random(sample_size, *test_params)

        # Evaluation points span from half the smallest to twice the largest
        # sampled value.
        x_test = np.random.uniform(x.min() / 2, x.max() * 2, 100)

        ll_est = naf.fit(x).predict(x_test).values
        surp_est = surpyval.FlemingHarrington.fit(x).Hf(x_test)

        # The third positional argument of np.allclose is rtol.
        if not np.allclose(ll_est, surp_est, 1e-15):
            raise AssertionError(
                'Fleming-Harrington fails different to lifelines?!')
def test_nelson_aalen_against_lifelines():
    """Compare surpyval's Nelson-Aalen Hf with lifelines' Nelson-Aalen.

    Runs 100 rounds. Each round draws two random parameters, samples data
    with ``surpyval.Weibull.random``, assigns every observation the same
    random integer count (1 to 4), and checks that
    ``surpyval.NelsonAalen.fit(x, n=n).Hf`` matches the lifelines
    ``NelsonAalenFitter`` (``nelson_aalen_smoothing=False``, ``weights=n``)
    prediction at 100 random evaluation points.

    Raises
    ------
    AssertionError
        If the two estimates are not close according to ``np.allclose``.
    """
    naf = lifelines.NelsonAalenFitter(nelson_aalen_smoothing=False)
    param_bounds = ((1, 100), (0.5, 20))

    for _ in range(100):
        # One uniform draw per (low, high) pair, in the order listed above.
        test_params = np.array(
            [np.random.uniform(*bounds) for bounds in param_bounds])

        # Draw a scalar sample size. Calling int() on a size-1 array (the
        # previous `np.random.uniform(2, 1000, 1)`) is deprecated since
        # NumPy 1.25; a scalar draw consumes the same single random value.
        sample_size = int(np.random.uniform(2, 1000))
        x = surpyval.Weibull.random(sample_size, *test_params)

        # Same count for every observation; int() truncates the draw from
        # [1, 5) to one of 1, 2, 3, 4.
        n = np.ones_like(x) * int(np.random.uniform(1, 5))

        # Evaluation points span from half the smallest to twice the largest
        # sampled value.
        x_test = np.random.uniform(x.min() / 2, x.max() * 2, 100)

        ll_est = naf.fit(x, weights=n).predict(x_test).values
        surp_est = surpyval.NelsonAalen.fit(x, n=n).Hf(x_test)

        # The third positional argument of np.allclose is rtol.
        if not np.allclose(ll_est, surp_est, 1e-15):
            # Message previously said "Kaplan-Meier", but this test compares
            # Nelson-Aalen estimates.
            raise AssertionError('Nelson-Aalen different to lifelines?!')
def test_fleming_harrington_HF_less_than_or_equal_to_nelson_aalen_with_counts():
    """Compare surpyval's Fleming-Harrington Hf with lifelines' Nelson-Aalen
    when every observation carries a count greater than one.

    Runs 100 rounds. Each round draws two random parameters, samples data
    with ``surpyval.Weibull.random``, assigns every observation the same
    random integer count (2 to 4), and computes
    ``diff = FlemingHarrington Hf - lifelines Nelson-Aalen`` at 100 random
    evaluation points.

    Raises
    ------
    AssertionError
        Only when *every* element of ``diff`` is negative.
    """
    naf = lifelines.NelsonAalenFitter(nelson_aalen_smoothing=False)
    param_bounds = ((1, 100), (0.5, 20))

    for _ in range(100):
        # One uniform draw per (low, high) pair, in the order listed above.
        test_params = np.array(
            [np.random.uniform(*bounds) for bounds in param_bounds])

        # Draw a scalar sample size. Calling int() on a size-1 array (the
        # previous `np.random.uniform(2, 1000, 1)`) is deprecated since
        # NumPy 1.25; a scalar draw consumes the same single random value.
        sample_size = int(np.random.uniform(2, 1000))
        x = surpyval.Weibull.random(sample_size, *test_params)

        # Same count for every observation; int() truncates the draw from
        # [2, 5) to one of 2, 3, 4.
        n = np.ones_like(x) * int(np.random.uniform(2, 5))

        # Evaluation points span from half the smallest to twice the largest
        # sampled value.
        x_test = np.random.uniform(x.min() / 2, x.max() * 2, 100)

        ll_na_est = naf.fit(x, weights=n).predict(x_test).values
        surp_est = surpyval.FlemingHarrington.fit(x, n=n).Hf(x_test)

        # FH cumulative hazard should be less than NA Hf
        # NOTE(review): the condition below fires only when FH is strictly
        # below NA at *all* points, which does not match the comment above,
        # the test name, or the failure message. The check is kept unchanged
        # here; confirm the intended direction before tightening it.
        diff = surp_est - ll_na_est
        if (diff < 0).all():
            raise AssertionError(
                'Fleming-Harrington not all below NelsonAalen')
# Builds `naefig`, a single-panel plotly figure showing the Nelson-Aalen
# cumulative hazard estimate fitted on durations `T` and event flags `E`.
import plotly
# `import plotly` on its own is not guaranteed to load these submodules;
# import them explicitly so the attribute accesses below cannot fail.
import plotly.graph_objs
import plotly.subplots
import lifelines
from datatable import data, T, E
from color import colors

naefig = plotly.subplots.make_subplots(rows=1, cols=1, print_grid=False)

nae = lifelines.NelsonAalenFitter()
nae.fit(T, event_observed=E)

# add_trace(..., row=, col=) replaces the deprecated append_trace(trace, 1, 1).
naefig.add_trace(
    plotly.graph_objs.Scatter(
        x=nae.cumulative_hazard_.index,
        # cumulative_hazard_ is flattened to a 1-D array for the y values.
        y=nae.cumulative_hazard_.values.flatten(),
        name="Nelson Aalen",
    ),
    row=1,
    col=1,
)

# NOTE(review): the y values are cumulative hazard estimates, yet the axis is
# titled "Churning Probability" -- confirm the label is what is intended.
naefig.update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background'],
    font_color=colors['text'],
    xaxis=dict(title="Time Period"),
    yaxis=dict(title="Churning Probability"),
)
# Churn survival exploration: Kaplan-Meier survival curve and Nelson-Aalen
# cumulative hazard over the `month` column of `churn_months`.

# Keep only the rows that have a non-null churn flag.
churn_months = churn_months.loc[churn_months['is_churn_final'].notnull()]

# Distribution of the duration column.
sns.distplot(churn_months['month'])

# Durations (T) and event indicator (E) shared by both fitters below.
T = churn_months['month']
E = churn_months['is_churn_final']

# --- Kaplan-Meier ----------------------------------------------------------
kmf = ll.KaplanMeierFitter()
kmf.fit(T, event_observed=E)
sf = kmf.survival_function_.reset_index()

# Bare expressions: they do nothing when run as a plain script and are
# presumably kept for interactive/notebook display.
kmf.cumulative_density_
kmf.median_
kmf.plot()
sf

# Survival estimate redrawn as a step curve on a 0-100 scale.
plt.step(
    sf['timeline'],
    sf['KM_estimate'] * 100,
    where='post',
    color='#1e488f',
)
plt.xlim(0, 27.5)
plt.savefig(
    data_directory + 'data_viz/km_plot2.png',
    transparent=True,
    dpi=2000,
)

# --- Nelson-Aalen ----------------------------------------------------------
naf = ll.NelsonAalenFitter()
naf.fit(T, event_observed=E)
naf.plot()

# Cumulative hazard rows whose timeline value is 6, 12, 18 or 24.
ch = naf.cumulative_hazard_.reset_index()
ch_sub = ch[ch['timeline'].isin([6, 12, 18, 24])]
ch_sub
# Kaplan-Meier curves for the two financial-aid groups, drawn on a shared axes.
# `kmf_w_aid`, `kmf_no_aid`, `duration`, `arrested`, `w_aid` and `ax` are
# defined before this point (not shown here).

# Common timeline for every fit below: 71 evenly spaced points, 0, 1, ..., 70.
t = np.linspace(0, 70, 71)
# Group selected by the boolean mask `w_aid`.
kmf_w_aid.fit(duration[w_aid], event_observed=arrested[w_aid], timeline=t, label="Received Financial Aid")
ax = kmf_w_aid.plot(ax=ax)
# NOTE(review): the commented-out prints below refer to `kmf` and to
# "democratic"/"non-democratic", none of which are used in this cell --
# they look like leftovers and can probably be removed.
#print("Median survival time of democratic:", kmf.median_)
# Complementary group (`~w_aid`), plotted on the same axes.
kmf_no_aid.fit(duration[~w_aid], event_observed=arrested[~w_aid], timeline=t, label="No Financial Aid")
ax = kmf_no_aid.plot(ax=ax)
#print("Median survival time of non-democratic:", kmf.median_)
plt.ylim(0,1)
# Trailing semicolon: presumably there to suppress the notebook's echo of the
# returned object.
plt.title("Recidivism for Participants Who Received Financial Aid \nvs. Those Who Did Not");

# + {"id": "zdM4GOAOsgzN", "colab_type": "code", "outputId": "a85fa30c-4695-4e93-ffa0-a15f84e733e8", "colab": {"base_uri": "https://localhost:8080/", "height": 500}}
# Nelson-Aalen cumulative hazard fitted on all rows (no group split).
naf = lifelines.NelsonAalenFitter()
naf.fit(duration, arrested)
# Show the first rows of the estimated cumulative hazard table.
print(naf.cumulative_hazard_.head())
naf.plot();

# + {"id": "uJVPVtmYsgzR", "colab_type": "code", "outputId": "e11867c7-9a27-4c1d-833c-d07e23ffcbd4", "colab": {"base_uri": "https://localhost:8080/", "height": 403}}
# Nelson-Aalen cumulative hazard per financial-aid group, one fitter each,
# both fitted on the shared timeline `t`.
naf_w_aid = lifelines.NelsonAalenFitter()
naf_no_aid = lifelines.NelsonAalenFitter()

naf_w_aid.fit(duration[w_aid], event_observed=arrested[w_aid], timeline=t, label="Received Financial Aid")
# First plot call passes no `ax`; the axes it returns is reused below.
# `loc=slice(0, 50)` restricts what is plotted to that slice of the timeline.
ax = naf_w_aid.plot(loc=slice(0, 50))
naf_no_aid.fit(duration[~w_aid], event_observed=arrested[~w_aid], timeline=t, label="No Financial Aid")
ax = naf_no_aid.plot(ax=ax, loc=slice(0, 50))