def test_naf_plot_cumulative_hazard_bandwith_1(self, block):
    """Plot a bandwidth-5.0 smoothed hazard from a Nelson-Aalen fit.

    The fitted data are 2000 draws from ``np.random.exponential(5)``,
    each squared. Only the ``iloc`` slice ``0:1700`` of the estimate is
    plotted. ``block`` is forwarded to ``self.plt.show``.
    """
    squared_durations = np.random.exponential(5, size=(2000, 1)) ** 2

    fitter = NelsonAalenFitter()
    fitter.fit(squared_durations)
    fitter.plot_hazard(bandwidth=5.0, iloc=slice(0, 1700))

    self.plt.title("test_naf_plot_cumulative_hazard_bandwith_1")
    self.plt.show(block=block)
# Flag every row with the constant event indicator 1.
module_survival_data['event'] = 1

groups = module_survival_data['module_name']
T = module_survival_data['duration_weeks']
E = module_survival_data['event']

from lifelines import NelsonAalenFitter

naf = NelsonAalenFitter()
bandwidth = 3.

# Fit one estimate per distinct module name and draw its smoothed hazard.
# The first call is made without `ax`; its return value is then passed as
# `ax` to every later call.
for i, each in enumerate(module_survival_data['module_name'].unique()):
    ix = (groups == each)
    naf.fit(T[ix], event_observed=E[ix], label=each)
    ax = (
        naf.plot_hazard(bandwidth=bandwidth, ci_show=False)
        if i == 0
        else naf.plot_hazard(ax=ax, bandwidth=bandwidth, ci_show=False)
    )

ax.set_title("Hazard function of different modules | bandwidth=%.1f" % bandwidth)

# Survival curves for tools
import pandas
from datetime import datetime, timedelta
from lifelines import KaplanMeierFitter

# NOTE(review): absolute, machine-specific path; the script only runs where
# this directory exists.
data_path = '/home/parthae/Documents/Projects/TISS_Git/projects/data_collation/data/data_latest'
cg_data = pandas.read_csv(
    data_path + '/state_level/state_tools_data/ct_metrics_tools_March31st2019.csv'
)
# Tag every loaded row with the literal state code 'cg'.
cg_data['state'] = 'cg'
# PURPOSE
# One fit per distinct PURPOSE value, each drawn with naf.plot on shared axes.
# NOTE(review): plt.subplot() with no arguments may hand back the current
# figure's existing axes, in which case this section and the two below would
# overlay one another - confirm a fresh figure is opened between them.
ax = plt.subplot()
for purpose in df_cox.PURPOSE.unique():
    is_pur = (df_cox.PURPOSE == purpose)
    naf.fit(T[is_pur], event_observed=E[is_pur], label=purpose)
    naf.plot(ax=ax)

# univariate analysis: hazard fxn
# NOTE: no real distinction
b = 3  # bandwidth handed to every plot_hazard call below

# ORIG_CHN
ax = plt.subplot()
for chn in df_cox.ORIG_CHN.unique():
    is_chn = (df_cox.ORIG_CHN == chn)
    naf.fit(T[is_chn], event_observed=E[is_chn], label=chn)
    naf.plot_hazard(ax=ax, bandwidth=b)

# PURPOSE
ax = plt.subplot()
for purpose in df_cox.PURPOSE.unique():
    is_pur = (df_cox.PURPOSE == purpose)
    naf.fit(T[is_pur], event_observed=E[is_pur], label=purpose)
    naf.plot_hazard(ax=ax, bandwidth=b)

############################################################
# Cox proportional: statsmodels
############################################################
# Builds the string
# 'AGE ~ ORIG_CHN+PURPOSE+NUM_BO+CSCORE_MN + np.log(ORIG_AMT)+np.log(ORIG_VAL)'
formula = 'AGE ~ {0} + {1}'.format(
    '+'.join(['ORIG_CHN', 'PURPOSE'] + ['NUM_BO', 'CSCORE_MN']),
    '+'.join(map('np.log({0})'.format, ['ORIG_AMT', 'ORIG_VAL'])),
)
# NOTE(review): the five statements below appear to be the tail of a function
# whose header lies before this excerpt (presumably `plot_function`, which is
# called further down). They are carried over unchanged; re-indent them under
# that header when merging.
ax = naf.plot_hazard(bandwidth=bandwidth,figsize=(10,8))
naf.fit(T[~small_ans], event_observed=E[~small_ans], label= category2)
naf.plot_hazard(ax=ax, bandwidth=bandwidth,figsize=(10,8))
plt.legend(title = column_name,prop={'size': 12})
plt.title('Curve Smoothening and Comparing({})'.format(column_name),size=12)


# utility function for a plot generation to explain the smoothness and bandwidth factor visually
def smooth_analysis():
    """Plot the smoothed hazard once per bandwidth 10, 20, ..., 100.

    Each curve is labelled with its bandwidth value; the legend is titled
    'Bandwidth'. Uses the module-level ``naf`` fitter and ``plt``.
    """
    for width in range(10, 101, 10):
        naf.plot_hazard(bandwidth=width, figsize=(15, 13), label=width)
    plt.tight_layout()
    plt.legend(title='Bandwidth', prop={'size': 12})


# plotting the NA Estimator
plot_estimator()

# getting a visualization of bandwidth and smoothness of the NA Estimator
smooth_analysis()

# smoothing the NA Estimator
naf.plot_hazard(
    bandwidth=10,
    figsize=(12, 10),
    title="Smoother NA Estimator",
    label="NA Estimator",
)

# smoothing and comparing according to the column's entry
# Each row is (first category, second category, column name); every call
# uses bandwidth 3.
for first_value, second_value, column in (
    ('Male', 'Female', 'gender'),
    ('Yes', 'No', 'PhoneService'),
    ('Yes', 'No', 'Partner'),
    ('Yes', 'No', 'Dependents'),
    ('Yes', 'No', 'PaperlessBilling'),
):
    plot_function(first_value, second_value, 3, column)