def NelsonAelan_dash(T, C):
    """Fit a Nelson-Aalen estimator on ``(T, C)`` and pass the current figure to ``pyplot``.

    The fitted estimate is drawn twice under the same title: once with the
    default options and once with ``ci_force_lines=True``.

    :param T: durations handed to ``NelsonAalenFitter.fit``.
    :param C: forwarded to ``fit`` as ``event_observed``.
    """
    chart_title = 'Nelson-Aalen Estimate'
    estimator = NelsonAalenFitter()
    estimator.fit(T, event_observed=C)
    for extra_options in ({}, {'ci_force_lines': True}):
        estimator.plot(title=chart_title, **extra_options)
    # NOTE(review): ``pyplot`` comes from the enclosing module; presumably a
    # Plotly-style converter for matplotlib figures -- confirm.
    pyplot(plt.gcf(), legend=False)
def createHazardGraph(durations, event_observed):
    """Show the Nelson-Aalen cumulative hazard curve without confidence bands.

    :param durations: first positional argument to ``NelsonAalenFitter.fit``.
    :param event_observed: second positional argument to ``fit``.
    """
    hazard_fitter = NelsonAalenFitter()
    hazard_fitter.fit(durations, event_observed)
    hazard_fitter.plot(ci_show=False)

    # Decorate the current axes, then display the figure.
    plt.title("Hard Drive Nelson-Aalen Hazard Estimate")
    plt.ylabel("Cumulative Hazard")
    plt.show()
def test_naf_plotting_slice(self, block):
    """Plot two Nelson-Aalen fits on one axes, restricted via ``loc`` and ``iloc`` slices.

    :param block: forwarded to ``self.plt.show(block=...)``.
    """
    sample_scale_5 = np.random.exponential(5, size=(200, 1))
    sample_scale_1 = np.random.exponential(1, size=(200, 1))
    fitter = NelsonAalenFitter()

    # First fit: label-based slice covering everything from 0 onwards.
    fitter.fit(sample_scale_5)
    shared_ax = fitter.plot(loc=slice(0, None))

    # Second fit reuses the axes: positional slice, CI drawn as lines.
    fitter.fit(sample_scale_1)
    fitter.plot(ax=shared_ax, ci_force_lines=True, iloc=slice(100, 180))

    self.plt.title("test_naf_plotting_slice")
    self.plt.show(block=block)
def test_naf_plotting_with_custom_colours(self, block):
    """Plot two Nelson-Aalen fits on one axes with explicit ``color`` arguments.

    :param block: forwarded to ``self.plt.show(block=...)``.
    """
    sample_2d = np.random.exponential(5, size=(200, 1))
    sample_1d = np.random.exponential(1, size=500)
    fitter = NelsonAalenFitter()

    fitter.fit(sample_2d)
    shared_ax = fitter.plot(color="r")

    fitter.fit(sample_1d)
    fitter.plot(ax=shared_ax, color="k")

    self.plt.title("test_naf_plotting_with_custom_coloirs")
    self.plt.show(block=block)
def test_naf_plot_cumulative_hazard(self, block):
    """Draw one fit via ``plot`` and again via ``plot_cumulative_hazard`` on the same axes.

    :param block: forwarded to ``self.plt.show(block=...)``.
    """
    sample = np.random.exponential(5, size=(200, 1))
    fitter = NelsonAalenFitter()
    fitter.fit(sample)

    shared_ax = fitter.plot()
    # Second rendering of the same fit, this time with CI drawn as lines.
    fitter.plot_cumulative_hazard(ax=shared_ax, ci_force_lines=True)

    self.plt.title("I should have plotted the same thing, but different styles + color!")
    self.plt.show(block=block)
def plot_HR(df, with_ci=False):
    """Plot Nelson-Aalen curves for the top risk quartile versus the rest and save a PNG.

    Rows whose ``risk`` exceeds the 75th percentile form the ``High_Risk``
    group; all other rows form ``Low_Risk``.

    :param df: frame providing the ``days_survived``, ``death`` and ``risk`` columns.
    :param with_ci: passed as ``ci_show`` to both plots; also selects the output file name.
    """
    durations = df['days_survived']
    events = df['death']
    risk_threshold = np.percentile(df['risk'], 75)
    is_high_risk = df['risk'] > risk_threshold
    is_low_risk = ~is_high_risk

    fitter = NelsonAalenFitter()
    fitter.fit(durations[is_high_risk], event_observed=events[is_high_risk], label='High_Risk')
    shared_ax = fitter.plot(ci_show=with_ci)
    fitter.fit(durations[is_low_risk], event_observed=events[is_low_risk], label='Low_Risk')
    fitter.plot(ax=shared_ax, ci_show=with_ci)

    plt.ylim(0, .1)
    plt.xlabel("Days")
    plt.ylabel("Risk of Death")
    plt.title("Cardiovascular Death Risk over time (top quartile)")

    plt.savefig("./hr_with_ci.png" if with_ci else "./hr_without_ci.png")
def get_sa(request):
    """Render ``sa_test.html`` with paths to Kaplan-Meier and Nelson-Aalen plot images.

    Both image paths are always placed in the template context under the keys
    ``'kmf'`` and ``'naf'``. The images are (re)generated from the Waltons
    dataset whenever at least one of them is missing on disk.

    :param request: request object forwarded to ``render``.
    :return: the result of ``render`` for ``sa_test.html``.
    """
    dirname = os.path.dirname(os.path.dirname(__file__)).replace('\\', '/')
    kmffile = '/images/test1.jpg'
    naffile = '/images/test2.jpg'
    context = {'kmf': kmffile, 'naf': naffile}

    # Regenerate when EITHER image is missing. The previous guard required both
    # to be absent, so a single deleted image was never recreated while the
    # template kept pointing at it.
    images_present = (os.path.exists(dirname + kmffile)
                      and os.path.exists(dirname + naffile))
    if not images_present:
        df = load_waltons()
        T = df['T']  # an array of durations
        # A boolean or binary array: whether the 'death' was observed
        # (alternatively an individual can be censored).
        E = df['E']

        kmf = KaplanMeierFitter(alpha=0.95)
        kmf.fit(durations=T, event_observed=E, timeline=None, entry=None,
                label='KM_estimate', alpha=None, left_censorship=False,
                ci_labels=None)
        naf = NelsonAalenFitter(alpha=0.95, nelson_aalen_smoothing=True)
        naf.fit(durations=T, event_observed=E, timeline=None, entry=None,
                label='NA_estimate', alpha=None, ci_labels=None)

        kmf.plot()
        plt.savefig(dirname + kmffile)
        naf.plot()
        plt.savefig(dirname + naffile)

    return render(request=request, template_name='sa_test.html', context=context)
if i == 0: plt.ylabel('Frac. in staying after $n$ years') plt.tight_layout() for i, dept in enumerate(depts): ix = data['dept'] == dept kmf.fit(T[ix], E[ix], label=dept) print(dept, kmf.median_) # Looking at a hazard curve from lifelines import NelsonAalenFitter naf = NelsonAalenFitter() naf.fit(T, event_observed=E) print(naf.cumulative_hazard_.head()) naf.plot() # This hazard curve shows us that there is low hazard of someone leaving starting off, then it gets worse, # once you stay for 500 days you stay at least a bit more, then exponentially it gets worse! # SURVIVAL REGRESSION -- figuring out the influences of other aspects on whether or not someone survives # Can't use regular linear regression. Want to use Cox's model or Aalen's additive model. # Cox's Proportional Hazard model # "The idea behind the model is that the log-hazard of an individual is a linear function of their static covariates # and a population-level baseline hazard that changes over time" - from https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html from lifelines.datasets import load_rossi from lifelines import CoxPHFitter rossi_dataset = load_rossi()
# Kaplan-Meier curves, one per distinct 'Has_Children' value, on a shared axes.
ax = plt.subplot(111)
has_children = data['Has_Children']
for r in has_children.unique():
    ix = has_children == r
    kmf.fit(data['Duration'].loc[ix], data['Divorce'].loc[ix], label=r)
    sns.set()
    ax = kmf.plot(
        title='Mariage Survival Estimate Based on Children',
        ax=ax,
        linewidth=2.5,
    )

# Export the figure
plt.savefig('/home/raed/Dropbox/INSE - 6320/Final Project/Children.pdf')
plt.show()

# Nelson-Aalen cumulative hazard over the whole data set.
naf = NelsonAalenFitter()
naf.fit(data['Duration'], data['Divorce'])
sns.set()
naf.plot(title='Cumulative hazard over time', legend=False)
print(naf.cumulative_hazard_.head(32))
plt.savefig(
    '/home/raed/Dropbox/INSE - 6320/Final Project/Cumulative_Hazard_function.pdf'
)
plt.show()

# Cumulative hazard curves, one per distinct 'Couple_Race' value, on a shared axes.
ax = plt.subplot(111)
couple_race = data['Couple_Race']
for r in couple_race.unique():
    ix = couple_race == r
    naf.fit(data['Duration'].loc[ix], data['Divorce'].loc[ix], label=r)
    sns.set()
    ax = naf.plot(
        title='Cumulative Hazard by Couple Race ',
        ax=ax,
        linewidth=2.5,
    )

# Export the figure
for r in cac_ranges: ix = cac_values == r if first == 0: kmf.fit(times[ix], censors[ix], label=r) ax = kmf.plot() first = 1 else: kmf.fit(times[ix], censors[ix], label=r) kmf.plot(ax=ax) elif curve == 'hazard': # Plot hazard curve naf = NelsonAalenFitter() first = 0 for r in cac_ranges: ix = cac_values == r if first == 0: naf.fit(times[ix], censors[ix], label=r) ax = naf.plot() first = 1 else: naf.fit(times[ix], censors[ix], label=r) naf.plot(ax=ax) ax.set_ylabel("%", fontsize=12) ax.set_title(tag, fontsize=14) ax.set_xlabel("Years to event", fontsize=12) return times
plt.ylim(0, 1) plt.title("Lifespans of different Question types in First 500 Days") # Test of significances between Question Types from lifelines.statistics import logrank_test results = logrank_test(T[short], T[~short], E[short], E[~short], alpha=.99) results.print_summary() # Applying output to a hazord curve. from lifelines import NelsonAalenFitter naf = NelsonAalenFitter() naf.fit(T, event_observed=E) naf.plot() #By question length naf.fit(T[short], event_observed=E[short], label="Shorter Questions") ax = naf.plot(loc=slice(0, 200)) naf.fit(T[~short], event_observed=E[~short], label="Longer Questions") naf.plot(ax=ax, loc=slice(0, 200)) plt.title("Cumulative hazard function by Question Length (up to 2000= days)") # Aalen's Additive Model from lifelines import CoxPHFitter cph = CoxPHFitter() #Covariance matrix import patsy sfm = patsy.dmatrix('Score_x + t_length + q_length +an_length-1',
def test(ini_file):
    '''
    Evaluates a saved model according to .ini file: loads the checkpoint named
    after the .ini file, prints the loss and per-sample risks on the test
    loader, and plots the Nelson-Aalen estimate fitted on the test CSV.

    :param ini_file: (String) the path of .ini file
    :return best_c_index: the c-index computed on the last test batch
        (0 if the test loader yields no batch)
    '''
    # reads configuration from .ini file
    config = read_config(ini_file)
    network_config = config['network']
    h5_path = config['train']['h5_file']

    # builds network|criterion based on configuration
    model = DeepSurv(network_config).to(device)
    criterion = NegativeLogLikelihood(network_config, device).to(device)

    # constructs data loaders based on configuration; each loader yields the
    # whole dataset as a single batch
    train_dataset = SurvivalDataset(h5_path, is_train=True, device=device)
    test_dataset = SurvivalDataset(h5_path, is_train=False, device=device)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=len(train_dataset))
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=len(test_dataset))

    test_df = pd.read_csv(
        r'H:\project\DeepSurv\DeepSurv.pytorch-master\ours_test.csv',
        index_col=['PatientID'])
    train_df = pd.read_csv(
        r'H:\project\DeepSurv\DeepSurv.pytorch-master\ours_train.csv',
        index_col=['PatientID'])

    best_c_index = 0

    # Nelson-Aalen cumulative hazard fitted on the test CSV.
    naf = NelsonAalenFitter()
    naf.fit(test_df['Time_d'], event_observed=test_df['Event'])
    timeline = np.arange(0, 25000)
    base_risk = naf.predict(timeline)
    # Turn cumulative values into per-step increments. Walking from the end
    # towards the start means entry i - 1 is still unmodified when entry i is
    # updated.
    for i in range(timeline[-1], 0, -1):
        base_risk[i] = base_risk[i] - base_risk[i - 1]
    np.savetxt('temp.txt', base_risk, '%.17f')

    # The checkpoint file is named after the last backslash-separated
    # component of the .ini path.
    checkpoint_name = ini_file.split('\\')[-1] + '.pth'
    checkpoint = torch.load(os.path.join(models_dir, checkpoint_name))
    model.load_state_dict(checkpoint['model'])
    model.eval()

    for X, y, e in test_loader:
        with torch.no_grad():
            risk_pred = model(X)
            valid_loss = criterion(risk_pred, y, e, model)
            print(valid_loss)
            valid_c = c_index(-risk_pred, y, e)
            best_c_index = valid_c

            predicted = risk_pred.detach().cpu().numpy()[:, 0]
            observed_times = y.detach().cpu().numpy()
            observed_events = e.detach().cpu().numpy()
            for sample_idx, _r in enumerate(predicted):
                _y = observed_times[sample_idx, 0]
                _e = observed_events[sample_idx, 0]
                # Nelson-Aalen prediction at the sample's own time, scaled by
                # exp(model output).
                t0 = naf.predict(_y)
                risk = t0 * np.exp(_r)
                print(risk, int(_e))

    naf.plot()
    plt.show()
    return best_c_index
# Kaplan-Meier survival curve over all restaurants.
kmf.fit(T, event_observed=C)
kmf.survival_function_.plot()
plt.title('Survival of A (From the Start) Grade Restaurants in NYC')
# The median line is emitted twice, as in the original script.
print('Median Time on Site is: ' + str(kmf.median_))
print('Median Time on Site is: ' + str(kmf.median_))

## HAZARD FUNCTION:
from lifelines import NelsonAalenFitter

naf = NelsonAalenFitter()
naf.fit(T, event_observed=C)
ax = naf.plot(ix=slice(0, 1000), secondary_y=True)
c.duration.hist(bins=100).plot(title='Distribution of Grade Changes')
plt.show()

##### SPLIT BY BORO:
boro = df[['CAMIS', 'BORO']].drop_duplicates()
borod = pd.merge(data, boro, on=['CAMIS'])
ax = plt.subplot(111)

dem = (borod.BORO == "MANHATTAN")
kmf.fit(T[dem], event_observed=C[dem], label="MANHATTAN")
kmf.plot(ax=ax)

dem2 = (borod.BORO == "BRONX")
kmf.fit(T[dem2], event_observed=C[dem2], label="BRONX")
''' VISUALIZATIONS '''

# NOTE: the ``plot`` calls below return matplotlib Axes objects, which have no
# ``savefig`` method; each image is therefore saved through the Axes' parent
# Figure (``get_figure()``). ``fig1``/``fig2``/``fig3`` stay bound to the Axes,
# as before.

# 1. Kaplan Meier Survivor Function
kmf = KaplanMeierFitter()
T = data['dur']
C = data['evt']
kmf.fit(T, event_observed=C)
fig1 = kmf.plot(title='Survivor Function, Drop Out')
fig1.get_figure().savefig('fig1.png')

# 2. Nelson Aalen Cumulative Hazard Function
naf = NelsonAalenFitter()
naf.fit(T, event_observed=C)
fig2 = naf.plot(title='Cumulative Hazard Function, Drop Out')
fig2.get_figure().savefig('fig2.png')

# 3. Cox Proportional Hazard Model
cph = CoxPHFitter()
# NOTE(review): 'sex' is passed in the duration-column position here, while the
# durations used above come from 'dur' -- confirm this is intended.
cph.fit(data, 'sex', event_col='evt')
fig3 = cph.predict_survival_function(data).plot()
fig3.get_figure().savefig('fig3.png')

'''
I couldn't make this one give me the result I wanted.
The functioning Stata code is: stphplot, by(sex) nolntime
and the resulting visualization is...
'''
img = mpimg.imread('cph.png')
imgplot = plt.imshow(img)
def bayesian_model_estimation(T, E, iter_interpolate=2, n_pts=20):
    """
    Plot non-parametric and exponential fits of the data, then estimate the
    exponential rate ``lambda`` on a grid, starting from a uniform prior on
    every iteration.

    T is durations
    E is binary event flag
    iter_interpolate is number of iterations in posterior grid interpolation refinement (int, min.=1)
    n_pts is number of points in posterior

    Reads the module-level ``target_rate`` for the reference curve and the
    error printout. Produces figures and printed output only; returns None.
    """
    # Plot non-parametric curves
    kmf = KaplanMeierFitter()
    kmf.fit(T, event_observed=E)
    kmf.plot()

    naf = NelsonAalenFitter()
    naf.fit(T, event_observed=E)
    plt.figure(figsize=(7, 6))
    naf.plot()
    plt.title('Cumulative hazard rate')

    # Fit exponential cumulative hazard model
    # See https://lifelines.readthedocs.io/en/latest/Survival%20analysis%20with%20lifelines.html
    exf = ExponentialFitter().fit(T, E, label='ExponentialFitter')
    exf.plot_cumulative_hazard()
    # ``exf.lambda_`` is used as a scale below, so its reciprocal is the rate.
    print('fitted lambda = {}'.format(1 / exf.lambda_))

    # Plot groundtruth curve
    plt.figure(figsize=(7, 6))
    x = np.arange(1, 30)
    plt.plot(x, expon(scale=1 / target_rate).sf(x), 'g--', lw=2.5, alpha=.6,
             label='target')
    plt.plot(x, expon(scale=exf.lambda_).sf(x), 'r-', lw=3, alpha=.7,
             label='fitted')
    plt.legend()
    plt.xlabel('duration (time since event arrival')
    plt.title('Survival curve')

    # Bayesian inference of lambda
    # ============================
    lam_range = np.linspace(0, .2, n_pts)
    for it in range(1, iter_interpolate + 1):
        print('\niteration {}'.format(it))

        # Uniform prior over the current grid. The value is the same at every
        # grid point, so it cancels when the posterior is normalised below.
        prior = np.ones_like(lam_range)
        prior /= np.sum(prior)
        logprior = np.log(prior)
        logprior /= np.sum(logprior)

        # Likelihood accumulated in the log domain (a direct product of
        # densities gives dangerously small numbers). Work on a copy so the
        # prior array is not mutated through an alias.
        logpost = logprior.copy()
        for duration, event_flag in zip(T, E):
            if event_flag == 1:
                # observed event: density at the duration
                logpost += expon(scale=1 / lam_range).logpdf(duration)
            else:
                # censored: survival probability at the duration
                logpost += expon(scale=1 / lam_range).logsf(duration)

        # Trick: shift entire log dist. by its maximum before exponentiation
        # to reduce potential underflow
        maxlogl = np.max(logpost)
        post = np.exp(logpost - maxlogl)
        post /= np.sum(post)

        ExpectedVal = np.dot(lam_range, post)
        print('Mean of lambda posterior = {}'.format(ExpectedVal))
        print('MAE = {}'.format(np.abs(ExpectedVal - target_rate)))

        # Plot lambda posterior
        plt.figure(figsize=(7, 6))
        plt.plot(lam_range, post, 'b.-', lw=1, label='Bayes')
        plt.vlines(1 / exf.lambda_, 0, 1.2 * np.max(post), color='m', lw=3,
                   alpha=.6, label='MLE')
        plt.vlines(target_rate, 0, 1.2 * np.max(post), color='orange', lw=3,
                   alpha=.9, label='target')
        plt.vlines(ExpectedVal, 0, 1.2 * np.max(post), color='b', lw=3,
                   alpha=.6, label='Bayes EV')
        plt.legend()
        plt.title('Lambda estimate (iteration {})'.format(it))
        plt.xlabel('lambda')

        # Refine posterior grid evaluation points for the NEXT iteration only.
        # The previous guard (`it <= iter_interpolate`) was always true, so the
        # last pass built a grid nobody used and could raise from interp1d.
        if it < iter_interpolate:
            cumul_prob_dens = post.cumsum()
            # NOTE(review): interp1d raises if a requested quantile lies
            # outside the range of cumul_prob_dens -- confirm this cannot
            # happen for intermediate iterations.
            f = interp1d(cumul_prob_dens, lam_range)
            cdf_new_grid_pts = np.linspace(1e-2, 1 - 1e-2, n_pts)
            lam_range = f(cdf_new_grid_pts)
# Pair each time (scaled by the maximum time) with its integer event flag.
# The pairs are materialised in a list because they are both filtered and
# counted below; a bare zip() is a one-shot iterator without len() on Python 3.
data_ = list(zip(df.time / max(df.time), df.event.astype(int)))
data = [(a, b) for (a, b) in data_ if a >= config.GAMMA]
print("[*] Remove #%d outliers" % (len(data_) - len(data)))
N = len(df)  # number of data points

from lifelines import KaplanMeierFitter
from lifelines import NelsonAalenFitter

kmf = KaplanMeierFitter()
(T, E) = zip(*data)
kmf.fit(T, event_observed=E)
naf = NelsonAalenFitter()
naf.fit(T, event_observed=E)

# Left panel: Nelson-Aalen estimate; right panel: Kaplan-Meier estimate.
ax = pyplot.subplot(121)
naf.plot(ax=ax)
ax = pyplot.subplot(122)
kmf.plot(ax=ax)

# Single-argument call form prints the same on Python 2 and Python 3.
print(naf.cumulative_hazard_)
naf.cumulative_hazard_.to_csv("naf.csv")
pyplot.show()

# Histogram the scaled times separately for event == 0 and event == 1.
data0 = [a for (a, b) in data if b == 0]
data1 = [a for (a, b) in data if b == 1]
his0, bin_edges0 = np.histogram(data0, bins=bins0, range=(config.GAMMA, 1))
his1, bin_edges1 = np.histogram(data1, bins=bins1, range=(config.GAMMA, 1))