예제 #1
0
def NelsonAelan_dash(T, C):
    """Fit a Nelson-Aalen estimator and pass the current figure to ``pyplot``.

    The estimate is drawn twice with the same title: once with default
    options and once with ``ci_force_lines=True``.

    :param T: durations handed to ``NelsonAalenFitter.fit``.
    :param C: event-observed flags handed to ``fit`` as ``event_observed``.
    """
    chart_title = 'Nelson-Aalen Estimate'

    fitter = NelsonAalenFitter()
    fitter.fit(T, event_observed=C)

    # First pass with defaults, second pass with confidence-interval lines forced.
    fitter.plot(title=chart_title)
    fitter.plot(ci_force_lines=True, title=chart_title)

    # `pyplot` is resolved from the enclosing module (presumably a plotly
    # helper -- confirm); it receives whatever figure is current.
    pyplot(plt.gcf(), legend=False)
예제 #2
0
def createHazardGraph(durations, event_observed):
    """Fit a Nelson-Aalen estimator and show it without confidence intervals.

    :param durations: durations passed positionally to ``fit``.
    :param event_observed: event flags passed positionally to ``fit``.
    """
    hazard_fitter = NelsonAalenFitter()
    hazard_fitter.fit(durations, event_observed)
    hazard_fitter.plot(ci_show=False)

    # Label the chart, then display it.
    plt.title("Hard Drive Nelson-Aalen Hazard Estimate")
    plt.ylabel("Cumulative Hazard")
    plt.show()
예제 #3
0
 def test_naf_plotting_slice(self, block):
     """Overlay two Nelson-Aalen fits, each restricted to a slice of its estimate.

     The first fit is drawn with ``loc=slice(0, None)``; the second is drawn
     on the same axes with ``iloc=slice(100, 180)`` and
     ``ci_force_lines=True``.

     :param block: forwarded to ``self.plt.show`` as its ``block`` argument.
     """
     first_sample = np.random.exponential(5, size=(200, 1))
     second_sample = np.random.exponential(1, size=(200, 1))

     fitter = NelsonAalenFitter()
     fitter.fit(first_sample)
     shared_ax = fitter.plot(loc=slice(0, None))

     # Re-fit the same estimator on the second sample and draw onto the same axes.
     fitter.fit(second_sample)
     fitter.plot(ax=shared_ax, ci_force_lines=True, iloc=slice(100, 180))

     pyplot_module = self.plt
     pyplot_module.title("test_naf_plotting_slice")
     pyplot_module.show(block=block)
예제 #4
0
 def test_naf_plotting_with_custom_colours(self, block):
     """Draw two Nelson-Aalen fits on shared axes, one with color "r" and one with "k".

     :param block: forwarded to ``self.plt.show`` as its ``block`` argument.
     """
     first_sample = np.random.exponential(5, size=(200, 1))
     second_sample = np.random.exponential(1, size=500)

     fitter = NelsonAalenFitter()
     fitter.fit(first_sample)
     shared_ax = fitter.plot(color="r")

     # Same estimator object, re-fitted on the second sample.
     fitter.fit(second_sample)
     fitter.plot(ax=shared_ax, color="k")

     pyplot_module = self.plt
     pyplot_module.title("test_naf_plotting_with_custom_coloirs")
     pyplot_module.show(block=block)
예제 #5
0
 def test_naf_plot_cumulative_hazard(self, block):
     """Draw the same fit through ``plot`` and ``plot_cumulative_hazard`` on one axes.

     :param block: forwarded to ``self.plt.show`` as its ``block`` argument.
     """
     sample = np.random.exponential(5, size=(200, 1))

     fitter = NelsonAalenFitter()
     fitter.fit(sample)

     # Both calls target the same axes; the second one forces CI lines.
     shared_ax = fitter.plot()
     fitter.plot_cumulative_hazard(ax=shared_ax, ci_force_lines=True)

     pyplot_module = self.plt
     pyplot_module.title("I should have plotted the same thing, but different styles + color!")
     pyplot_module.show(block=block)
예제 #6
0
def plot_HR(df, with_ci=False):
    """Plot Nelson-Aalen curves for the top risk quartile versus the rest and save a PNG.

    Rows whose ``risk`` exceeds the 75th percentile are labelled
    ``High_Risk``; all remaining rows are labelled ``Low_Risk``.

    :param df: frame indexed by column name; the columns read are
        ``days_survived``, ``death`` and ``risk``.
    :param with_ci: passed to ``plot`` as ``ci_show``; also selects the output
        file (``./hr_with_ci.png`` or ``./hr_without_ci.png``).
    """
    durations = df['days_survived']
    events = df['death']
    fitter = NelsonAalenFitter()

    risk_threshold = np.percentile(df['risk'], 75)
    is_high_risk = df['risk'] > risk_threshold
    is_low_risk = ~is_high_risk

    # High-risk group first: its plot call creates the axes that are reused below.
    fitter.fit(durations[is_high_risk],
               event_observed=events[is_high_risk],
               label='High_Risk')
    shared_ax = fitter.plot(ci_show=with_ci)

    fitter.fit(durations[is_low_risk],
               event_observed=events[is_low_risk],
               label='Low_Risk')
    fitter.plot(ax=shared_ax, ci_show=with_ci)

    plt.ylim(0, .1)
    plt.xlabel("Days")
    plt.ylabel("Risk of Death")
    plt.title("Cardiovascular Death Risk over time (top quartile)")

    output_path = "./hr_with_ci.png" if with_ci else "./hr_without_ci.png"
    plt.savefig(output_path)
예제 #7
0
def get_sa(request):
    """Render ``sa_test.html`` with the paths of the two survival-analysis images.

    The Kaplan-Meier and Nelson-Aalen images are generated from the Waltons
    dataset and written under ``<grandparent dir of this file>/images``
    whenever at least one of them is missing on disk.

    :param request: the request object forwarded to ``render``.
    :return: whatever ``render`` returns for template ``sa_test.html``.
    """
    dirname = os.path.dirname(os.path.dirname(__file__)).replace('\\', '/')
    kmffile = '/images/test1.jpg'
    naffile = '/images/test2.jpg'
    context = {'kmf': kmffile, 'naf': naffile}

    images_present = (os.path.exists(dirname + kmffile)
                      and os.path.exists(dirname + naffile))
    # Regenerate when EITHER image is missing. The previous check only fired
    # when both were absent, so a single deleted file was never rebuilt while
    # the template kept referencing it.
    if not images_present:
        df = load_waltons()
        T = df['T']  # an array of durations
        E = df['E']  # a boolean or binary array: whether the 'death' was observed (otherwise the individual is censored)
        kmf = KaplanMeierFitter(alpha=0.95)
        kmf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='KM_estimate', alpha=None, left_censorship=False, ci_labels=None)

        naf = NelsonAalenFitter(alpha=0.95, nelson_aalen_smoothing=True)
        naf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='NA_estimate', alpha=None, ci_labels=None)

        kmf.plot()
        plt.savefig(dirname + kmffile)
        naf.plot()
        plt.savefig(dirname + naffile)

    # return render_to_response(template_name='sa_test.html', context=context, context_instance=RequestContext(request=request))
    return render(request=request, template_name='sa_test.html', context=context)
예제 #8
0
    if i == 0:
        plt.ylabel('Frac. in staying after $n$ years')
# Tidy the layout of the figure built by the preceding (not shown) plotting loop.
plt.tight_layout()

# Re-fit the Kaplan-Meier estimator per department and print each median.
# `kmf`, `T`, `E`, `data` and `depts` are defined earlier in the script.
for i, dept in enumerate(depts):
    ix = data['dept'] == dept  # boolean mask selecting this department's rows
    kmf.fit(T[ix], E[ix], label=dept)
    print(dept, kmf.median_)

# Looking at a hazard curve: fit a Nelson-Aalen estimator on all observations.
from lifelines import NelsonAalenFitter
naf = NelsonAalenFitter()

naf.fit(T, event_observed=E)
print(naf.cumulative_hazard_.head())
naf.plot()

# Author's reading of the curve: the hazard of someone leaving is low at the
# start and then gets worse; once you stay for 500 days you stay at least a
# bit more, and after that it gets worse exponentially.

# SURVIVAL REGRESSION -- figuring out how other covariates influence whether or not someone survives.
# Regular linear regression can't be used. Use Cox's model or Aalen's additive model instead.

# Cox's Proportional Hazard model
# "The idea behind the model is that the log-hazard of an individual is a linear function of their static covariates
# and a population-level baseline hazard that changes over time" - from https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html

from lifelines.datasets import load_rossi
from lifelines import CoxPHFitter

# Load the Rossi example dataset bundled with lifelines.
rossi_dataset = load_rossi()
# Kaplan-Meier curves, one per distinct value of 'Has_Children', on shared axes.
# `kmf`, `data`, `sns` and `plt` are defined earlier in the script.
ax = plt.subplot(111)
for r in data['Has_Children'].unique():
    ix = data['Has_Children'] == r  # boolean mask for the current group
    kmf.fit(data['Duration'].loc[ix], data['Divorce'].loc[ix], label=r)
    sns.set()
    # Reuse the axes returned by the previous iteration so the curves overlay.
    ax = kmf.plot(title='Mariage Survival Estimate Based on Children',
                  ax=ax,
                  linewidth=2.5)
# Export the figure
plt.savefig('/home/raed/Dropbox/INSE - 6320/Final Project/Children.pdf')
plt.show()

# Nelson-Aalen cumulative hazard over the whole dataset.
naf = NelsonAalenFitter()
naf.fit(data['Duration'], data['Divorce'])
sns.set()
naf.plot(title='Cumulative hazard over time', legend=False)
print(naf.cumulative_hazard_.head(32))
plt.savefig(
    '/home/raed/Dropbox/INSE - 6320/Final Project/Cumulative_Hazard_function.pdf'
)
plt.show()

# Cumulative hazard curves, one per distinct value of 'Couple_Race', on shared axes.
ax = plt.subplot(111)
for r in data['Couple_Race'].unique():
    ix = data['Couple_Race'] == r  # boolean mask for the current group
    naf.fit(data['Duration'].loc[ix], data['Divorce'].loc[ix], label=r)
    sns.set()
    ax = naf.plot(title='Cumulative Hazard by Couple Race ',
                  ax=ax,
                  linewidth=2.5)
#Export the figure
예제 #10
0
     for r in cac_ranges:
         ix = cac_values == r
         if first == 0:
             kmf.fit(times[ix], censors[ix], label=r)
             ax = kmf.plot()
             first = 1
         else:
             kmf.fit(times[ix], censors[ix], label=r) 
             kmf.plot(ax=ax)
 
 elif curve == 'hazard':
     # Plot hazard curve
     naf = NelsonAalenFitter() 
     first = 0
     for r in cac_ranges:
         ix = cac_values == r
         if first == 0:
             naf.fit(times[ix], censors[ix], label=r)
             ax = naf.plot()
             first = 1
         else:
             naf.fit(times[ix], censors[ix], label=r) 
             naf.plot(ax=ax)            
                 
    
 ax.set_ylabel("%", fontsize=12)    
 ax.set_title(tag, fontsize=14)
 ax.set_xlabel("Years to event", fontsize=12)
 
 return times            
 
예제 #11
0
# Finish the figure started earlier in the script (not shown here).
plt.ylim(0, 1)
plt.title("Lifespans of different Question types in First 500 Days")

# Test of significance between question types: log-rank test comparing the
# `short` group against its complement. `T`, `E` and `short` come from earlier
# in the script.
from lifelines.statistics import logrank_test

results = logrank_test(T[short], T[~short], E[short], E[~short], alpha=.99)

results.print_summary()

# Applying the same data to a hazard curve.
from lifelines import NelsonAalenFitter
naf = NelsonAalenFitter()

# Cumulative hazard over all questions.
naf.fit(T, event_observed=E)
naf.plot()

# By question length: overlay the two groups on one axes, both plotted with
# loc=slice(0, 200).
# NOTE(review): the title below says "2000" while the slices stop at 200 -- confirm which is intended.
naf.fit(T[short], event_observed=E[short], label="Shorter Questions")
ax = naf.plot(loc=slice(0, 200))
naf.fit(T[~short], event_observed=E[~short], label="Longer Questions")
naf.plot(ax=ax, loc=slice(0, 200))
plt.title("Cumulative hazard function by Question Length (up to 2000= days)")

# Regression model.
# NOTE(review): this section was headed "Aalen's Additive Model", but the code
# instantiates a Cox proportional hazards fitter.
from lifelines import CoxPHFitter
cph = CoxPHFitter()

#Covariance matrix
import patsy
sfm = patsy.dmatrix('Score_x + t_length + q_length +an_length-1',
예제 #12
0
def test(ini_file):
    ''' Evaluates a saved DeepSurv model according to .ini file

    Builds the network and loss from the configuration, fits a Nelson-Aalen
    estimator on the test CSV and writes its per-step increments to
    ``temp.txt``, loads the checkpoint named after ``ini_file`` from
    ``models_dir``, then prints the loss and one risk value per test sample.
    Finally the fitted Nelson-Aalen estimate is plotted and shown.

    :param ini_file: (String) the path of .ini file
    :return best_c_index: the c-index of the last evaluated test batch
    '''
    # reads configuration from .ini file
    config = read_config(ini_file)
    # builds network|criterion based on configuration
    model = DeepSurv(config['network']).to(device)
    criterion = NegativeLogLikelihood(config['network'], device).to(device)

    # constructs data loaders based on configuration; each loader yields the
    # whole dataset as a single batch
    # NOTE(review): the train dataset/loader and train_df are built but never
    # used below -- kept in case their construction has side effects; confirm.
    train_dataset = SurvivalDataset(config['train']['h5_file'],
                                    is_train=True,
                                    device=device)
    test_dataset = SurvivalDataset(config['train']['h5_file'],
                                   is_train=False,
                                   device=device)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=len(train_dataset))
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=len(test_dataset))
    test_df = pd.read_csv(
        r'H:\project\DeepSurv\DeepSurv.pytorch-master\ours_test.csv',
        index_col=['PatientID'])
    train_df = pd.read_csv(
        r'H:\project\DeepSurv\DeepSurv.pytorch-master\ours_train.csv',
        index_col=['PatientID'])

    best_c_index = 0
    naf = NelsonAalenFitter()
    naf.fit(test_df['Time_d'], event_observed=test_df['Event'])

    # Turn the cumulative estimate on an integer timeline into per-step
    # increments: element 0 is kept as-is, element i becomes H[i] - H[i-1].
    timeline = np.arange(0, 25000)
    cumulative = np.asarray(naf.predict(timeline), dtype=float)
    base_risk = np.concatenate(([cumulative[0]], np.diff(cumulative)))
    np.savetxt('temp.txt', base_risk, fmt='%.17f')

    # The checkpoint file is named after the last backslash-separated
    # component of ini_file, with '.pth' appended.
    model.load_state_dict(
        torch.load(os.path.join(models_dir,
                                ini_file.split('\\')[-1] + '.pth'))['model'])
    model.eval()

    for X, y, e in test_loader:
        with torch.no_grad():
            risk_pred = model(X)
            valid_loss = criterion(risk_pred, y, e, model)
            print(valid_loss)
            valid_c = c_index(-risk_pred, y, e)
            best_c_index = valid_c

            # Convert each tensor to numpy once per batch rather than once
            # per sample.
            predicted = risk_pred.detach().cpu().numpy()[:, 0]
            durations = y.detach().cpu().numpy()[:, 0]
            events = e.detach().cpu().numpy()[:, 0]
            for _r, _y, _e in zip(predicted, durations, events):
                # Nelson-Aalen estimate at this sample's time, scaled by
                # exp(network output).
                t0 = naf.predict(_y)
                risk = t0 * np.exp(_r)
                print(risk, int(_e))

    naf.plot()
    plt.show()
    return best_c_index
# Python 2 script fragment. `kmf`, `T`, `C`, `c`, `df`, `data`, `pd` and `plt`
# are defined earlier in the script (not shown here).

# Fit the Kaplan-Meier estimator on all observations and plot its survival function.
kmf.fit(T, event_observed=C)

kmf.survival_function_.plot()
plt.title('Survival of A (From the Start) Grade Restaurants in NYC')

print 'Median Time on Site is: ' + str(kmf.median_)

# NOTE(review): the same median is printed a second time -- likely a leftover duplicate.
print 'Median Time on Site is: ' + str(kmf.median_)

## HAZARD FUNCTION:
from lifelines import NelsonAalenFitter
naf = NelsonAalenFitter()

naf.fit(T, event_observed=C)
# Draw the Nelson-Aalen estimate (ix=slice(0, 1000), secondary y-axis), then a
# 100-bin histogram of c.duration before showing the figure.
ax = naf.plot(ix=slice(0, 1000), secondary_y=True)
c.duration.hist(bins=100).plot(title='Distribution of Grade Changes')
plt.show()

##### SPLIT BY BORO:

# One row per (CAMIS, BORO) pair.
boro = df[['CAMIS', 'BORO']].drop_duplicates()

# Attach the BORO column to the survival data by joining on CAMIS.
borod = pd.merge(data, boro, on=['CAMIS'])

# Kaplan-Meier fits per borough, intended to share one axes.
ax = plt.subplot(111)
dem = (borod.BORO == "MANHATTAN")  # boolean mask: Manhattan rows
kmf.fit(T[dem], event_observed=C[dem], label="MANHATTAN")
kmf.plot(ax=ax)
dem2 = (borod.BORO == "BRONX")  # boolean mask: Bronx rows
kmf.fit(T[dem2], event_observed=C[dem2], label="BRONX")
예제 #14
0
'''
VISUALIZATIONS
'''

# `data`, `plt`, `mpimg` and the lifelines fitters are provided earlier in the
# script. The plot() calls below return a matplotlib Axes, which has no
# savefig(); each image is therefore saved through the Axes' parent Figure.

# 1. Kaplan Meier Survivor Function
kmf = KaplanMeierFitter()
T = data['dur']
C = data['evt']
kmf.fit(T, event_observed=C)
ax1 = kmf.plot(title='Survivor Function, Drop Out')
fig1 = ax1.get_figure()
fig1.savefig('fig1.png')

# 2. Nelson Aalen Cumulative Hazard Function
naf = NelsonAalenFitter()
naf.fit(T, event_observed=C)
ax2 = naf.plot(title='Cumulative Hazard Function, Drop Out')
fig2 = ax2.get_figure()
fig2.savefig('fig2.png')

# 3. Cox Proportional Hazard Model
# The duration column is 'dur' (the same column used for T above); 'sex' is a
# covariate, not the duration, so it must not be passed as the duration column.
cph = CoxPHFitter()
cph.fit(data, duration_col='dur', event_col='evt')
ax3 = cph.predict_survival_function(data).plot()
fig3 = ax3.get_figure()
fig3.savefig('fig3.png')

# I couldn't make this one give me the result I wanted.
# The functioning Stata code is:
#     stphplot, by(sex) nolntime
# and the resulting visualization is...
img = mpimg.imread('cph.png')
imgplot = plt.imshow(img)
예제 #15
0
def bayesian_model_estimation(T, E, iter_interpolate=2, n_pts=20):
    """Plot non-parametric and exponential fits, then grid-estimate the rate posterior.

    T is durations
    E is binary event flag
    iter_interpolate is number of iterations in posterior grid interpolation refinement (int, min.=1)
    n_pts is number of points in posterior

    Reads the module-level name ``target_rate`` for the reference curves and
    the printed error. Returns None; all output is printed text and
    matplotlib figures.
    """
    # Plot non-parametric curves
    kmf = KaplanMeierFitter()
    kmf.fit(T, event_observed=E)
    kmf.plot()

    naf = NelsonAalenFitter()
    naf.fit(T, event_observed=E)
    plt.figure(figsize=(7, 6))
    naf.plot()
    plt.title('Cumulative hazard rate')

    # Fit exponential cumulative hazard model
    # See https://lifelines.readthedocs.io/en/latest/Survival%20analysis%20with%20lifelines.html
    exf = ExponentialFitter().fit(T, E, label='ExponentialFitter')
    exf.plot_cumulative_hazard()
    # exf.lambda_ is used as a scale below, so its reciprocal is reported as the rate.
    print('fitted lambda = {}'.format(1 / exf.lambda_))

    # Plot groundtruth curve
    plt.figure(figsize=(7, 6))
    x = np.arange(1, 30)
    plt.plot(x,
             expon(scale=1 / target_rate).sf(x),
             'g--',
             lw=2.5,
             alpha=.6,
             label='target')
    plt.plot(x,
             expon(scale=exf.lambda_).sf(x),
             'r-',
             lw=3,
             alpha=.7,
             label='fitted')
    plt.legend()
    plt.xlabel('duration (time since event arrival')
    plt.title('Survival curve')

    # Bayesian inference of lambda
    # ============================
    lam_range = np.linspace(0, .2, n_pts)
    for it in range(1, iter_interpolate + 1):
        print('\niteration {}'.format(it))

        # Uniform prior over the current grid. It is a constant in log space,
        # so it only shifts the log-posterior and cancels on normalization;
        # it must not be divided by its own sum (that is 0/0 for n_pts == 1).
        prior = np.ones_like(lam_range)
        prior /= np.sum(prior)
        logprior = np.log(prior)

        # Accumulate the log-likelihood on a copy so the prior is not mutated.
        # One frozen distribution per grid serves every observation.
        # NOTE: the initial grid contains lambda == 0, so 1 / lam_range holds
        # an inf there (numpy emits a divide warning).
        logpost = logprior.copy()
        grid_dist = expon(scale=1 / lam_range)
        for duration, event_flag in zip(T, E):
            if event_flag == 1:
                # Observed event: density at the duration.
                logpost += grid_dist.logpdf(duration)
            else:
                # Censored: probability of surviving past the duration.
                logpost += grid_dist.logsf(duration)

        # Trick: shift entire log dist. by max.loglikel. before exponentiation to reduce potential underflow:
        maxlogl = np.max(logpost)
        post = np.exp(logpost - maxlogl)
        post /= np.sum(post)
        ExpectedVal = np.dot(lam_range, post)
        print('Mean of lambda posterior = {}'.format(ExpectedVal))
        print('MAE = {}'.format(np.abs(ExpectedVal - target_rate)))

        # Plot lambda posterior
        plt.figure(figsize=(7, 6))
        plt.plot(lam_range, post, 'b.-', lw=1, label='Bayes')
        marker_height = 1.2 * np.max(post)
        plt.vlines(1 / exf.lambda_,
                   0,
                   marker_height,
                   color='m',
                   lw=3,
                   alpha=.6,
                   label='MLE')
        plt.vlines(target_rate,
                   0,
                   marker_height,
                   color='orange',
                   lw=3,
                   alpha=.9,
                   label='target')
        plt.vlines(ExpectedVal,
                   0,
                   marker_height,
                   color='b',
                   lw=3,
                   alpha=.6,
                   label='Bayes EV')
        plt.legend()
        plt.title('Lambda estimate (iteration {})'.format(it))
        plt.xlabel('lambda')

        # Refine posterior grid evaluation points for the NEXT iteration only.
        # After the final pass the refined grid would never be used, and
        # interp1d raises when a requested CDF level lies outside the range
        # of cumul_prob_dens.
        if it < iter_interpolate:
            cumul_prob_dens = post.cumsum()
            f = interp1d(cumul_prob_dens, lam_range)
            cdf_new_grid_pts = np.linspace(1e-2, 1 - 1e-2, n_pts)
            lam_range = f(cdf_new_grid_pts)
예제 #16
0
파일: kmna.py 프로젝트: xcodevn/SADP
# Python 2 script fragment. `df`, `config`, `np`, `pyplot`, `bins0` and `bins1`
# are defined earlier in the script (not shown here).

# Pair each time, scaled by the maximum time, with its integer event flag.
data_ = zip(df.time/max(df.time), df.event.astype(int))
# Keep only the pairs whose scaled time is at least config.GAMMA.
data  = [(a, b) for (a,b) in data_ if a >= config.GAMMA]

print("[*] Remove #%d outliers" % (len(data_) - len(data)))
N  = len(df) # number of data points (before filtering)

from lifelines import KaplanMeierFitter
from lifelines import NelsonAalenFitter

# Fit both estimators on the filtered data.
kmf = KaplanMeierFitter()
(T, E) = zip(*data)  # split the pairs back into times and event flags
kmf.fit(T, event_observed=E)
naf = NelsonAalenFitter()
naf.fit(T, event_observed=E)
# Left subplot: Nelson-Aalen estimate.
ax = pyplot.subplot(121)
naf.plot(ax=ax)

# Right subplot: Kaplan-Meier estimate.
ax = pyplot.subplot(122)
kmf.plot(ax=ax)

# Print the cumulative hazard table and also write it to naf.csv.
print naf.cumulative_hazard_
naf.cumulative_hazard_.to_csv("naf.csv")

pyplot.show()

# Split the scaled times by event flag (0 vs 1).
data0  = [ a for (a,b) in data if b == 0 ]
data1  = [ a for (a,b) in data if b == 1 ]

# Histogram each group over the range [config.GAMMA, 1].
his0,bin_edges0 = np.histogram(data0, bins=bins0, range=(config.GAMMA, 1))
his1,bin_edges1 = np.histogram(data1, bins=bins1, range=(config.GAMMA, 1))