Ejemplo n.º 1
0
def survival_ll_nelson_aalen(content):
    """Fit a Nelson-Aalen estimator and return its results as a wrapped JSON string.

    ``content`` must provide ``'times'`` (durations) and ``'events'``
    (event-observed indicators). The JSON payload holds the cumulative
    hazard under ``'hazard'`` and its confidence interval under
    ``'confidence'``, each converted with ``to_dict()``.
    """
    fitter = NelsonAalenFitter()
    fitter.fit(content['times'], event_observed=content['events'])
    payload = {
        'hazard': fitter.cumulative_hazard_.to_dict(),
        'confidence': fitter.confidence_interval_.to_dict(),
    }
    # NOTE(review): ``ignore_nan`` is not a stdlib ``json.dumps`` argument;
    # presumably ``json`` is simplejson in this module -- confirm.
    return httpWrapper(json.dumps(payload, ignore_nan=True))
Ejemplo n.º 2
0
def concat_hazard_curve(T, C):
    """Return the Nelson-Aalen cumulative hazard and its CI on a fixed index.

    Parameters
    ----------
    T
        Durations passed to ``NelsonAalenFitter.fit``.
    C
        Event-observed indicators passed as ``event_observed``.

    Returns
    -------
    tuple
        ``(cumulative_hazard_values, confidence_interval_values)``, both
        reindexed to ``1 .. args.max_idx`` (inclusive). Index positions not
        present in the fitted timeline come back as NaN, as with any
        ``reindex``.
    """
    naf = NelsonAalenFitter(nelson_aalen_smoothing=False)
    naf.fit(T, event_observed=C)
    # ``reindex`` needs the target labels as one iterable; ``args`` is the
    # module-level options object that carries ``max_idx``.
    target_index = range(1, args.max_idx + 1)
    return (naf.cumulative_hazard_.reindex(target_index).values,
            naf.confidence_interval_.reindex(target_index).values)
Ejemplo n.º 3
0
def calcSurvHazardCat(df: pd.DataFrame, *, hazardcol: str = "hazard",) -> pd.DataFrame:

    """
    Attach to every row the Nelson-Aalen cumulative hazard evaluated at that
    row's own time, as an alternative to raw (often censored) survival time.

    Parameters
    ----------
    df
        Data frame with the compulsory columns ``time`` and ``event``.
        It is modified in place.
    hazardcol
        Name of the column that receives the hazard values.

    Returns
    -------
    The same ``df`` object, now carrying the ``hazardcol`` column.
    """

    durations = df["time"]
    observed = df["event"]

    # Fit the estimator on the full data, then read it back at each duration.
    fitter = NelsonAalenFitter()
    fitter.fit(durations, observed)
    hazards = fitter.predict(durations)

    # Assign positionally (via a plain list) rather than by index alignment.
    df[hazardcol] = hazards.tolist()
    return df
Ejemplo n.º 4
0
def survival_ll_nelson_aalen(content):
    """Fit a Nelson-Aalen estimator and return survival, hazard and median as JSON.

    ``content`` must provide ``'times'`` (durations) and ``'events'``
    (event-observed indicators).

    The JSON payload has three keys:

    * ``'result'``  -- survival curve derived from the cumulative hazard as
      ``exp(-H(t))`` (a Nelson-Aalen fitter has no ``survival_function_``).
    * ``'hazard'``  -- the fitted cumulative hazard.
    * ``'median'``  -- first time at which the derived survival curve is
      ``<= 0.5``, or ``null`` if it never gets that low.

    The result is passed through ``httpWrapper`` before being returned.
    """
    import numpy as np  # local import keeps this block self-contained

    naf = NelsonAalenFitter()
    naf.fit(content['times'], event_observed=content['events'])

    cumulative_hazard = naf.cumulative_hazard_
    survival = np.exp(-cumulative_hazard)

    # Times at which the (single-column) survival curve has reached 0.5.
    reached_half = survival.index[(survival.iloc[:, 0] <= 0.5).values]
    median = float(reached_half[0]) if len(reached_half) else None

    # DataFrames are not JSON-serialisable; convert them to plain dicts.
    return httpWrapper(
        json.dumps({
            'result': survival.to_dict(),
            'hazard': cumulative_hazard.to_dict(),
            'median': median,
        }))
    def fit(
        self, durations, event_observed=None, timeline=None, entry=None, label=None, alpha=None, ci_labels=None, weights=None
    ):  # pylint: disable=too-many-arguments
        """
        Fit the estimator by exponentiating a Nelson-Aalen cumulative hazard.

        Parameters
        ----------
        durations: an array, or pd.Series, of length n
            duration subject was observed for
        timeline:
            return the best estimate at the values in timelines (positively increasing)
        event_observed: an array, or pd.Series, of length n
            True if the death was observed, False if the event was lost (right-censored).
            Defaults all True if event_observed==None
        entry: an array, or pd.Series, of length n
            relative time when a subject entered the study. This is
            useful for left-truncated observations, i.e the birth event was not observed.
            If None, defaults to all 0 (all birth events observed.)
        label: string
            a string to name the column of the estimate. Falls back to the
            label already stored on the instance, then to "BFH_estimate".
        alpha: float, optional (default=0.05)
            the alpha value in the confidence intervals. Overrides the initializing
            alpha for this call to fit only.
        ci_labels: iterable
            add custom column names to the generated confidence intervals as a
            length-2 list: [<lower-bound name>, <upper-bound name>].
            Default: <label>_lower_<alpha>
        weights: an array, or pd.Series, of length n, optional
            per-observation weights, forwarded to the underlying
            ``NelsonAalenFitter.fit``.

        Returns
        -------
          self, with new properties like ``survival_function_``.

        """
        self._label = coalesce(label, self._label, "BFH_estimate")
        alpha = coalesce(alpha, self.alpha)

        naf = NelsonAalenFitter(alpha=alpha)
        # ``weights`` must be forwarded: ``naf.weights`` is copied back onto
        # ``self`` below, so dropping it here would silently ignore the caller's
        # weights in both the estimate and the stored attribute.
        naf.fit(
            durations,
            event_observed=event_observed,
            timeline=timeline,
            label=self._label,
            entry=entry,
            ci_labels=ci_labels,
            weights=weights,
        )
        self.durations, self.event_observed, self.timeline, self.entry, self.event_table, self.weights = (
            naf.durations,
            naf.event_observed,
            naf.timeline,
            naf.entry,
            naf.event_table,
            naf.weights,
        )

        # estimation: S(t) = exp(-H(t)), applied to the estimate and its bounds
        self.survival_function_ = np.exp(-naf.cumulative_hazard_)
        self.confidence_interval_ = np.exp(-naf.confidence_interval_)
        self.confidence_interval_survival_function_ = self.confidence_interval_
        self.confidence_interval_cumulative_density = 1 - self.confidence_interval_

        # estimation methods
        self._estimation_method = "survival_function_"
        self._estimate_name = "survival_function_"

        # plotting functions
        self.plot_survival_function = self.plot
        return self
Ejemplo n.º 6
0
 def compute_terminal_node(self):
     """
     Mark this node as terminal and fit its cumulative hazard estimator.

     Column 1 of ``self.y`` is used as the durations and column 0 as the
     event-observed flags; the fit is evaluated on ``self.timeline``.
     :return: self
     """
     self.terminal = True
     self.chf = NelsonAalenFitter()
     durations, observed = self.y.iloc[:, 1], self.y.iloc[:, 0]
     self.chf.fit(durations, event_observed=observed, timeline=self.timeline)
     return self
Ejemplo n.º 7
0
def NelsonAelan_dash(T, C):
    """Fit Nelson-Aalen on durations ``T`` / events ``C`` and plot the estimate.

    The estimate is plotted twice (default style, then with
    ``ci_force_lines=True``); the current matplotlib figure is then handed to
    ``pyplot`` with the legend disabled.
    """
    plot_title = 'Nelson-Aalen Estimate'
    fitter = NelsonAalenFitter()
    fitter.fit(T, event_observed=C)
    fitter.plot(title=plot_title)
    fitter.plot(ci_force_lines=True, title=plot_title)
    pyplot(plt.gcf(), legend=False)
Ejemplo n.º 8
0
    def _vval2ByBootstrap(timeline, nstraps=1000):
        """Bootstrap the inverse standard deviation of the two arms' log-survival.

        Uses ``df``, ``treatment_col``, ``duration_col`` and ``event_col``
        from the enclosing scope. For each of ``nstraps`` resamples of ``df``
        (with replacement, same size) a Nelson-Aalen estimator is fitted per
        arm (``treatment_col`` equal to 0 and 1), converted to survival via
        ``exp(-H)`` and forward-filled onto ``timeline``.

        Returns ``1 / sqrt(var(log S_0) + var(log S_1))`` per timeline point,
        with the variances taken across resamples and NaNs ignored.
        """
        n_times = timeline.shape[0]
        sa1_b = np.zeros((n_times, nstraps))
        sa2_b = np.zeros((n_times, nstraps))
        for sampi in range(nstraps):
            tmp = df.sample(frac=1, replace=True, axis=0)

            # Both arm masks must come from the resample itself; a mask built
            # from ``df`` only lines up with ``tmp`` through index alignment.
            for arm, store in ((0, sa1_b), (1, sa2_b)):
                in_arm = tmp[treatment_col] == arm
                naf = NelsonAalenFitter()
                naf.fit(durations=tmp.loc[in_arm, duration_col],
                        event_observed=tmp.loc[in_arm, event_col])
                surv = np.exp(-naf.cumulative_hazard_.iloc[:, 0])
                store[:, sampi] = surv.reindex(timeline, method='ffill').values
        vval2 = 1 / np.sqrt(
            np.nanvar(np.log(sa1_b), axis=1) +
            np.nanvar(np.log(sa2_b), axis=1))
        return vval2
Ejemplo n.º 9
0
    def _fit_kaplan_meier(self):
        """ Fit the overall Kaplan-Meier and Nelson-Aalen estimators once and cache them on self. """
        if self.kmf_fit is not None:  # already fitted
            return

        # Overall survival curve
        survival_fitter = KaplanMeierFitter()
        survival_fitter.fit(self.time, event_observed=self.event, label=self.label)

        # Overall cumulative hazard, fitted on the same data and label
        hazard_fitter = NelsonAalenFitter()
        hazard_fitter.fit(self.time, event_observed=self.event, label=self.label)

        self.kmf_fit, self.naf_fit = survival_fitter, hazard_fitter
Ejemplo n.º 10
0
 def test_naf_plotting_with_custom_colours(self, block):
     """Overlay two Nelson-Aalen fits on one axes, the first in red and the second in black."""
     first_sample = np.random.exponential(5, size=(200, 1))
     second_sample = np.random.exponential(1, size=(500))
     naf = NelsonAalenFitter()
     # ``fit`` returns the fitter, so the plot call can be chained onto it.
     ax = naf.fit(first_sample).plot(color="r")
     naf.fit(second_sample).plot(ax=ax, color="k")
     self.plt.title("test_naf_plotting_with_custom_coloirs")
     self.plt.show(block=block)
Ejemplo n.º 11
0
 def test_naf_plotting_slice(self, block):
     """Plot one fit with a ``loc`` slice and a refit with an ``iloc`` slice on the same axes."""
     first_sample = np.random.exponential(5, size=(200, 1))
     second_sample = np.random.exponential(1, size=(200, 1))
     naf = NelsonAalenFitter()
     # ``fit`` returns the fitter, so the plot call can be chained onto it.
     ax = naf.fit(first_sample).plot(loc=slice(0, None))
     naf.fit(second_sample).plot(ax=ax, ci_force_lines=True, iloc=slice(100, 180))
     self.plt.title("test_naf_plotting_slice")
     self.plt.show(block=block)
Ejemplo n.º 12
0
def go():
    """Fit and persist one Nelson-Aalen estimator per gender group.

    Durations/censoring flags are built with ``concat_TC`` from the
    module-level file lists (``all_files``, ``files_m``, ``files_f``). For
    each of the groups ``'all'``, ``'m'`` and ``'f'`` an unsmoothed
    ``NelsonAalenFitter`` is fitted and serialised with ``dill`` to a path
    derived from the group name and the module-level ``args`` options.
    """
    # Parenthesised form prints the same thing on Python 2 and is valid on Python 3.
    print(args)
    T_all, C_all = concat_TC(all_files)
    T_m, C_m = concat_TC(files_m)
    T_f, C_f = concat_TC(files_f)
    groups = (('all', (T_all, C_all)), ('m', (T_m, C_m)), ('f', (T_f, C_f)))
    for gender, (T, C) in groups:
        naf = NelsonAalenFitter(nelson_aalen_smoothing=False)
        naf.fit(T, event_observed=C)
        out_path = (
            '/backup/home/jared/storage/foraging/cm/{}_{}_shuffle_{}_{}_{}'
            .format(gender, args.mode, args.min_length, args.ignore_first,
                    args.memory))
        # Context manager guarantees the handle is flushed and closed even if
        # serialisation fails part-way.
        with open(out_path, 'wb') as out_file:
            dill.dump(naf, out_file)
Ejemplo n.º 13
0
    def survival_plot_and_cox(self, df_arr, label=None, filename=''):
        """Overlay one Kaplan-Meier curve per data frame and save the figure.

        Despite the name, no Cox model is fitted in this method.

        :param df_arr: sequence of data frames, each with ``bcrmonth``
            (durations) and ``bcrstatus`` (event observed) columns.
        :param label: sequence of curve labels, one per entry of ``df_arr``.
        :param filename: output path without extension; ``.png`` and ``.pdf``
            files are written.
        :raises ValueError: if ``df_arr`` is empty (there is nothing to plot).
        :raises IndexError: if ``label`` has fewer entries than ``df_arr``.
        """
        if label is None:  # avoid a shared mutable default argument
            label = []
        if len(df_arr) == 0:
            raise ValueError("df_arr must contain at least one data frame to plot")

        plt.clf()
        palette = ['red', 'green', 'blue', 'cyan', 'orange', 'black']
        kmf = KaplanMeierFitter()

        ax = None
        for idx, df_el in enumerate(df_arr):
            kmf.fit(df_el['bcrmonth'], df_el['bcrstatus'], label=label[idx])
            plot_kwargs = dict(show_censors=True,
                               ci_show=False,
                               # cycle colours so more than six groups still plot
                               color=palette[idx % len(palette)],
                               ylim=(0, 1))
            if ax is None:
                # First curve: let the fitter pick/create the axes.
                ax = kmf.plot(**plot_kwargs)
            else:
                kmf.plot(ax=ax, **plot_kwargs)

        fig = ax.get_figure()
        fig.savefig(filename + '.png')
        fig.savefig(filename + '.pdf', format='PDF')
Ejemplo n.º 14
0
def get_scores(model, y_test, delta_test, time_grid, surv_residual = False, cens_residual = False):
    """Evaluate ``get_score`` at every time in ``time_grid``.

    Returns a pair of arrays ``(bss, nblls)``: the first element of each
    ``get_score`` result, and the negated second element.

    NOTE(review): ``x_test`` is not a parameter of this function; it is
    resolved from an enclosing/module scope -- confirm that is intended.
    """
    n = y_test.shape[0]
    x_train, (y_train, delta_train) = model.training_data

    # exponentiated residuals log(y) - prediction, with NaN/inf replaced;
    # the training residual is computed but not used further down
    exp_residual_train = np.nan_to_num(np.exp(np.log(y_train) - model.predict(x_train).reshape(-1)))
    exp_residual_test = np.nan_to_num(np.exp(np.log(y_test) - model.predict(x_test).reshape(-1)))

    # exp(-prediction) on the test covariates
    exp_predict_neg_test = np.nan_to_num(np.exp(-model.predict(x_test)).reshape(-1))

    # Nelson-Aalen on the training events; Kaplan-Meier on the flipped
    # indicator (1 - delta), i.e. on the censoring times
    naf_base = NelsonAalenFitter().fit(y_train, event_observed = delta_train)
    kmf_cens = KaplanMeierFitter().fit(y_train, event_observed = 1 - delta_train)

    if cens_residual == True:
        cens_test = kmf_cens.survival_function_at_times(exp_residual_test)
    elif cens_residual == False:
        cens_test = kmf_cens.survival_function_at_times(y_test)

    per_time = [
        get_score(n, t, y_test, delta_test, naf_base, kmf_cens, cens_test,
                  exp_predict_neg_test, surv_residual, cens_residual, model)
        for t in time_grid
    ]
    bss = [bs for bs, _ in per_time]
    nblls = [-nbll for _, nbll in per_time]

    return (np.array(bss), np.array(nblls))
Ejemplo n.º 15
0
 def _estimateSurv(df, ind):
     """Fit Nelson-Aalen on the rows of ``df`` selected by ``ind``.

     Uses ``duration_col`` and ``event_col`` from the enclosing scope.
     Returns ``(naf, sa, varsa)``: the fitter, the series ``exp(-H)`` named
     'surv', and the cumulative sum of ``_additive_var`` padded onto the
     sorted timeline, named 'surv_var' with index name 'timeline'.
     """
     naf = NelsonAalenFitter()
     naf.fit(durations=df.loc[ind, duration_col], event_observed=df.loc[ind, event_col])

     # Borrowed from lifelines
     events = naf.event_table
     timeline = sorted(naf.timeline)
     deaths = events['observed']
     # Slowest line here.
     entered = events['entrance'].cumsum()
     removed_before = events['removed'].cumsum().shift(1).fillna(0)
     population = entered - removed_before
     varsa = np.cumsum(_additive_var(population, deaths)).reindex(timeline, method='pad')
     varsa.index.name = 'timeline'
     varsa.name = 'surv_var'

     sa = np.exp(-naf.cumulative_hazard_.iloc[:, 0])
     sa.name = 'surv'
     return naf, sa, varsa
Ejemplo n.º 16
0
 def _estimateSurv(df, ind):
     """Fit Nelson-Aalen on the rows of ``df`` selected by ``ind``.

     Uses ``duration_col`` and ``event_col`` from the enclosing scope.
     Returns ``(naf, sa, varsa)``: the fitter, the series ``exp(-H)`` named
     'surv', and the cumulative sum of ``_additive_var`` padded onto the
     sorted timeline, named 'surv_var' with index name 'timeline'.
     """
     naf = NelsonAalenFitter()
     naf.fit(durations=df.loc[ind, duration_col], event_observed=df.loc[ind, event_col])

     # Borrowed from lifelines
     events = naf.event_table
     timeline = sorted(naf.timeline)
     deaths = events['observed']
     # Slowest line here.
     entered = events['entrance'].cumsum()
     removed_before = events['removed'].cumsum().shift(1).fillna(0)
     population = entered - removed_before
     varsa = np.cumsum(_additive_var(population, deaths)).reindex(timeline, method='pad')
     varsa.index.name = 'timeline'
     varsa.name = 'surv_var'

     sa = np.exp(-naf.cumulative_hazard_.iloc[:, 0])
     sa.name = 'surv'
     return naf, sa, varsa
Ejemplo n.º 17
0
def get_hazard_ratio_results(df, group_col, time_col, event_col):
    """Fit one Nelson-Aalen estimator per group.

    Parameters
    ----------
    df
        Data frame containing ``group_col``, ``time_col`` and ``event_col``.
        Rows with a missing value in any of the three are dropped.
    group_col
        Column whose distinct values define the groups.
    time_col
        Duration column; cast to float.
    event_col
        Event column; converted to categorical codes before fitting.

    Returns
    -------
    list
        One fitted ``NelsonAalenFitter`` per group, labelled
        ``"<group> (N=<group size>)"``. Empty if no rows remain.
    """
    df = df[[event_col, time_col, group_col]].dropna()
    df[event_col] = df[event_col].astype('category').cat.codes
    df[time_col] = df[time_col].astype('float')

    models = []
    if df.empty:
        return models

    for name, grouped_df in df.groupby(group_col):
        t = grouped_df[time_col]
        e = grouped_df[event_col]
        hr = NelsonAalenFitter()
        # ``format`` stringifies the key, so numeric/boolean group labels work
        # too (string concatenation raised TypeError for them).
        hr.fit(t, event_observed=e, label="{} (N={})".format(name, len(t)))
        models.append(hr)

    return models
Ejemplo n.º 18
0
 def test_naf_plot_cumulative_hazard(self, block):
     """Draw the same fit via ``plot`` and ``plot_cumulative_hazard`` on one axes."""
     sample = np.random.exponential(5, size=(200, 1))
     naf = NelsonAalenFitter()
     # ``fit`` returns the fitter, so the plot call can be chained onto it.
     ax = naf.fit(sample).plot()
     naf.plot_cumulative_hazard(ax=ax, ci_force_lines=True)
     self.plt.title("I should have plotted the same thing, but different styles + color!")
     self.plt.show(block=block)
Ejemplo n.º 19
0
def plot_hazard(df, TName, EName=None, groupBy=None, splitBy=None, params=None):
    """Plot with ``plot_any`` using a Nelson-Aalen fitter and a 'Hazard_Rate' y-label.

    ``df``, ``TName``, ``EName``, ``groupBy`` and ``splitBy`` are forwarded
    unchanged to ``plot_any``. ``params`` is an optional dict of extra plot
    options; it is copied before ``'ylabel'`` is set, so neither the caller's
    dict nor a shared default is mutated.

    Returns whatever ``plot_any`` returns.
    """
    print('\tHazard')
    plot_params = dict(params) if params else {}
    plot_params['ylabel'] = 'Hazard_Rate'
    return plot_any(df,
                    fitter=NelsonAalenFitter(),
                    TName=TName,
                    EName=EName,
                    groupBy=groupBy,
                    splitBy=splitBy,
                    params=plot_params)
Ejemplo n.º 20
0
def get_surv(model, x_test, timegrid=None):
    '''
    Build survival curves for x_test from a fitted model's predictions.

    model: PyCox model class or compatibles; must expose ``training_data`` as
        ``(x_train, (y_train, delta_train))`` and a ``predict`` method
    x_test: covariate dataset to compute survival estimates
    timegrid: pass "train" to cap the time grid at ``y_train.max()``; any
        other value keeps every time of the fitted cumulative hazard

    Returns a DataFrame indexed by "duration" with one column per element of
    the flattened test prediction.

    Side effect: installs process-wide "ignore" filters for FutureWarning and
    RuntimeWarning.
    '''
    for silenced in (FutureWarning, RuntimeWarning):
        warnings.simplefilter(action='ignore', category=silenced)

    x_train, (y_train, delta_train) = model.training_data

    # exponentiated training residuals: exp(log(y) - prediction)
    train_prediction = model.predict(x_train).reshape(-1)
    exp_residual = np.nan_to_num(np.exp(np.log(y_train) - train_prediction))

    # exp(-prediction) on the test covariates
    exp_predict = np.nan_to_num(np.exp(-model.predict(x_test)).reshape(-1))
    n_test = exp_predict.shape[0]

    # cumulative baseline hazard, estimated on the training residuals
    fitted = NelsonAalenFitter().fit(exp_residual, event_observed=delta_train)
    H = fitted.cumulative_hazard_

    max_time = y_train.max() if timegrid == "train" else max(H.index)
    eligible_times = H.loc[H.index <= max_time].index.values

    # When the (times x test rows) matrix would be too large, thin the grid
    # to evenly spaced quantiles of the eligible times.
    cell_cap = 5 * 10e7  # 10e7 == 1e8, so the cap is 5e8 cells
    if H.shape[0] * n_test >= cell_cap:
        n_steps = round(cell_cap / n_test)
        time_grid = np.quantile(a=eligible_times,
                                q=[step / n_steps for step in range(n_steps + 1)],
                                interpolation='nearest')
    else:
        time_grid = eligible_times

    # per-interval hazard increments on the chosen grid
    H_base = H.loc[time_grid].values.reshape(-1)
    increments = np.diff(H_base)

    # scale each increment by every test row's factor, accumulate over time,
    # and map the cumulative hazard to survival
    scaled = increments.reshape(-1, 1) * exp_predict
    surv = pd.DataFrame(np.exp(-np.cumsum(scaled, axis=0)),
                        index=time_grid[1:],
                        columns=list(range(n_test)))

    surv.index.names = ["duration"]

    return surv
Ejemplo n.º 21
0
 def test_naf_plot_cumulative_hazard_bandwith_1(self, block):
     """Plot the hazard with bandwidth 5.0 over the first 1700 positions of the fit."""
     squared_sample = np.random.exponential(5, size=(2000, 1)) ** 2
     naf = NelsonAalenFitter()
     # ``fit`` returns the fitter, so the plot call can be chained onto it.
     naf.fit(squared_sample).plot_hazard(bandwidth=5.0, iloc=slice(0, 1700))
     self.plt.title("test_naf_plot_cumulative_hazard_bandwith_1")
     self.plt.show(block=block)
Ejemplo n.º 22
0
def createHazardGraph(durations, event_observed):
    """Fit Nelson-Aalen on the given data and show its plot without confidence bands."""
    fitter = NelsonAalenFitter()
    # ``fit`` returns the fitter, so the plot call can be chained onto it.
    fitter.fit(durations, event_observed).plot(ci_show=False)

    plt.title("Hard Drive Nelson-Aalen Hazard Estimate")
    plt.ylabel("Cumulative Hazard")
    plt.show()
Ejemplo n.º 23
0
def plot_HR(df, with_ci=False):
    """Plot Nelson-Aalen curves for the top risk quartile versus the rest.

    ``df`` must have ``days_survived``, ``death`` and ``risk`` columns. Rows
    with ``risk`` above its 75th percentile form the 'High_Risk' group, the
    others 'Low_Risk'. The figure is saved to ``./hr_with_ci.png`` or
    ``./hr_without_ci.png`` depending on ``with_ci``.
    """
    durations = df['days_survived']
    deaths = df['death']
    fitter = NelsonAalenFitter()

    is_high = df['risk'] > np.percentile(df['risk'], 75)

    fitter.fit(durations[is_high], event_observed=deaths[is_high], label='High_Risk')
    axes = fitter.plot(ci_show=with_ci)
    fitter.fit(durations[~is_high], event_observed=deaths[~is_high], label='Low_Risk')
    fitter.plot(ax=axes, ci_show=with_ci)

    plt.ylim(0, .1)
    plt.xlabel("Days")
    plt.ylabel("Risk of Death")
    plt.title("Cardiovascular Death Risk over time (top quartile)")
    plt.savefig("./hr_with_ci.png" if with_ci else "./hr_without_ci.png")
Ejemplo n.º 24
0
def get_surv(model, x_test, timegrid="train"):
    '''
    Build survival curves for x_test from a fitted model's predictions.

    model: PyCox model class or compatibles; must expose ``training_data`` as
        ``(x_train, (y_train, delta_train))`` and a ``predict`` method
    x_test: covariate dataset to compute survival estimates
    timegrid: "train" (default) drops all times beyond ``y_train.max()``;
        any other value keeps the full grid

    Returns a DataFrame indexed by "duration" with one column per element of
    the flattened test prediction.

    Side effect: installs a process-wide "ignore" filter for FutureWarning.
    '''
    warnings.simplefilter(action='ignore', category=FutureWarning)

    x_train, (y_train, delta_train) = model.training_data

    # exponentiated training residuals: exp(log(y) - prediction)
    train_prediction = model.predict(x_train).reshape(-1)
    exp_residual = np.exp(np.log(y_train) - train_prediction)

    # exp(-prediction) on the test covariates
    exp_predict = np.exp(-model.predict(x_test)).reshape(-1)
    n_test = exp_predict.shape[0]

    # cumulative baseline hazard, estimated on the training residuals
    fitted = NelsonAalenFitter().fit(exp_residual, event_observed=delta_train)
    H = fitted.cumulative_hazard_

    # increments between consecutive grid points; the first grid point has no
    # predecessor and is therefore dropped from the output index
    time_grid = H.index.to_numpy()[1:]
    increments = np.diff(H.values.reshape(-1))

    # scale each increment by every test row's factor, accumulate over time,
    # and map the cumulative hazard to survival
    scaled = increments.reshape(-1, 1) * exp_predict
    surv = pd.DataFrame(np.exp(-np.cumsum(scaled, axis=0)),
                        index=time_grid,
                        columns=list(range(n_test)))
    surv.index.names = ["duration"]

    if timegrid != "train":
        return surv

    # cap the grid at the largest training time
    # (to be comparable to survival predictions from PyCox models)
    return surv.loc[surv.index <= y_train.max()]
Ejemplo n.º 25
0
    def _vval2ByBootstrap(timeline, nstraps=1000):
        """Bootstrap the inverse standard deviation of the two arms' log-survival.

        Uses ``df``, ``treatment_col``, ``duration_col`` and ``event_col``
        from the enclosing scope. For each of ``nstraps`` resamples of ``df``
        (with replacement, same size) a Nelson-Aalen estimator is fitted per
        arm (``treatment_col`` equal to 0 and 1), converted to survival via
        ``exp(-H)`` and forward-filled onto ``timeline``.

        Returns ``1 / sqrt(var(log S_0) + var(log S_1))`` per timeline point,
        with the variances taken across resamples and NaNs ignored.
        """
        n_times = timeline.shape[0]
        sa1_b = np.zeros((n_times, nstraps))
        sa2_b = np.zeros((n_times, nstraps))
        for sampi in range(nstraps):
            tmp = df.sample(frac=1, replace=True, axis=0)

            # Both arm masks must come from the resample itself; a mask built
            # from ``df`` only lines up with ``tmp`` through index alignment.
            for arm, store in ((0, sa1_b), (1, sa2_b)):
                in_arm = tmp[treatment_col] == arm
                naf = NelsonAalenFitter()
                naf.fit(durations=tmp.loc[in_arm, duration_col], event_observed=tmp.loc[in_arm, event_col])
                surv = np.exp(-naf.cumulative_hazard_.iloc[:, 0])
                store[:, sampi] = surv.reindex(timeline, method='ffill').values
        vval2 = 1/np.sqrt(np.nanvar(np.log(sa1_b), axis=1) + np.nanvar(np.log(sa2_b), axis=1))
        return vval2
Ejemplo n.º 26
0
def get_sa(request):
    """Render ``sa_test.html``, generating the two survival-analysis images if needed.

    The template context carries the relative paths of a Kaplan-Meier plot
    (``'kmf'``) and a Nelson-Aalen plot (``'naf'``). Each image is created
    from the Waltons example dataset when its file does not exist yet, and
    each is drawn on its own figure so the saved files do not share curves.
    """
    dirname = os.path.dirname(os.path.dirname(__file__)).replace('\\', '/')
    kmffile = '/images/test1.jpg'
    naffile = '/images/test2.jpg'
    context = {'kmf': kmffile, 'naf': naffile}

    kmf_missing = not os.path.exists(dirname + kmffile)
    naf_missing = not os.path.exists(dirname + naffile)
    # Regenerate whichever image is missing; requiring BOTH to be missing
    # would leave a single deleted image permanently absent.
    if kmf_missing or naf_missing:
        df = load_waltons()
        T = df['T']  # an array of durations
        E = df['E']  # boolean/binary array: was the 'death' observed (otherwise censored)

        if kmf_missing:
            kmf = KaplanMeierFitter(alpha=0.95)
            kmf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='KM_estimate', alpha=None, left_censorship=False, ci_labels=None)
            fig = plt.figure()
            kmf.plot(ax=fig.add_subplot(111))
            fig.savefig(dirname + kmffile)
            plt.close(fig)  # release the figure; this runs inside a request handler

        if naf_missing:
            naf = NelsonAalenFitter(alpha=0.95, nelson_aalen_smoothing=True)
            naf.fit(durations=T, event_observed=E, timeline=None, entry=None, label='NA_estimate', alpha=None, ci_labels=None)
            fig = plt.figure()
            naf.plot(ax=fig.add_subplot(111))
            fig.savefig(dirname + naffile)
            plt.close(fig)

    return render(request=request, template_name='sa_test.html', context=context)
Ejemplo n.º 27
0
Archivo: kmna.py Proyecto: xcodevn/SADP
EPS_LIST = [0.05,0.1,0.2,0.4,0.8,1.6]

bins0 = config.BIN0
bins1 = config.BIN1

df = pd.read_stata("wichert.dta")
# Pair each duration (scaled by the maximum duration) with its integer event
# flag. Materialise the pairs as a list: ``len()`` is taken below, and on
# Python 3 ``zip`` returns an iterator without a length.
data_ = list(zip(df.time/max(df.time), df.event.astype(int)))
# Keep only observations whose scaled time is at least config.GAMMA.
data  = [(a, b) for (a,b) in data_ if a >= config.GAMMA]

print("[*] Remove #%d outliers" % (len(data_) - len(data)))
N  = len(df) # number of rows in the loaded frame (before filtering)

#kmf = KaplanMeierFitter()
(T, E) = zip(*data)
#kmf.fit(T, event_observed=E)
naf = NelsonAalenFitter()
naf.fit(T, event_observed=E)
#ax = pyplot.subplot(121)
#naf.plot(ax=ax)

#ax = pyplot.subplot(122)
#kmf.plot(ax=ax)

# Cumulative hazard fitted on the filtered data, kept as the reference value.
true_value =  naf.cumulative_hazard_.values
#naf.cumulative_hazard_.to_csv("naf.csv")

#pyplot.show()

# Scaled times split by event flag (0 / 1).
data0  = [ a for (a,b) in data if b == 0 ]
data1  = [ a for (a,b) in data if b == 1 ]
Ejemplo n.º 28
0
LD = LCL.load_lending_data(load_files,keep_status,keep_terms,keep_grades)

print('loaded {0} loans'.format(len(LD)))

print_figs = False

#%%
#load long/lat data for each zip-code
zip3_data = LCL.load_location_data(data_dir,group_by='zip3')
LD['zip3'] = LD['zip3'].astype(int)
LD = pd.merge(LD, zip3_data, how='inner', left_on='zip3', right_index=True)

#%% Compute hazard functions for each loan grade and term
term_bandwidths = [4., 8.] #list of NAF smoothing bandwidth (for each term)
naf = NelsonAalenFitter(nelson_aalen_smoothing=False) #init NAF model

all_hazards = {} #initialize dict to store hazard functions
for idx,term in enumerate(keep_terms): #compute all hazard functions for each term

    cur_data = LD[LD.term==term]
    lifetimes = cur_data['num_pymnts'].copy() #lifetime is number of payments received
    # if the loan is fully paid set the lifetime to the full term
    # (boolean-mask assignment via .loc; the .ix indexer was removed from pandas)
    lifetimes.loc[cur_data.loan_status == 'Fully Paid'] = term
    is_observed = cur_data.loan_status.isin(['Charged Off']) #observed loans are just the ones that have been charged off, rest are censored

    all_hazards[term] = np.zeros((len(keep_grades),term+1)) #initialize matrix of hazard functions

    for gidx,grade in enumerate(keep_grades): #fit model for each grade
        grade_data = cur_data.grade == grade
        naf.fit(lifetimes[grade_data],event_observed=is_observed[grade_data],label=grade,timeline=np.arange(term+1))
        # one row per grade: smoothed hazard on the 0..term timeline
        all_hazards[term][gidx,:] = naf.smoothed_hazard_(term_bandwidths[idx]).squeeze()
    def fit(
        self,
        durations,
        event_observed=None,
        timeline=None,
        entry=None,
        label="BFH_estimate",
        alpha=None,
        ci_labels=None,
    ):  # pylint: disable=too-many-arguments
        """
        Fit the estimator by exponentiating a Nelson-Aalen cumulative hazard.

        Parameters
        ----------
        durations: an array, or pd.Series, of length n
            duration subject was observed for
        timeline:
            return the best estimate at the values in timelines (positively increasing)
        event_observed: an array, or pd.Series, of length n
            True if the death was observed, False if the event was lost (right-censored).
            Defaults all True if event_observed==None
        entry: an array, or pd.Series, of length n
            relative time when a subject entered the study. This is
            useful for left-truncated observations, i.e the birth event was not observed.
            If None, defaults to all 0 (all birth events observed.)
        label: string
            a string to name the column of the estimate.
        alpha: float, optional (default=0.05)
            the alpha value in the confidence intervals. Overrides the initializing
            alpha for this call to fit only.
        ci_labels: iterable
            add custom column names to the generated confidence intervals as a
            length-2 list: [<lower-bound name>, <upper-bound name>].
            Default: <label>_lower_<alpha>

        Returns
        -------
          self, with new properties like ``survival_function_``.

        """
        self._label = label
        effective_alpha = coalesce(alpha, self.alpha)

        # Delegate the actual estimation to a Nelson-Aalen fit.
        hazard_fitter = NelsonAalenFitter(alpha=effective_alpha)
        hazard_fitter.fit(
            durations,
            event_observed=event_observed,
            timeline=timeline,
            label=label,
            entry=entry,
            ci_labels=ci_labels,
        )

        # Mirror the fitted inputs and event table on this instance.
        self.durations = hazard_fitter.durations
        self.event_observed = hazard_fitter.event_observed
        self.timeline = hazard_fitter.timeline
        self.entry = hazard_fitter.entry
        self.event_table = hazard_fitter.event_table

        # estimation: S(t) = exp(-H(t)), applied to the estimate and its bounds
        self.survival_function_ = np.exp(-hazard_fitter.cumulative_hazard_)
        self.confidence_interval_ = np.exp(-hazard_fitter.confidence_interval_)

        # estimation methods
        self._estimation_method = "survival_function_"
        self._estimate_name = "survival_function_"
        self._update_docstrings()

        # plotting functions
        self.plot_survival_function = self.plot
        return self
Ejemplo n.º 30
0
     # Plot Kaplan-Meier curve
     kmf = KaplanMeierFitter()
     first = 0
     for r in cac_ranges:
         ix = cac_values == r
         if first == 0:
             kmf.fit(times[ix], censors[ix], label=r)
             ax = kmf.plot()
             first = 1
         else:
             kmf.fit(times[ix], censors[ix], label=r) 
             kmf.plot(ax=ax)
 
 elif curve == 'hazard':
     # Plot hazard curve
     naf = NelsonAalenFitter() 
     first = 0
     for r in cac_ranges:
         ix = cac_values == r
         if first == 0:
             naf.fit(times[ix], censors[ix], label=r)
             ax = naf.plot()
             first = 1
         else:
             naf.fit(times[ix], censors[ix], label=r) 
             naf.plot(ax=ax)            
                 
    
 ax.set_ylabel("%", fontsize=12)    
 ax.set_title(tag, fontsize=14)
 ax.set_xlabel("Years to event", fontsize=12)
plt.savefig('/home/raed/Dropbox/INSE - 6320/Final Project/income_States.pdf')
plt.show()

# Kaplan-Meier curves split by the 'Has_Children' value, all drawn on one axes.
ax = plt.subplot(111)
for r in data['Has_Children'].unique():
    ix = data['Has_Children'] == r  # boolean mask for the current group
    # Refit the shared fitter on this group only; the group value is the label.
    kmf.fit(data['Duration'].loc[ix], data['Divorce'].loc[ix], label=r)
    sns.set()
    ax = kmf.plot(title='Mariage Survival Estimate Based on Children',
                  ax=ax,
                  linewidth=2.5)
# Export the figure
plt.savefig('/home/raed/Dropbox/INSE - 6320/Final Project/Children.pdf')
plt.show()

# Nelson-Aalen cumulative hazard over the whole dataset (no grouping).
naf = NelsonAalenFitter()
naf.fit(data['Duration'], data['Divorce'])
sns.set()
naf.plot(title='Cumulative hazard over time', legend=False)
# Print the first 32 rows of the fitted cumulative hazard table.
print(naf.cumulative_hazard_.head(32))
plt.savefig(
    '/home/raed/Dropbox/INSE - 6320/Final Project/Cumulative_Hazard_function.pdf'
)
plt.show()

ax = plt.subplot(111)
for r in data['Couple_Race'].unique():
    ix = data['Couple_Race'] == r
    naf.fit(data['Duration'].loc[ix], data['Divorce'].loc[ix], label=r)
    sns.set()
    ax = naf.plot(title='Cumulative Hazard by Couple Race ',
Ejemplo n.º 32
0
    kmf.fit(T[ix], E[ix], label=dept)
    kmf.plot(ax=ax, legend=False)
    plt.title(dept)
    plt.xlim(0, 1000)
    if i == 0:
        plt.ylabel('Frac. in staying after $n$ years')
plt.tight_layout()

# Refit the Kaplan-Meier fitter per department and print each median.
for i, dept in enumerate(depts):
    ix = data['dept'] == dept  # boolean mask for the current department
    kmf.fit(T[ix], E[ix], label=dept)
    print(dept, kmf.median_)

# Looking at a hazard curve
from lifelines import NelsonAalenFitter
naf = NelsonAalenFitter()

# Fit on all observations, show the first rows of the cumulative hazard, plot it.
naf.fit(T, event_observed=E)
print(naf.cumulative_hazard_.head())
naf.plot()

# This hazard curve shows us that there is low hazard of someone leaving starting off, then it gets worse,
# once you stay for 500 days you stay at least a bit more, then exponentially it gets worse!

# SURVIVAL REGRESSION -- figuring out the influences of other aspects on whether or not someone survives
# Can't use regular linear regression. Want to use Cox's model or Aalen's additive model.

# Cox's Proportional Hazard model
# "The idea behind the model is that the log-hazard of an individual is a linear function of their static covariates
# and a population-level baseline hazard that changes over time" - from https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html
Ejemplo n.º 33
0
                seen_data_events.add(time_to_event)

                data_events = np.append(data_events,np.array([time_to_event]*num_repair))
            for v in sales_dict.values():
                #investigate why some negative leftovers on certain valid dates , more repairs than sales ???
                if v>0:
                    data_events = np.append(data_events,np.zeros(v))

            t=[]
            if len(data_events)==0:
                all_data.append([0]*19)
                continue

            data_events[data_events==0] = 70
            C= data_events <70
            naf = NelsonAalenFitter()
            naf.fit(data_events, event_observed=C )
            y_h =  np.array(naf.cumulative_hazard_).reshape(len(naf.cumulative_hazard_))
            x= np.array(naf.cumulative_hazard_.index).astype(int)

            seen_data_events.add(0)
            seen_data_events.add(70)

            if len(y_h) > 14:
                slope, intercept, r_value, p_value, std_err = stats.linregress(x[len(x)-5:len(x)-1],y_h[len(y_h)-5:len(y_h)-1])
                
                #plt.figure()
                #plt.plot(x, y_h, 'ko')
                #plt.plot(x, linear_f(x,slope,intercept ), 'r-')

                #plt.legend()
Ejemplo n.º 34
0
                seen_data_events.add(time_to_event)

                data_events = np.append(data_events,np.array([time_to_event]*num_repair))
            for v in sales_dict.values():
                #investigate why some negative leftovers on certain valid dates , more repairs than sales ???
                if v>0:
                    data_events = np.append(data_events,np.zeros(v))

            t=[]
            if len(data_events)==0:
                all_data.append([0]*19)
                continue

            data_events[data_events==0] = 160
            C= data_events <160
            naf = NelsonAalenFitter()
            naf.fit(data_events, censorship=C )

            y_h =  np.array(naf.cumulative_hazard_).reshape(len(naf.cumulative_hazard_))
            x= np.array(naf.cumulative_hazard_.index).astype(int)

            seen_data_events.add(0)
            seen_data_events.add(160)

            if len(y_h) > 14:
                slope, intercept, r_value, p_value, std_err = stats.linregress(x[len(x)-5:len(x)-1],y_h[len(y_h)-5:len(y_h)-1])
                #plt.figure()
                #plt.plot(x, y_h, 'ko')
                #plt.plot(x, linear_f(x,slope,intercept ), 'r-')

                #plt.legend()
Ejemplo n.º 35
0
# WeibullFitter
############################################################
from lifelines import WeibullFitter

# Fit a Weibull model on all observations, then print its lambda_/rho_
# attributes and summary, and plot it.
wf = WeibullFitter()
wf.fit(T, E)
print(wf.lambda_, wf.rho_)
wf.print_summary()
wf.plot()

############################################################
# NelsonAalenFitter
############################################################
from lifelines import NelsonAalenFitter

naf = NelsonAalenFitter()

# Overall fit and plot on all observations.
naf.fit(T, event_observed=E)
naf.plot()

# univariate analysis: cum hazard
# ORIG_CHN: one curve per distinct ORIG_CHN value, overlaid on a shared axes
ax = plt.subplot()
for chn in df_cox.ORIG_CHN.unique():
    is_chn = (df_cox.ORIG_CHN == chn)  # boolean mask for the current value
    naf.fit(T[is_chn], event_observed=E[is_chn], label=chn)
    naf.plot(ax=ax)

# PURPOSE
ax = plt.subplot()
for purpose in df_cox.PURPOSE.unique():
Ejemplo n.º 36
0
1  13  1  miR-137
2  13  1  miR-137
3  13  1  miR-137
4  19  1  miR-137
"""

T = df['T']  # durations column
E = df['E']  # event column, passed as event_observed below

# Fit the survival curve
kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=E)  # or, more succinctly, kmf.fit(T, E)
kmf.plot()

# Plot cumulative hazard function
naf = NelsonAalenFitter()
naf.fit(T, E)
naf.plot()

#------------------------------------------------------------------------------
#        Multiple groups
#------------------------------------------------------------------------------
groups = df['group']
ix = (groups == 'miR-137')  # True for the 'miR-137' rows

# Rows outside the 'miR-137' group are plotted first, labelled 'control';
# the returned axes is reused for the second curve.
kmf.fit(T[~ix], E[~ix], label='control')
ax = kmf.plot()

# Refit the same fitter on the 'miR-137' rows and overlay on the same axes.
kmf.fit(T[ix], E[ix], label='miR-137')
kmf.plot(ax=ax)
Ejemplo n.º 37
0
class Node:
    """
    A single node of a survival tree.

    Constructing a Node immediately grows the whole subtree below it (see ``grow_tree``).
    A node ends up either terminal, holding a fitted ``NelsonAalenFitter`` in ``chf``, or
    internal, holding a split (``split_var`` / ``split_val``) and two children
    (``lhs`` / ``rhs``).
    """

    # Class-level defaults; instances overwrite them while the tree is grown.
    score = 0
    split_val = None
    split_var = None
    lhs = None
    rhs = None
    chf = None
    chf_terminal = None
    terminal = False

    def __init__(self,
                 x,
                 y,
                 tree,
                 f_idxs,
                 n_features,
                 unique_deaths=1,
                 min_leaf=1,
                 random_state=None):
        """
        A Node of the Survival Tree.
        :param x: The input samples. Should be a Dataframe with the shape [n_samples, n_features].
        :param y: The target values as a Dataframe with the survival time in the first column and the event
        in the second column.
        :param tree: The corresponding Survival Tree. Its ``timeline`` attribute is read when a terminal node
        is fitted and its ``chf`` attribute is written by ``predict``.
        :param f_idxs: The indices of the features to use.
        :param n_features: The number of features to use.
        :param unique_deaths: The minimum number of unique deaths required to be at a leaf node.
        :param min_leaf: The minimum number of samples required to be at a leaf node. A split point at any depth will
        only be considered if it leaves at least min_leaf training samples in each of the left and right branches.
        :param random_state: Seed for the feature sampling of the child nodes. None uses NumPy's global
        random generator.
        """
        self.x = x
        self.y = y
        self.tree = tree
        self.f_idxs = f_idxs
        self.n_features = n_features
        self.unique_deaths = unique_deaths
        self.random_state = random_state
        self.min_leaf = min_leaf
        self.grow_tree()

    def _sample_feature_idxs(self):
        """
        Draw the feature indices handed to one child node.
        :return: The first n_features entries of a random permutation of the column positions of x.
        """
        n_columns = self.x.shape[1]
        if self.random_state is None:
            permutation = np.random.permutation(n_columns)
        else:
            # A fresh RandomState is created from the same seed on every call, so with a
            # fixed seed both children receive the same feature subset.
            permutation = np.random.RandomState(
                seed=self.random_state).permutation(n_columns)
        return permutation[:self.n_features]

    def grow_tree(self):
        """
        Grow tree by calculating the Nodes recursively.
        :return: self
        """
        # Sum the event column after dropping rows that repeat the same (index label, event)
        # pair. The event column is selected by position before summing, which avoids the
        # deprecated integer-position lookup on a label-indexed Series and does not try to
        # sum the index column.
        events = self.y.iloc[:, 1]
        unique_deaths = events.reset_index().drop_duplicates().iloc[:, 1].sum()

        if unique_deaths <= self.unique_deaths:
            self.compute_terminal_node()
            return self

        # The split search is delegated to splitting.find_split, which is defined outside this class.
        self.score, self.split_val, self.split_var, lhs_idxs_opt, rhs_idxs_opt = splitting.find_split(
            self)

        # No usable split was found: this node becomes a leaf.
        if self.split_var is None:
            self.compute_terminal_node()
            return self

        # Left is sampled before right to keep the draw order on the global generator.
        lf_idxs = self._sample_feature_idxs()
        rf_idxs = self._sample_feature_idxs()

        # unique_deaths is forwarded explicitly; otherwise the children would silently
        # fall back to the default of 1 instead of the value configured for this tree.
        self.lhs = Node(self.x.iloc[lhs_idxs_opt, :],
                        self.y.iloc[lhs_idxs_opt, :],
                        self.tree,
                        lf_idxs,
                        self.n_features,
                        unique_deaths=self.unique_deaths,
                        min_leaf=self.min_leaf,
                        random_state=self.random_state)

        self.rhs = Node(self.x.iloc[rhs_idxs_opt, :],
                        self.y.iloc[rhs_idxs_opt, :],
                        self.tree,
                        rf_idxs,
                        self.n_features,
                        unique_deaths=self.unique_deaths,
                        min_leaf=self.min_leaf,
                        random_state=self.random_state)

        return self

    def compute_terminal_node(self):
        """
        Compute the terminal node if condition has reached.
        Marks the node as terminal and fits a Nelson-Aalen estimator on its samples,
        evaluated on the timeline of the tree.
        :return: self
        """
        self.terminal = True
        self.chf = NelsonAalenFitter()
        t = self.y.iloc[:, 0]
        e = self.y.iloc[:, 1]
        self.chf.fit(t, event_observed=e, timeline=self.tree.timeline)

        return self

    def predict(self, x):
        """
        Predict the cumulative hazard function if its a terminal node. If not walk through the tree.
        The result is also stored on ``self.tree.chf``.
        :param x: The input sample, indexable by ``split_var``.
        :return: The first column of the cumulative hazard of the terminal node that x falls into.
        """
        if self.terminal:
            self.tree.chf = self.chf.cumulative_hazard_
            self.tree.chf = self.tree.chf.iloc[:, 0]
            return self.tree.chf

        # Hand the leaf's result back to the caller; previously it was dropped and every
        # call on a non-terminal node returned None.
        if x[self.split_var] <= self.split_val:
            return self.lhs.predict(x)
        return self.rhs.predict(x)
Ejemplo n.º 38
0
Archivo: kmna.py Proyecto: xcodevn/SADP
import pandas as pd

# Load the data set and pair each time, scaled by the largest observed time,
# with its integer event flag.
df = pd.read_stata("wichert.dta")
# The pairs are materialised as a list because they are iterated and then
# measured with len() below; a lazy Python 3 zip object supports neither
# len() nor a second pass.
data_ = list(zip(df.time/max(df.time), df.event.astype(int)))
# Keep only the pairs whose scaled time is at least config.GAMMA.
data  = [(a, b) for (a,b) in data_ if a >= config.GAMMA]

print("[*] Remove #%d outliers" % (len(data_) - len(data)))
N  = len(df) # number of data points (counted before the filtering above)

from lifelines import KaplanMeierFitter
from lifelines import NelsonAalenFitter

# Fit both estimators on the filtered pairs.
kmf = KaplanMeierFitter()
(T, E) = zip(*data)
kmf.fit(T, event_observed=E)
naf = NelsonAalenFitter()
naf.fit(T, event_observed=E)

# Nelson-Aalen estimate in the left panel, Kaplan-Meier estimate in the right panel.
ax = pyplot.subplot(121)
naf.plot(ax=ax)

ax = pyplot.subplot(122)
kmf.plot(ax=ax)

# Function-call form of print: valid on Python 3 and, with a single argument,
# prints the same text on Python 2.
print(naf.cumulative_hazard_)
naf.cumulative_hazard_.to_csv("naf.csv")

pyplot.show()

# Split the filtered times by event flag.
data0  = [ a for (a,b) in data if b == 0 ]
data1  = [ a for (a,b) in data if b == 1 ]