Esempio n. 1
0
def hazard(sf):
    """Estimate the hazard function from an estimated survival function.

    The hazard is the derivative of -log(S(t)); it is approximated here by
    the finite difference of the log survival probabilities between
    consecutive time points.

    Parameters
    ----------
    sf : object
        Any object exposing array-valued ``surv_times`` and ``surv_prob``
        attributes of equal length.

    Returns
    -------
    tuple
        ``(times, hazard)``: the left end point of each time interval and
        the hazard estimate on that interval.
    """
    # Read from the argument; the previous version read a global named `s`
    # and ignored `sf` altogether.
    tm = sf.surv_times
    pr = sf.surv_prob
    # Drop points where the survival probability is zero, since log(0) is
    # not finite.
    ii = (pr > 0)
    tm = tm[ii]
    pr = pr[ii]
    lpr = np.log(pr)
    return tm[0:-1], -np.diff(lpr) / np.diff(tm)

# Plot smoothed log-hazard functions for women and men
plt.clf()
plt.grid(True)
sex = {0: "Male", 1: "Female"}
for female in (0, 1):
    ii = df.female == female
    s = sm.SurvfuncRight(df.loc[ii, "end"], df.loc[ii, "mortstat"], entry=df.loc[ii, "age_int"])
    tm, hz = hazard(s)
    # Lowess-smooth the log hazard against tm/12
    # (presumably months converted to years -- confirm the units of "end").
    ha = sm.nonparametric.lowess(np.log(hz), tm/12)
    # Column 0 of the lowess result is the x value, column 1 the fitted value.
    plt.plot(ha[:, 0], ha[:, 1], lw=3, label=sex[female])
ha, lb = plt.gca().get_legend_handles_labels()
# `loc` must be given by keyword: newer Matplotlib releases accept only the
# handles and labels as positional arguments.
leg = plt.figlegend(ha, lb, loc="upper center", ncol=2)
leg.draw_frame(False)
plt.xlabel("Age", size=15)
plt.ylabel("Log hazard", size=15)
plt.xlim(18, 90)
pdf.savefig()
pdf.close()

#
# Model 1
#
Esempio n. 2
0
# Some survival analysis methods have trouble with event times that are
# equal to 0, so we remove those cases here.

df = df.loc[df.lifespan > 0]

# Plot the product-limit survival function estimates for females and for
# males.

# +
ax = plt.axes()
plt.grid(True)
plt.xlabel("Lifespan (years)", size=15)
plt.ylabel("Proportion", size=15)
for sex in "Female", "Male":
    ii = df.Gender == sex
    # The status vector is all ones, i.e. every lifespan is treated as an
    # observed event (no censoring).
    s = sm.SurvfuncRight(df.loc[ii, "lifespan"], np.ones(ii.sum()), title=sex)
    s.plot(ax=ax)

# Create a legend
ha, lb = ax.get_legend_handles_labels()
# Keep entries 0 and 2 only: one handle per curve, skipping the handles at
# positions 1 and 3 (the plotted points).
ha = [ha[0], ha[2]]  # Optional, hide points from legend
lb = [lb[0], lb[2]]
leg = plt.figlegend(ha, lb, loc="center right")
leg.draw_frame(False)
# -

# ## Hazard function estimates for females and males

# The hazard function is the derivative of -log(S(t)), where S(t) is the
# survival function.  Below we will create hazard function estimates for
# females and for males by taking the numerical derivative of the log survival
                right_on='Id',
                how='inner',
                validate='1:1')
# Append one indicator column per value of 'user_types', named 'type_<value>'.
dfEU = pd.concat(
    [dfEU, pd.get_dummies(dfEU['user_types'], prefix='type')], axis=1)

# Proportional hazards regression of Seniority_days on the type_2 and type_3
# indicators, with 'reached' as the event status and Efron handling of ties.
# Only type_2 and type_3 enter the formula, so the remaining type(s) --
# presumably type 1 -- act as the reference level.
mod = smf.phreg("Seniority_days ~ 0 + type_2 + type_3",
                status=dfEU['reached'].values,
                data=dfEU,
                ties="efron")
rslt = mod.fit()
print(rslt.summary())

# Overlay the estimated survival function of each user type (1, 2, 3) on a
# single axes.
fig, ax = plt.subplots()
for tp in range(1, 4):
    sf = sm.SurvfuncRight(dfEU.loc[dfEU['user_types'] == tp, "Seniority_days"],
                          dfEU.loc[dfEU['user_types'] == tp, "reached"])
    sf.plot(ax)
# Each sf.plot call is expected to add two lines (curve, then markers), so
# odd indices are the markers -- presumably the censoring symbols drawn by
# statsmodels; verify against the installed version.
li = ax.get_lines()
li[1].set_visible(False)  # hide the cross markers of the first curve
li[3].set_visible(False)
li[5].set_visible(False)
plt.legend((li[0], li[2], li[4]), ('Type 1', 'Type 2', 'Type 3'))
plt.ylim(0.8, 1)
plt.ylabel("Proportion not editor")
plt.xlabel("Days of participation")
plt.title("Survival function - time to become Editor (before website' design)")
plt.tight_layout()
# saved as survivalByTypes_earlyT.png

# History of late users (those never affected by the change in threshold):
# rows of `hist` whose 'user' is not in `user_beforeGrad`.
LUhist = hist.loc[(~hist['user'].isin(user_beforeGrad))]
Esempio n. 4
0
def _to_positional_series(values, name):
    '''Return `values` as a Series with a fresh 0..n-1 index.

    Arrays are wrapped in a Series called `name`; anything else is assumed
    to be a pandas object and only has its index reset.
    '''
    if isinstance(values, np.ndarray):
        return pd.Series(values, name=name)
    return values.reset_index(drop=True)


def core(X, Y1, Y2, Z=None):
    '''
    Fit a Cox proportional hazards model and tabulate survival curves.

    Parameters
    ----------
    X : pd.DataFrame
        Covariates (e.g. drug combination type, age), categorical or
        quantitative.
    Y1 : pd.Series or np.ndarray
        Survival time (quantitative).
    Y2 : pd.Series or np.ndarray
        Survival status, 0 or 1.  It is passed unchanged as the ``status``
        argument of PHReg and SurvfuncRight.
        NOTE(review): the original documentation described 1 as "alive" and
        0 as "dead"; statsmodels' status convention should be checked
        against that encoding before relying on the results.
    Z : pd.Series or np.ndarray, optional
        Stratification variable (categorical).  Any other value, including
        the default None, puts every observation into a single group
        labelled with the empty string.

    Returns
    -------
    dict
        Three DataFrames: the survival probabilities per group (one column
        per group, indexed by time), the coefficient table of the fitted
        model, and a one-row model-level table.
    '''

    # Align all inputs on a common positional index so the joins below match
    # rows by position rather than by the callers' original labels.
    X = X.reset_index(drop=True)
    Y1 = _to_positional_series(Y1, 'futime')
    Y2 = _to_positional_series(Y2, 'death')

    if isinstance(Z, np.ndarray):
        Z = pd.Series(Z, name='class')
    elif isinstance(Z, pd.Series):
        Z = Z.reset_index(drop=True)
    else:
        # No usable grouping supplied: one group with an empty label.
        Z = pd.Series([''] * len(Y1), name='class')

    mod = PHReg(Y1, X, status=Y2)
    res = mod.fit()

    # Coefficient table of the model summary, relabelled for presentation.
    tables = res.summary().tables
    dfinfo1 = tables[1]
    dfinfo1.index.name = '项'
    dfinfo1.columns.name = '参数类型'
    dfinfo1.columns = [
        '回归系数', '标准误差SE', '风险比HR', 'Z值', 'p值', '95%CI(下限)', '95%CI(上限)'
    ]
    dfinfo1['or值'] = np.exp(res.params)
    dfinfo1 = dfinfo1.round(3)

    # NOTE(review): the second key is labelled as a likelihood-ratio
    # chi-square value but is filled with the log-likelihood (res.llf).
    tb2 = {'df': res.df_model, '似然比卡方值': res.llf}

    dfinfo2 = pd.DataFrame([tb2]).round(3)
    dfinfo2 = dfinfo2.set_index('似然比卡方值')

    # Survival curves: one estimated survival function per group of Z.
    D = Y1.to_frame(name='futime').join(Y2.to_frame(name='death')).join(
        Z.to_frame(name='class'))

    classes = []
    for label, group in D.groupby("class"):
        summary = sm.SurvfuncRight(group["futime"], group["death"]).summary()
        sl = summary['Surv prob']
        sl.index.name = '生存时间'
        sl.name = str(label) + '_生存率'
        classes.append(sl.to_frame())

    # Columns are aligned on the union of the groups' event times.
    df_sl = pd.concat(classes, axis=1)

    rr = {'生存函数曲线': df_sl, 'Cox回归模型分析结果汇总': dfinfo1, 'Cox回归模型似然比检验结果': dfinfo2}

    return rr
Esempio n. 5
0
# Simulate grouped data with censoring: four groups of n subjects each, with
# exponentially distributed event and follow-up times.
n = 200
mng = np.r_[30, 25, 50, 40]   # Mean event times per group
mn = np.kron(mng, np.ones(n)) # Mean event times per individual
mf = 40 # Mean follow-up time
evt = -mn*np.log(np.random.uniform(size=4*n)) # Event times
fut = -mf*np.log(np.random.uniform(size=4*n)) # Follow up times
y = np.where(evt < fut, evt, fut) # The time that is observed
# Event indicator: 1 if the event was observed, 0 if the subject was censored.
# The builtin `int` replaces the `np.int` alias, which no longer exists in
# current NumPy releases.
c = (y == evt).astype(int)
g = np.kron(np.arange(4), np.ones(n)) # Group labels
df = pd.DataFrame({"y": y, "c": c, "g": g})

# Estimate one survival function per group
sf = []
for k, dx in df.groupby("g"):
    s = sm.SurvfuncRight(dx.y, dx.c)
    sf.append(s)

# Plot the survival function estimates on a fresh figure.
plt.clf()
plot_survfunc(sf)
ha, lb = plt.gca().get_legend_handles_labels()
# `loc` must be given by keyword: newer Matplotlib releases accept only the
# handles and labels as positional arguments.
leg = plt.figlegend(ha, lb, loc="center right")
leg.draw_frame(False)
pdf.savefig()

# Simulate data for a proportional hazards regression analysis:
# four standard-normal covariates per subject.
n = 400
xmat = np.random.normal(size=(n, 4))
# Linear predictor; only the first two covariates have a non-zero effect.
lp = xmat.dot(np.r_[1, -1, 0, 0])
# Event times: exponential draws scaled by exp(-lp)
evt = -np.log(np.random.uniform(size=n)) * np.exp(-lp)
Esempio n. 6
0
    ii = (pr > 0)
    tm = tm[ii]
    pr = pr[ii]
    lpr = np.log(pr)
    return tm[0:-1], -np.diff(lpr) / np.diff(tm)


# Plot the hazard functions for women and men.  These are unadjusted hazard
# functions, i.e. they describe the hazard for all people at a given age.

plt.grid(True)
sex = {0: "Male", 1: "Female"}
for female in (0, 1):
    ii = df.female == female
    # Subjects enter the risk set at "age_months" (entry times) rather than
    # at time zero.
    s = sm.SurvfuncRight(df.loc[ii, "end"],
                         df.loc[ii, "mortstat"],
                         entry=df.loc[ii, "age_months"])
    tm, hz = hazard(s)
    # Lowess-smooth the log hazard against tm / 12
    # (presumably months converted to years -- confirm the units of "end").
    ha = sm.nonparametric.lowess(np.log(hz), tm / 12)
    # Column 0 of the lowess result is the x value, column 1 the fitted value.
    plt.plot(ha[:, 0], ha[:, 1], lw=3, label=sex[female])
ha, lb = plt.gca().get_legend_handles_labels()
# `loc` must be given by keyword: newer Matplotlib releases accept only the
# handles and labels as positional arguments.
leg = plt.figlegend(ha, lb, loc="upper center", ncol=2)
leg.draw_frame(False)
plt.xlabel("Age", size=15)
plt.ylabel("Log hazard", size=15)
_ = plt.xlim(18, 90)

# Plot "reverse survival functions" to get a sense of the follow up time.

plt.grid(True)
sex = {0: "Male", 1: "Female"}
#   Author: Bernie Roesler
#
"""
  Description: Statsmodels Survival Analysis Example
    See: <http://www.statsmodels.org/0.8.0/duration.html>
"""
#==============================================================================

import matplotlib.pyplot as plt
import statsmodels.api as sm

# Start from a clean slate: close any figures left open by earlier runs.
plt.close('all')

# Load the "flchain" dataset from the R "survival" package and keep the rows
# whose sex is "F".
data = sm.datasets.get_rdataset("flchain", "survival").data
df = data.loc[data.sex == "F", :]
# Estimate the survival function with "futime" as the time variable and
# "death" as the event status.
sf = sm.SurvfuncRight(df["futime"], df["death"])

print(sf.summary().head())
# print(sf.quantile(0.25))
# print(sf.quantile_ci(0.25))

# Plot single survival curve, remove censoring symbols
fig = sf.plot()
ax = fig.get_axes()[0]
# The second line on the axes holds the censoring symbols; hide it.
pt = ax.get_lines()[1]
pt.set_visible(False)

# Shade the simultaneous confidence band around the curve (default settings
# of simultaneous_cb -- presumably 95%; confirm against the statsmodels docs).
lcb, ucb = sf.simultaneous_cb()
ax = fig.get_axes()[0]
ax.fill_between(sf.surv_times, lcb, ucb, color='lightgrey')