Example #1
0
 def test_kmf_with_risk_counts(self, block):
     """Plot a Kaplan-Meier fit of exponential samples with ``at_risk_counts=True``."""
     durations = np.random.exponential(10, size=100)

     fitter = KaplanMeierFitter()
     fitter.fit(durations)
     fitter.plot(at_risk_counts=True)

     pyplot = self.plt
     pyplot.title("test_kmf_with_risk_counts")
     # ``block`` is forwarded unchanged to matplotlib's ``show``.
     pyplot.show(block=block)
Example #2
0
def plot_KM(stime, censor, g1, pval, figname):
    """Plot Kaplan-Meier curves for two groups and save the figure as EPS.

    Args:
        stime: Durations; indexed with ``g1`` and ``~g1``.
        censor: Passed to ``KaplanMeierFitter.fit`` as ``event_observed``;
            indexed with ``g1`` and ``~g1``.
        g1: Mask selecting the "high-risk group"; ``~g1`` selects the
            "low-risk group" (presumably a boolean array -- confirm with
            the caller).
        pval: Value printed on the axes as ``pval = ...`` in ``%.2e`` format.
        figname: Destination handed to ``plt.savefig`` with ``format='eps'``.

    Side effects:
        Sets the seaborn style to ``'white'``, changes NumPy's global print
        options, writes ``figname`` and closes the current figure.
    """
    sns.set_style('white')
    kmf = KaplanMeierFitter()
    _, ax = plt.subplots(figsize=(3, 3))
    np.set_printoptions(precision=2, suppress=False)

    # The same fitter is refit for each group; both curves share one axes.
    for mask, label in ((g1, ["high-risk group"]), (~g1, ["low-risk group"])):
        kmf.fit(stime[mask], event_observed=censor[mask], label=label)
        kmf.plot(ax=ax, ci_show=False, show_censors=True)

    # Positional argument on purpose: the ``b`` keyword was renamed to
    # ``visible`` in Matplotlib 3.5 and later dropped, so ``grid(b=False)``
    # is not portable across versions.
    ax.grid(False)
    sns.despine()
    plt.ylim(0, 1)
    plt.xlabel("time", fontsize=14)
    plt.ylabel("survival", fontsize=14)
    # Coordinates are in axes fractions because of ``transform=ax.transAxes``.
    plt.text(0.7,
             0.85,
             'pval = %.2e' % (pval),
             fontdict={'size': 12},
             horizontalalignment='center',
             verticalalignment='center',
             transform=ax.transAxes)
    plt.xticks(rotation=45)
    for item in (ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(10)
    plt.tight_layout()
    plt.savefig(figname, format='eps')
    plt.close()
def test_kmf_minimum_observation_bias():
    """Fit and plot a Kaplan-Meier curve with entry times set to 1% of each duration."""
    sample_size = 250
    fitter = KaplanMeierFitter()
    durations, events = exponential_survival_data(sample_size, 0.1, scale=10)

    # Entry times are proportional to the durations themselves.
    entry_times = 0.01 * durations
    fitter.fit(durations, events, entry=entry_times)
    fitter.plot()

    plt.title("Should have larger variances in the tails")
Example #4
0
def test_kmf_minimum_observation_bias():
    """Plot a Kaplan-Meier fit whose ``entry`` argument is 0.01 times the durations."""
    n_subjects = 250
    km_fitter = KaplanMeierFitter()
    lifetimes, event_flags = exponential_survival_data(n_subjects, 0.1, scale=10)

    # Each subject's entry time is a fixed fraction of its own duration.
    late_entry = 0.01 * lifetimes
    km_fitter.fit(lifetimes, event_flags, entry=late_entry)
    km_fitter.plot()

    plt.title("Should have larger variances in the tails")
Example #5
0
 def test_flat_style_and_marker(self, block):
     """Overlay two ``flat=True`` Kaplan-Meier plots that use different censor markers."""
     # Random draws stay in the original order: both duration samples
     # first, then both event-flag samples.
     durations_a = np.random.exponential(10, size=200)
     durations_b = np.random.exponential(2, size=200)
     events_a = np.random.binomial(1, 0.9, size=200)
     events_b = np.random.binomial(1, 0.95, size=200)

     plus_style = {'marker': '+', 'mew': 2, 'ms': 7}
     circle_style = {'marker': 'o', 'ms': 7}

     fitter = KaplanMeierFitter()
     fitter.fit(durations_a, events_a, label='test label 1')
     shared_ax = fitter.plot(flat=True, censor_styles=plus_style)
     fitter.fit(durations_b, events_b, label='test label 2')
     fitter.plot(ax=shared_ax, censor_styles=circle_style, flat=True)

     self.plt.title("testing kmf flat styling + marker")
     self.plt.show(block=block)
Example #6
0
    def test_kmf_left_censorship_plots(self, block):
        """Plot left-censored fits for the two LCD groups on one set of axes."""
        fitter = KaplanMeierFitter()
        lcd = load_lcd()

        # Split the dataset by its 'group' column.
        is_fan = lcd['group'] == 'alluvial_fan'
        is_trough = lcd['group'] == 'basin_trough'
        fan_rows = lcd.loc[is_fan]
        trough_rows = lcd.loc[is_trough]

        fitter.fit(fan_rows['T'], fan_rows['C'],
                   left_censorship=True, label='alluvial_fan')
        shared_ax = fitter.plot()

        fitter.fit(trough_rows['T'], trough_rows['C'],
                   left_censorship=True, label='basin_trough')
        shared_ax = fitter.plot(ax=shared_ax)

        self.plt.title("test_kmf_left_censorship_plots")
        self.plt.show(block=block)
Example #7
0
    def test_kmf_with_inverted_axis(self, block, kmf):
        """Draw two fits with ``invert_y_axis=True`` on the same axes."""
        # The incoming ``kmf`` argument is never read; new fitters are built here.
        durations_first = np.random.exponential(size=100)
        fitter_first = KaplanMeierFitter()
        fitter_first.fit(durations_first, label="t2")
        shared_ax = fitter_first.plot(invert_y_axis=True, at_risk_counts=True)

        durations_second = np.random.exponential(3, size=100)
        fitter_second = KaplanMeierFitter()
        fitter_second.fit(durations_second, label="t1")
        fitter_second.plot(invert_y_axis=True, ax=shared_ax, ci_force_lines=False)

        pyplot = self.plt
        pyplot.title("test_kmf_with_inverted_axis")
        pyplot.show(block=block)
Example #8
0
 def test_kmf_plotting(self, block):
     """Plot three exponential samples (one 1-D, two column-shaped) on shared axes."""
     # Tuple elements are evaluated left to right, so the random draws
     # happen in the same order as three separate assignments would.
     labelled_samples = (
         ('test label 1', np.random.exponential(10, size=100)),
         ('test label 2', np.random.exponential(2, size=(200, 1))),
         ('test label 3', np.random.exponential(4, size=(500, 1))),
     )

     fitter = KaplanMeierFitter()

     # The first curve creates the axes; the remaining ones reuse them.
     first_label, first_sample = labelled_samples[0]
     fitter.fit(first_sample, label=first_label)
     shared_ax = fitter.plot()
     for label, sample in labelled_samples[1:]:
         fitter.fit(sample, label=label)
         fitter.plot(ax=shared_ax)

     self.plt.title("test_kmf_plotting")
     self.plt.show(block=block)
Example #9
0
    def test_kmf_left_censorship_plots(self, block):
        """Plot left-censored Kaplan-Meier fits for two LCD groups on shared axes.

        The test is skipped when matplotlib cannot be imported. ``block`` is
        forwarded to ``plt.show``.
        """
        pytest.importorskip("matplotlib")
        from matplotlib import pyplot as plt

        kmf = KaplanMeierFitter()
        lcd_dataset = load_lcd()
        # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` accepts the
        # same boolean mask and selects the same rows.
        alluvial_fan = lcd_dataset.loc[lcd_dataset['group'] == 'alluvial_fan']
        basin_trough = lcd_dataset.loc[lcd_dataset['group'] == 'basin_trough']
        kmf.fit(alluvial_fan['T'], alluvial_fan['C'], left_censorship=True, label='alluvial_fan')
        ax = kmf.plot()

        # Second group is drawn on the axes returned by the first plot.
        kmf.fit(basin_trough['T'], basin_trough['C'], left_censorship=True, label='basin_trough')
        ax = kmf.plot(ax=ax)
        plt.show(block=block)
        return
Example #10
0
    def test_kmf_left_censorship_plots(self):
        """Plot left-censored Kaplan-Meier fits for two LCD groups on shared axes.

        The test is skipped when matplotlib cannot be imported.
        """
        pytest.importorskip("matplotlib")
        from matplotlib import pyplot as plt

        kmf = KaplanMeierFitter()
        lcd_dataset = load_lcd()
        # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` accepts the
        # same boolean mask and selects the same rows.
        alluvial_fan = lcd_dataset.loc[lcd_dataset['group'] == 'alluvial_fan']
        basin_trough = lcd_dataset.loc[lcd_dataset['group'] == 'basin_trough']
        kmf.fit(alluvial_fan['T'], alluvial_fan['C'], left_censorship=True, label='alluvial_fan')
        ax = kmf.plot()

        # Second group is drawn on the axes returned by the first plot.
        kmf.fit(basin_trough['T'], basin_trough['C'], left_censorship=True, label='basin_trough')
        ax = kmf.plot(ax=ax)
        plt.show()
        return
Example #11
0
    def test_seaborn_doesnt_cause_kmf_plot_error(self, block, kmf, capsys):
        """Assert that fitting and plotting after importing seaborn leaves stderr empty."""
        # ``sns`` is not referenced below; only the import itself matters here.
        import seaborn as sns

        waltons = load_waltons()
        durations, observed = waltons['T'], waltons['E']

        # The ``kmf`` argument is not read; a new fitter is created instead.
        fitter = KaplanMeierFitter()
        fitter.fit(durations, event_observed=observed)
        fitter.plot()

        self.plt.title('test_seaborn_doesnt_cause_kmf_plot_error')
        self.plt.show(block=block)

        _out, captured_err = capsys.readouterr()
        assert captured_err == ""
Example #12
0
    def test_seaborn_doesnt_cause_kmf_plot_error(self, block, kmf, capsys):
        """Check that a Kaplan-Meier plot made after ``import seaborn`` writes nothing to stderr."""
        # The imported name is never used; the import is kept for its effect alone.
        import seaborn as sns

        frame = load_waltons()
        times = frame["T"]
        event_flags = frame["E"]

        # A new fitter replaces the ``kmf`` argument, which is never read.
        km_fitter = KaplanMeierFitter()
        km_fitter.fit(times, event_observed=event_flags)
        km_fitter.plot()

        pyplot = self.plt
        pyplot.title("test_seaborn_doesnt_cause_kmf_plot_error")
        pyplot.show(block=block)

        _stdout, stderr_text = capsys.readouterr()
        assert stderr_text == ""
Example #13
0
 def test_flat_style_no_censor(self, block):
     """Plot a ``flat=True`` fit with censor styling when no event flags are passed to ``fit``."""
     durations = np.random.exponential(10, size=200)
     marker_style = {'marker': '+', 'mew': 2, 'ms': 7}

     fitter = KaplanMeierFitter()
     fitter.fit(durations, label='test label 1')
     fitter.plot(flat=True, censor_styles=marker_style)

     self.plt.title('test_flat_style_no_censor')
     self.plt.show(block=block)
Example #14
0
    def test_kmf_left_censorship_plots(self, block):
        """Draw left-censored fits for 'alluvial_fan' and 'basin_trough' on shared axes."""
        km_fitter = KaplanMeierFitter()
        lcd = load_lcd()

        # Row subsets, one per value of the "group" column.
        fan = lcd.loc[lcd["group"] == "alluvial_fan"]
        trough = lcd.loc[lcd["group"] == "basin_trough"]

        km_fitter.fit(fan["T"], fan["C"], left_censorship=True, label="alluvial_fan")
        axes = km_fitter.plot()

        km_fitter.fit(trough["T"], trough["C"], left_censorship=True, label="basin_trough")
        axes = km_fitter.plot(ax=axes)

        pyplot = self.plt
        pyplot.title("test_kmf_left_censorship_plots")
        pyplot.show(block=block)
Example #15
0
 def test_negative_times_still_plots(self, block):
     """Fit and plot durations spanning -2 to 3 with random 0/1 event flags."""
     count = 40
     durations = np.linspace(-2, 3, count)
     event_flags = np.random.randint(2, size=count)

     fitter = KaplanMeierFitter()
     fitter.fit(durations, event_flags)
     fitter.plot()

     self.plt.title('test_negative_times_still_plots')
     self.plt.show(block=block)
Example #16
0
def plot_KM(stime, censor, g1, pval, figname):
    """Plot Kaplan-Meier curves for two groups and save the figure as EPS.

    Args:
        stime: Durations; indexed with ``g1`` and ``~g1``.
        censor: Passed to ``KaplanMeierFitter.fit`` as ``event_observed``;
            indexed with ``g1`` and ``~g1``.
        g1: Mask selecting the "high-risk group"; ``~g1`` selects the
            "low-risk group" (presumably a boolean array -- confirm with
            the caller).
        pval: Value printed on the axes as ``pval = ...`` in ``%.2e`` format.
        figname: Destination handed to ``plt.savefig`` with ``format='eps'``.

    Side effects:
        Sets the seaborn style to ``'white'``, changes NumPy's global print
        options, writes ``figname`` and closes the current figure.
    """
    sns.set_style('white')
    kmf = KaplanMeierFitter()
    _, ax = plt.subplots(figsize=(3, 3))
    np.set_printoptions(precision=2, suppress=False)

    # The same fitter is refit for each group; both curves share one axes.
    for mask, label in ((g1, ["high-risk group"]), (~g1, ["low-risk group"])):
        kmf.fit(stime[mask], event_observed=censor[mask], label=label)
        kmf.plot(ax=ax, ci_show=False, show_censors=True)

    # Positional argument on purpose: the ``b`` keyword was renamed to
    # ``visible`` in Matplotlib 3.5 and later dropped, so ``grid(b=False)``
    # is not portable across versions.
    ax.grid(False)
    sns.despine()
    plt.ylim(0, 1)
    plt.xlabel("time", fontsize=14)
    plt.ylabel("survival", fontsize=14)
    # Coordinates are in axes fractions because of ``transform=ax.transAxes``.
    plt.text(0.7, 0.85, 'pval = %.2e' % (pval), fontdict={'size': 12},
             horizontalalignment='center', verticalalignment='center',
             transform=ax.transAxes)
    plt.xticks(rotation=45)
    for item in (ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(10)
    plt.tight_layout()
    plt.savefig(figname, format='eps')
    plt.close()
Example #17
0
def _finish_survival_figure(directory, stem):
    """Label the current survival plot, then show it or save it as PNG and PDF."""
    plt.ylabel('Survival probability')
    plt.xlabel('Time in years')
    plt.ylim(0, 1)
    plt.grid()
    if directory is None:
        # No output directory: switch to interactive mode and display.
        plt.ion()
        plt.show()
    else:
        plt.savefig('{0}/{1}.png'.format(directory, stem))
        plt.savefig('{0}/{1}.pdf'.format(directory, stem))
        plt.close()


def survival_estimation(directory=tmp_dir):
    """Estimate and plot survival functions with the Kaplan-Meier estimator.

    Two figures are produced: one for all rows of the lifetime data frame,
    and one comparing rows where ``df['top'] == 1`` against the rest. The
    median reported by each fit is printed.

    Args:
        directory: Folder that receives ``survival_all`` and ``survival_top``
            as ``.png`` and ``.pdf``. When ``None`` the figures are shown
            interactively instead of saved.

    see: https://github.com/CamDavidsonPilon/lifelines
    """
    # ``lifelines.estimation`` is a legacy import path that current lifelines
    # releases no longer ship; the top-level import is the supported one.
    from lifelines import KaplanMeierFitter

    df = get_lifetime_data_frame(recompute=False)

    # Estimate the survival function for all developers.
    T = df['duration']
    # NOTE(review): a column named 'censored' is passed as ``event_observed``;
    # confirm its encoding matches what the fitter expects.
    C = df['censored']
    kmf = KaplanMeierFitter()
    kmf.fit(T, event_observed=C, label='All developers')
    print("Median survival time for all developers: {} years".format(
        kmf.median_))
    plt.figure(figsize=(10, 8))
    ax = plt.subplot(111)
    kmf.plot(ax=ax, color=color_map(2))
    _finish_survival_figure(directory, 'survival_all')

    # Estimate the survival function by connectivity level.
    mtop = df['top'] == 1
    kmf = KaplanMeierFitter()
    plt.figure(figsize=(10, 8))
    ax = plt.subplot(111)
    kmf.fit(T[mtop], event_observed=C[mtop], label="Top connectivity level")
    print("Median survival time for top developers: {} years".format(
        kmf.median_))
    kmf.plot(ax=ax, color=color_map(0))
    kmf.fit(T[~mtop], event_observed=C[~mtop], label="Not in the top")
    print("Median survival time for not top developers: {} years".format(
        kmf.median_))
    kmf.plot(ax=ax, color=color_map(1))
    _finish_survival_figure(directory, 'survival_top')
# The method takes the same parameters as its R counterpart: a time vector and a vector indicating which observations are observed or censored. The model fitting sequence is similar to the [scikit-learn](http://scikit-learn.org/stable/) API.

# In[16]:

# Boolean mask for the rows of `tongue` whose `type` equals 1.
f = tongue.type==1
T = tongue[f]['time']
C = tongue[f]['delta']

# `delta` is passed as the event-observed indicator.
kmf.fit(T, event_observed=C)


# To get a plot with the confidence intervals, we can simply call `plot()` on our `kmf` object.

# In[17]:

kmf.plot(title='Tumor DNA Profile 1')


# Now we can convert this plot to an interactive [Plotly](https://plot.ly) object. However, we will have to augment the legend and filled area manually. Once we create a helper function, the process is simple.
# 
# Please see the Plotly Python [user guide](https://plot.ly/python/overview/#in-%5B37%5D) for more insight on how to update plot parameters. 
# 
# > Don't forget you can also easily edit the chart properties using the Plotly GUI interface by clicking the "Play with this data!" link below the chart.

# In[19]:

p = kmf.plot(ci_force_lines=True, title='Tumor DNA Profile 1 (95% CI)')

# Collect the plot object: `plt.gcf()` returns the current matplotlib figure.
kmf1 = plt.gcf() 
#df2= pd.read_table('genomicMatrix.tab',sep='\t')
#print list(df.columns.values)


survival_col = '_OS'
censor_col = '_OS_IND'
clinical_predictors = ['age_at_initial_pathologic_diagnosis']
# Keep only the rows that have a non-null survival value.
df = df[pd.notnull(df[survival_col])]


# Boolean mask: rows whose neoadjuvant-treatment history is 'Yes'.
tx = df['history_of_neoadjuvant_treatment'] == 'Yes'
ax = plt.subplot(111)

# ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` takes the same
# (boolean mask, column label) pair and selects the same values.
kmf1 = KaplanMeierFitter(alpha=0.95)
kmf1.fit(durations=df.loc[tx, survival_col], event_observed=df.loc[tx, censor_col], label=['Tx==Yes'])
kmf1.plot(ax=ax, show_censors=True, ci_show=False)


kmf2 = KaplanMeierFitter(alpha=0.95)
kmf2.fit(durations=df.loc[~tx, survival_col], event_observed=df.loc[~tx, censor_col], label=['Tx==No'])
kmf2.plot(ax=ax, show_censors=True, ci_show=False)

add_at_risk_counts(kmf1, kmf2, ax=ax)
plt.title('Acute myeloid leukemia survival analysis with Tx and without Tx')
plt.xlabel(survival_col)
plt.savefig('km.png')

# Log-rank comparison of the treated and untreated groups.
results = logrank_test(df.loc[tx, survival_col], df.loc[~tx, survival_col], df.loc[tx, censor_col], df.loc[~tx, censor_col], alpha=.99)
results.print_summary()

cox = CoxPHFitter(normalize=False)
def uw_tier_histplots():
    """Plot Kaplan-Meier curves of IPO duration per underwriter tier and save a PDF.

    Reads the module-level ``sample`` frame, fits one curve per value of
    ``lead_underwriter_tier`` in ``["-1", "0+", "7+", "9"]``, annotates the
    duration quartiles of the "-1" and "9" tiers, adds the Kruskal-Wallis
    statistic, and writes ``IPO_tiers_KP_survival.pdf``.

    Side effects:
        Adds the columns 'Underwriter Tier' and 'IPO Duration' to ``sample``.
    """
    # ``lifelines.estimation`` is a legacy import path that current lifelines
    # releases no longer ship; the top-level import is the supported one.
    from lifelines import KaplanMeierFitter
    import matplotlib.pyplot as plt

    sample['Underwriter Tier'] = sample['lead_underwriter_tier']
    sample['IPO Duration'] = sample['IPO_duration']

    ranks = ["-1", "0+", "7+", "9"]
    ranklabels = ['No Underwriter', 'Low Rank', 'Mid Rank', 'Rank 9 (elite)']

    def uw_tier_duration(x):
        """Return the IPO durations of the rows in tier ``x``."""
        return sample[sample.lead_underwriter_tier == x]['IPO_duration']

    kwstat = kruskalwallis(*[uw_tier_duration(x) for x in ranks])

    # Only these tiers get quartile annotations. Each entry holds the
    # x offsets of the 75% / 50% / 25% labels and the shared y offset.
    label_offsets = {
        "-1": ((145, 145, 145), .04),
        "9": ((415, 290, 165), .1),
    }
    # Label template paired with the survival level it is pinned to.
    survival_levels = (
        ("75%: {} days", 0.25),
        ("50%: {} days", 0.50),
        ("25%: {} days", 0.75),
    )

    kmf = KaplanMeierFitter()
    f, ax = plt.subplots(1, 1, figsize=(12, 4), sharex=True)
    line_width = 1  # annotation line thickness

    for rank, rlabel, color in zip(ranks, ranklabels, cp_four("cool_r")):
        uw = sample[sample.lead_underwriter_tier == rank]

        kmf.fit(uw['IPO_duration'],
                label='{} N={}'.format(rlabel, len(uw)),
                alpha=0.9)
        kmf.plot(ax=ax, c=color, alpha=0.7)

        if rank not in label_offsets:
            continue

        # Duration percentiles in descending order: 75th, 50th, 25th.
        quartiles = [int(np.percentile(kmf.durations, q)) for q in (75, 50, 25)]
        aprops = dict(facecolor=color, width=line_width, headwidth=line_width)
        x_shifts, y_shift = label_offsets[rank]
        for (template, level), days, x_shift in zip(survival_levels, quartiles, x_shifts):
            plt.annotate(template.format(days),
                         (days, level),
                         xytext=(days + x_shift, level + y_shift),
                         arrowprops=aprops)

    plt.annotate("Kruskall Wallis\nH: {:.3f}\nprob: {:.3f}".format(*kwstat),
                 (960, 0.1))
    plt.ylim(0, 1)
    plt.xlim(0, 1095)
    plt.title("Kaplan-Meier survival times by bank tier")
    plt.xlabel("IPO Duration (days)")
    plt.ylabel(r"$S(t)=Pr(T>t)$")
    plt.savefig("IPO_tiers_KP_survival.pdf", format='pdf', dpi=200)
def plot_kaplan_function(duration_key):
    """Plot Kaplan-Meier curves of ``duration_key`` for the ``above`` and ``under`` frames.

    Both curves are drawn on one axes and each is annotated with the
    25th/50th/75th percentiles of its durations. Nothing is saved or shown.

    Args:
        duration_key: Column to read from the module-level ``above`` and
            ``under`` frames. Keys used in this project include
            "days_from_priced_to_listing", "days_to_final_price_revision",
            "days_from_s1_to_listing" and "days_to_first_price_change".
            ``None`` selects "days_to_first_price_change".
    """
    # ``lifelines.estimation`` is a legacy import path that current lifelines
    # releases no longer ship; the top-level import is the supported one.
    from lifelines import KaplanMeierFitter
    import matplotlib.pyplot as plt

    # The argument used to be overwritten unconditionally with this key, so
    # callers could not choose a column. It is now only the fallback.
    if duration_key is None:
        duration_key = "days_to_first_price_change"

    kmf = KaplanMeierFitter()
    f, ax = plt.subplots(1, 1, figsize=(12, 4), sharex=True)
    line_width = 1  # annotation line thickness
    xoffset = 0.4   # annotation offset (x-axis)
    yoffset = 0.04  # annotation offset (y-axis)

    # Label template paired with the survival level it is pinned to.
    survival_levels = (
        ("75%: {} days", 0.25),
        ("50%: {} days", 0.50),
        ("25%: {} days", 0.75),
    )

    def annotate_quartiles(color, extra_y):
        """Annotate the duration quartiles of the most recent fit of ``kmf``."""
        # Duration percentiles in descending order: 75th, 50th, 25th.
        quartiles = [int(np.percentile(kmf.durations, q)) for q in (75, 50, 25)]
        aprops = dict(facecolor=color, width=line_width, headwidth=line_width)
        for (template, level), days in zip(survival_levels, quartiles):
            plt.annotate(template.format(days),
                         (days, level),
                         xytext=(days + xoffset, level + yoffset + extra_y),
                         arrowprops=aprops)

    # Above filing price range
    kmf.fit(above[duration_key], label='Upward Price Amendment: N={}'.format(len(above)), alpha=0.9)
    kmf.plot(ax=ax, c=colors[5], alpha=0.7)
    annotate_quartiles(colors[5], 0)

    # Under filing price range
    # NOTE(review): unlike the fit above, no ``alpha`` is passed here -- confirm
    # whether the two curves are meant to use different settings.
    kmf.fit(under[duration_key], label='Downward Price Amendment: N={}'.format(len(under)),)
    kmf.plot(ax=ax, c=colors[2], alpha=0.7)
    # Labels of the second curve are raised a little further to avoid overlap.
    annotate_quartiles(colors[2], 0.05)

    # General graph labels; the x-axis stops at the larger 90th percentile.
    plt.ylim(0, 1)
    plt.xlim(0, max(np.percentile(above[duration_key], 90), np.percentile(under[duration_key], 90)))
    plt.title("Kaplan-Meier Survival Functions")
    plt.xlabel("Delay (days) in {}".format(duration_key))
    plt.ylabel(r"$S(t)=Pr(T>t)$")
# Histogram of the 'lenfol' column of `dead`.
dead.hist(bins=20, column='lenfol')
plt.show()

# Plot the normalized cumulative histogram (an empirical CDF) of 'lenfol'.
# `density=True` replaces the `normed` keyword, which Matplotlib removed in 3.1.
dead.hist(bins=100, column='lenfol',
          cumulative=True, density=True)
plt.show()

# Plot the survival curve for all rows of `df`.
kaplen_meier = KaplanMeierFitter()
time_of_event = df['lenfol']
event = df['fstat']
# Common timeline: 100 evenly spaced points from 0 to 2500.
time = np.linspace(0, 2500, 100)

kaplen_meier.fit(time_of_event, timeline=time, event_observed=event, label='All patients')
kaplen_meier.plot()
plt.show()

# Stratify by congestive heart complications (rows where 'chf' equals 1).
history = df['chf'] == 1

kaplen_meier = KaplanMeierFitter()
kaplen_meier.fit(time_of_event[history], timeline=time, event_observed=event[history], label='Congestive heart complications')
ax = kaplen_meier.plot()

# Second stratum is drawn on the same axes.
kaplen_meier.fit(time_of_event[~history], timeline=time, event_observed=event[~history], label='No congestive heart complications')
kaplen_meier.plot(ax=ax, c="b")

plt.show()

# Cox proportional hazard
Example #23
0
cph.fit(df=df5, duration_col='duration', event_col='event')
cph.predict_survival_function(X=df5).plot()




# Kaplan-Meier plots

# `lifelines.estimation` is a legacy import path that current lifelines
# releases no longer ship; the top-level import is the supported one.
from lifelines import KaplanMeierFitter
kmf = KaplanMeierFitter()


df6 = df3[['duration', 'event']]
kmf.fit(df6['duration'], df6['event'])
kmf.plot()


# What does the survival curve look like for rows where race_factor is
# 'African-American' and score_factor is 'Low'?
df6a = df3[df3['race_factor'] == 'African-American']
df6a = df6a[df6a['score_factor'] == 'Low']
df6b = df6a[['duration', 'event']]
kmf.fit(df6b['duration'], df6b['event'])
kmf.plot()


# What does the survival curve look like for rows where race_factor is
# 'Caucasian' and score_factor is 'Low'?
df6c = df3[df3['race_factor'] == 'Caucasian']
df6c = df6c[df6c['score_factor'] == 'Low']
df6d = df6c[['duration', 'event']]
kmf.fit(df6d['duration'], df6d['event'])
Example #24
0
# 3-fold cross-validation of a Cox proportional hazards model on `data`.
cf = CoxPHFitter()
scores = k_fold_cross_validation(cf, data, 'time', event_col='event', k=3)
# Parenthesized single-argument prints run on both Python 2 and Python 3;
# the bare `print x` statement form is a syntax error on Python 3.
print(scores)
print(np.mean(scores))
print(np.std(scores))


# Encode the subtype names as integer codes and store them on `data`.
le = preprocessing.LabelEncoder()
subtypes = le.fit_transform(dataset["subtypes"])
data["subtype"] = subtypes
T = data["time"]
C = data["event"]

kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=C)
kmf.plot(title='Survival Day Profile of Breast Cancer Patients')

# Basal
f1 = data.subtype == 0
T1 = data[f1]['time']
C1 = data[f1]['event']

# Her2
f2 = data.subtype == 1
T2 = data[f2]['time']
C2 = data[f2]['event']

# LumA
f3 = data.subtype == 2
T3 = data[f3]['time']
C3 = data[f3]['event']