def score(T_actual, labels, E_actual):
    """
    Summarise the Kaplan-Meier fit of each label group.

    The groups 'high', 'mid' and 'low' are processed in that order and one
    tuple per group is appended to the returned list:

        (last value of the fitted survival function,
         result of ``median_survival_times`` on that survival function,
         number of members,
         largest time among members whose event flag equals 1)

    For a group without members every entry except the count is NaN; for a
    group without any observed event only the last entry is NaN.
    """
    flat_labels = labels.ravel()
    results = []
    for group in ('high', 'mid', 'low'):
        mask = flat_labels == group
        n_members = np.sum(mask)

        if not n_members > 0:
            # Rpart might fail in this respect (a group can end up empty)
            results.append((np.nan, np.nan, n_members, np.nan))
            continue

        times, events = T_actual[mask], E_actual[mask]
        kmf = KaplanMeierFitter()
        kmf.fit(times, events, label='{}'.format(group))

        # Last survival time: latest time at which an event was observed
        if np.sum(events) > 0:
            last_event = np.max(times[events == 1])
        else:
            last_event = np.nan

        # End survival rate, median survival time, member count, last event
        results.append((kmf.survival_function_.iloc[-1, 0],
                        median_survival_times(kmf.survival_function_),
                        n_members,
                        last_event))
    return results
예제 #2
0
 def test_passing_in_left_censorship_creates_a_cumulative_density(self, sample_lifetimes):
     """A left-censored fit exposes a cumulative density instead of a survival function."""
     durations, observed = sample_lifetimes
     fitter = KaplanMeierFitter()
     fitter.fit(durations, observed, left_censorship=True)
     for expected_attr in ('cumulative_density_', 'plot_cumulative_density_'):
         assert hasattr(fitter, expected_attr)
     assert not hasattr(fitter, 'survival_function_')
예제 #3
0
 def test_stat_error_is_raised_if_too_few_early_deaths(self):
     """Fitting with entry times one unit before every observation must raise StatError."""
     observations = np.array([
         1, 1, 1, 22, 30, 28, 32, 11, 14, 36, 31, 33, 33, 37, 35, 25, 31,
         22, 26, 24, 35, 34, 30, 35, 40, 39, 2,
     ])
     entry_times = observations - 1
     fitter = KaplanMeierFitter()
     with pytest.raises(StatError):
         fitter.fit(observations, entry=entry_times)
예제 #4
0
def plot_KM(stime, censor, g1, pval, figname):
    """
    Plot the Kaplan-Meier curves of two groups and save the figure as EPS.

    Parameters
    ----------
    stime : indexable by ``g1`` and ``~g1``; passed to the fitter as durations.
    censor : indexable like ``stime``; passed as ``event_observed``.
    g1 : mask supporting ``~``; ``g1`` selects the curve labelled
        "high-risk group", ``~g1`` the one labelled "low-risk group".
    pval : number rendered on the plot as ``'pval = %.2e'``.
    figname : target passed to ``plt.savefig`` (written with ``format='eps'``).

    Side effects: sets the seaborn style to 'white', changes the global NumPy
    print options, writes ``figname`` and closes the current figure.
    """
    sns.set_style('white')
    kmf = KaplanMeierFitter()
    fig, ax = plt.subplots(figsize=(3, 3))
    np.set_printoptions(precision=2, suppress=False)

    # NOTE(review): the labels are passed as one-element lists, exactly as
    # before; confirm whether a plain string was intended.
    kmf.fit(stime[g1], event_observed=censor[g1], label=["high-risk group"])
    kmf.plot(ax=ax, ci_show=False, show_censors=True)
    kmf.fit(stime[~g1], event_observed=censor[~g1], label=["low-risk group"])
    kmf.plot(ax=ax, ci_show=False, show_censors=True)

    # Positional form: the ``b=`` keyword was renamed to ``visible=`` and later
    # removed from Matplotlib, while the first positional argument works on
    # every version.
    ax.grid(False)
    sns.despine()
    plt.ylim(0, 1)
    plt.xlabel("time", fontsize=14)
    plt.ylabel("survival", fontsize=14)
    plt.text(0.7,
             0.85,
             'pval = %.2e' % (pval),
             fontdict={'size': 12},
             horizontalalignment='center',
             verticalalignment='center',
             transform=ax.transAxes)
    plt.xticks(rotation=45)
    for item in (ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(10)
    plt.tight_layout()
    plt.savefig(figname, format='eps')
    plt.close()
예제 #5
0
 def test_kmf_with_risk_counts(self, block):
     """Plot a fit of 100 exponential samples with ``at_risk_counts`` enabled."""
     samples = np.random.exponential(10, size=100)
     fitter = KaplanMeierFitter()
     fitter.fit(samples)
     fitter.plot(at_risk_counts=True)
     figure_title = "test_kmf_with_risk_counts"
     self.plt.title(figure_title)
     self.plt.show(block=block)
예제 #6
0
 def test_stat_error_is_raised_if_too_few_early_deaths(self):
     """StatError is expected when each entry time is the observation minus one."""
     observations = np.array([
         1, 1, 1, 22, 30, 28, 32, 11, 14, 36, 31, 33, 33, 37, 35, 25, 31,
         22, 26, 24, 35, 34, 30, 35, 40, 39, 2,
     ])
     entry_times = observations - 1
     fitter = KaplanMeierFitter()
     with pytest.raises(StatError):
         fitter.fit(observations, entry=entry_times)
예제 #7
0
 def test_passing_in_left_censorship_creates_a_cumulative_density(self, sample_lifetimes):
     """With ``left_censorship=True`` the fitter has a cumulative density and no survival function."""
     durations, observed = sample_lifetimes
     fitter = KaplanMeierFitter()
     fitter.fit(durations, observed, left_censorship=True)
     for expected_attr in ('cumulative_density_', 'plot_cumulative_density_'):
         assert hasattr(fitter, expected_attr)
     assert not hasattr(fitter, 'survival_function_')
예제 #8
0
 def test_predict_method_returns_exact_value_if_given_an_observed_time(
         self):
     """``predict`` at an observed time must match the survival function at that time."""
     T = [1, 2, 3]
     kmf = KaplanMeierFitter()
     kmf.fit(T)
     time = 1
     # ``.loc`` replaces the ``.ix`` indexer, which pandas removed in 1.0;
     # the lookup is by timeline label, as it was with ``.ix``.
     assert abs(kmf.predict(time) -
                kmf.survival_function_.loc[time].values) < 10e-8
예제 #9
0
 def test_show_censor_with_discrete_date(self, block):
     """Plot censor markers for binomially distributed (integer) durations."""
     durations = np.random.binomial(20, 0.1, size=100)
     observed = np.random.binomial(1, 0.8, size=100)
     fitter = KaplanMeierFitter()
     fitted = fitter.fit(durations, observed)
     fitted.plot(show_censors=True)
     self.plt.title('test_show_censor_with_discrete_date')
     self.plt.show(block=block)
예제 #10
0
 def test_show_censor_with_index_0(self, block):
     """Plot censor markers for binomial(20, 0.9) durations."""
     # lifelines should auto put a 0 in.
     durations = np.random.binomial(20, 0.9, size=100)
     observed = np.random.binomial(1, 0.8, size=100)
     fitter = KaplanMeierFitter()
     fitted = fitter.fit(durations, observed)
     fitted.plot(show_censors=True)
     self.plt.title('test_show_censor_with_index_0')
     self.plt.show(block=block)
예제 #11
0
 def kaplanMeier(self):
     """
     Fit a Kaplan-Meier estimator and store it on ``self.kmf``.

     Durations are the ``.dt.days`` of the ``self.eventTime`` column of
     ``self.inputDf``; the ``self.censorVar`` column is passed as
     ``event_observed`` and ``self.label`` as the curve label.
     """
     # Import from the package root: the ``lifelines.estimation`` namespace
     # was removed from lifelines, while the root export works on old and new
     # releases alike.
     from lifelines import KaplanMeierFitter
     df = self.inputDf
     self.kmf = KaplanMeierFitter()
     time = df[self.eventTime].dt.days
     status = df[self.censorVar]
     lab = self.label
     self.kmf.fit(time, event_observed=status, label=lab)
예제 #12
0
def test_kmf_minimum_observation_bias():
    """Plot a fit in which every subject enters at 1% of its own duration."""
    n_samples = 250
    fitter = KaplanMeierFitter()
    durations, observed = exponential_survival_data(n_samples, 0.1, scale=10)
    entry_times = 0.01 * durations
    fitter.fit(durations, observed, entry=entry_times)
    fitter.plot()
    plt.title("Should have larger variances in the tails")
예제 #13
0
 def test_flat_style_no_censor(self, block):
     """Plot a fit without censoring data using the flat style and custom censor markers."""
     samples = np.random.exponential(10, size=200)
     marker_style = {'marker': '+', 'mew': 2, 'ms': 7}
     fitter = KaplanMeierFitter()
     fitter.fit(samples, label='test label 1')
     fitter.plot(flat=True, censor_styles=marker_style)
     self.plt.title('test_flat_style_no_censor')
     self.plt.show(block=block)
예제 #14
0
    def test_kmf_left_censorship_stats(self):
        """Compare the left-censored cumulative density with reference values."""
        # from http://www.public.iastate.edu/~pdixon/stat505/Chapter%2011.pdf
        durations = [3, 5, 5, 5, 6, 6, 10, 12]
        observed = [1, 0, 0, 1, 1, 1, 0, 1]
        expected = np.array([0, 0.437500, 0.5833333, 0.875, 0.875, 1])

        fitter = KaplanMeierFitter()
        fitter.fit(durations, observed, left_censorship=True)

        density_column = fitter.cumulative_density_[fitter._label]
        npt.assert_almost_equal(density_column.values, expected)
예제 #15
0
    def test_kmf_left_censorship_stats(self):
        """Left-censored cumulative density must match the reference values."""
        # from http://www.public.iastate.edu/~pdixon/stat505/Chapter%2011.pdf
        durations = [3, 5, 5, 5, 6, 6, 10, 12]
        observed = [1, 0, 0, 1, 1, 1, 0, 1]
        expected = np.array([0, 0.437500, 0.5833333, 0.875, 0.875, 1])

        fitter = KaplanMeierFitter()
        fitter.fit(durations, observed, left_censorship=True)

        density_column = fitter.cumulative_density_[fitter._label]
        npt.assert_almost_equal(density_column.values, expected)
예제 #16
0
 def test_negative_times_still_plots(self, block):
     """Plot a fit whose durations span -2 to 3, i.e. include negative times."""
     sample_count = 40
     durations = np.linspace(-2, 3, sample_count)
     observed = np.random.randint(2, size=sample_count)
     fitter = KaplanMeierFitter()
     fitter.fit(durations, observed)
     fitter.plot()
     self.plt.title('test_negative_times_still_plots')
     self.plt.show(block=block)
예제 #17
0
def kmf_calculation(df, bucket):
    """
    Fit a Kaplan-Meier estimator on the rows of ``df`` belonging to ``bucket``.

    Rows are those where ``df.use_buckets == bucket``; the 'duration' column
    supplies the durations and the 'churn' column the observed-event flags.
    The fitted estimator, labelled with ``bucket``, is returned.
    """
    # Positional row numbers of the matching rows
    row_positions = np.where(df.use_buckets == bucket)[0]

    durations = df['duration'].iloc[row_positions]
    churned = df['churn'].iloc[row_positions]

    fitter = KaplanMeierFitter()
    fitter.fit(durations, event_observed=churned, label=bucket)
    return fitter
예제 #18
0
    def test_kmf_survival_curve_output_against_R(self):
        """Compare survival estimates on the g3 data, split by 'RIT' group, with reference values."""
        df = load_g3()
        ix = df['group'] == 'RIT'
        kmf = KaplanMeierFitter()

        # ``.loc`` with a boolean mask replaces ``.ix``, which pandas removed in 1.0.
        expected = np.array([[0.909, 0.779]]).T
        kmf.fit(df.loc[ix, 'time'], df.loc[ix, 'event'], timeline=[25, 53])
        npt.assert_array_almost_equal(kmf.survival_function_.values, expected, decimal=3)

        expected = np.array([[0.833, 0.667, 0.5, 0.333]]).T
        kmf.fit(df.loc[~ix, 'time'], df.loc[~ix, 'event'], timeline=[9, 19, 32, 34])
        npt.assert_array_almost_equal(kmf.survival_function_.values, expected, decimal=3)
예제 #19
0
    def test_shifting_durations_doesnt_affect_survival_function_values(self):
        """Adding or subtracting a constant from all durations keeps the survival values."""
        durations = np.random.exponential(10, size=100)
        fitter = KaplanMeierFitter()
        baseline = fitter.fit(durations).survival_function_.values

        shifted_up = fitter.fit(durations + 100).survival_function_.values
        npt.assert_almost_equal(baseline, shifted_up)

        # For the downward shifts the first baseline row is skipped in the comparison
        for offset in (50, 200):
            shifted_down = fitter.fit(durations - offset).survival_function_.values
            npt.assert_almost_equal(baseline[1:], shifted_down)
예제 #20
0
    def test_kmf_confidence_intervals_output_against_R(self):
        """Compare the 95% confidence bounds for the non-'RIT' rows of g3 with reference values."""
        # this uses conf.type = 'log-log'
        df = load_g3()
        ix = df['group'] != 'RIT'
        kmf = KaplanMeierFitter()
        # ``.loc`` with a boolean mask replaces ``.ix``, which pandas removed in 1.0.
        kmf.fit(df.loc[ix, 'time'], df.loc[ix, 'event'], timeline=[9, 19, 32, 34])

        expected_lower_bound = np.array([0.2731, 0.1946, 0.1109, 0.0461])
        npt.assert_array_almost_equal(kmf.confidence_interval_['KM_estimate_lower_0.95'].values,
                                      expected_lower_bound, decimal=3)

        expected_upper_bound = np.array([0.975, 0.904, 0.804, 0.676])
        npt.assert_array_almost_equal(kmf.confidence_interval_['KM_estimate_upper_0.95'].values,
                                      expected_upper_bound, decimal=3)
예제 #21
0
    def test_kmf_confidence_intervals_output_against_R(self):
        """Check the 95% confidence bounds for the non-'RIT' rows of g3 against reference values."""
        # this uses conf.type = 'log-log'
        df = load_g3()
        ix = df['group'] != 'RIT'
        kmf = KaplanMeierFitter()
        # ``.loc`` with a boolean mask replaces ``.ix``, which pandas removed in 1.0.
        kmf.fit(df.loc[ix, 'time'], df.loc[ix, 'event'], timeline=[9, 19, 32, 34])

        expected_lower_bound = np.array([0.2731, 0.1946, 0.1109, 0.0461])
        npt.assert_array_almost_equal(kmf.confidence_interval_['KM_estimate_lower_0.95'].values,
                                      expected_lower_bound, decimal=3)

        expected_upper_bound = np.array([0.975, 0.904, 0.804, 0.676])
        npt.assert_array_almost_equal(kmf.confidence_interval_['KM_estimate_upper_0.95'].values,
                                      expected_upper_bound, decimal=3)
예제 #22
0
class survModel():
    """
    Thin wrapper that fits a Kaplan-Meier estimator on a data frame.

    The constructor only stores its arguments; ``kaplanMeier`` performs the
    fit and stores the fitter on ``self.kmf``.
    """

    def __init__(self, inputDf, eventTime, censorVar, label):
        """
        Store the inputs of the model.

        inputDf   -- data frame holding the observations
        eventTime -- name of the column used for durations (``kaplanMeier``
                     reads its ``.dt.days``)
        censorVar -- name of the column passed as ``event_observed``
        label     -- label given to the fitted curve
        """
        self.inputDf = inputDf
        self.eventTime = eventTime
        self.censorVar = censorVar
        self.label = label

    def kaplanMeier(self):
        """Fit a Kaplan-Meier estimator on the stored frame and keep it as ``self.kmf``."""
        # Import from the package root: the ``lifelines.estimation`` namespace
        # was removed from lifelines, while the root export works on old and
        # new releases alike.
        from lifelines import KaplanMeierFitter
        df = self.inputDf
        self.kmf = KaplanMeierFitter()
        time = df[self.eventTime].dt.days
        status = df[self.censorVar]
        lab = self.label
        self.kmf.fit(time, event_observed=status, label=lab)
예제 #23
0
    def test_seaborn_doesnt_cause_kmf_plot_error(self, block, kmf, capsys):
        """Plotting a fit of the waltons data with seaborn imported must leave stderr empty."""
        # The name is unused below; presumably the import alone is what the test exercises.
        import seaborn as sns

        waltons = load_waltons()
        durations, observed = waltons['T'], waltons['E']

        kmf = KaplanMeierFitter()
        kmf.fit(durations, event_observed=observed)
        kmf.plot()

        self.plt.title('test_seaborn_doesnt_cause_kmf_plot_error')
        self.plt.show(block=block)
        captured = capsys.readouterr()
        assert captured[1] == ""
예제 #24
0
    def test_kmf_left_censorship_plots(self, block):
        """Plot left-censored fits for the two lcd groups on one set of axes."""
        fitter = KaplanMeierFitter()
        lcd = load_lcd()
        fan_rows = lcd.loc[lcd['group'] == 'alluvial_fan']
        trough_rows = lcd.loc[lcd['group'] == 'basin_trough']

        fitter.fit(fan_rows['T'], fan_rows['C'], left_censorship=True, label='alluvial_fan')
        axes = fitter.plot()

        # The second curve is drawn onto the axes returned by the first plot
        fitter.fit(trough_rows['T'], trough_rows['C'], left_censorship=True, label='basin_trough')
        fitter.plot(ax=axes)

        self.plt.title("test_kmf_left_censorship_plots")
        self.plt.show(block=block)
예제 #25
0
 def test_flat_style_and_marker(self, block):
     """Plot two censored fits on shared axes with flat styling and different censor markers."""
     durations_a = np.random.exponential(10, size=200)
     durations_b = np.random.exponential(2, size=200)
     observed_a = np.random.binomial(1, 0.9, size=200)
     observed_b = np.random.binomial(1, 0.95, size=200)

     fitter = KaplanMeierFitter()
     fitter.fit(durations_a, observed_a, label='test label 1')
     plus_markers = {'marker': '+', 'mew': 2, 'ms': 7}
     axes = fitter.plot(flat=True, censor_styles=plus_markers)

     fitter.fit(durations_b, observed_b, label='test label 2')
     circle_markers = {'marker': 'o', 'ms': 7}
     fitter.plot(ax=axes, censor_styles=circle_markers, flat=True)

     self.plt.title("testing kmf flat styling + marker")
     self.plt.show(block=block)
예제 #26
0
    def test_kmf_left_censorship_plots(self, block):
        """Plot left-censored fits for the two lcd groups; skipped when matplotlib is missing."""
        # The return value is not used; the call skips the test if matplotlib is unavailable.
        matplotlib = pytest.importorskip("matplotlib")
        from matplotlib import pyplot as plt

        kmf = KaplanMeierFitter()
        lcd_dataset = load_lcd()
        # ``.loc`` with a boolean mask replaces ``.ix``, which pandas removed in 1.0.
        alluvial_fan = lcd_dataset.loc[lcd_dataset['group'] == 'alluvial_fan']
        basin_trough = lcd_dataset.loc[lcd_dataset['group'] == 'basin_trough']
        kmf.fit(alluvial_fan['T'], alluvial_fan['C'], left_censorship=True, label='alluvial_fan')
        ax = kmf.plot()

        kmf.fit(basin_trough['T'], basin_trough['C'], left_censorship=True, label='basin_trough')
        ax = kmf.plot(ax=ax)
        plt.show(block=block)
        return
예제 #27
0
def test_kmf_minimum_observation_bias():
    """Plot a fit whose entry times are 1% of the corresponding durations."""
    n_samples = 250
    fitter = KaplanMeierFitter()
    durations, observed = exponential_survival_data(n_samples, 0.1, scale=10)
    entry_times = 0.01 * durations
    fitter.fit(durations, observed, entry=entry_times)
    fitter.plot()
    plt.title("Should have larger variances in the tails")
예제 #28
0
 def test_predict_method_returns_gives_values_prior_to_the_value_in_the_survival_function(
         self):
     """``predict`` between two timeline points must return the value at the earlier point."""
     T = [1, 2, 3]
     kmf = KaplanMeierFitter()
     kmf.fit(T)
     # ``.loc`` replaces the ``.ix`` indexer, which pandas removed in 1.0;
     # the lookups are by timeline label, as they were with ``.ix``.
     assert abs(kmf.predict(0.5) -
                kmf.survival_function_.loc[0].values) < 10e-8
     assert abs(kmf.predict(1.9999) -
                kmf.survival_function_.loc[1].values) < 10e-8
예제 #29
0
    def test_kmf_left_censorship_plots(self):
        """Plot left-censored fits for the two lcd groups; skipped when matplotlib is missing."""
        # The return value is not used; the call skips the test if matplotlib is unavailable.
        matplotlib = pytest.importorskip("matplotlib")
        from matplotlib import pyplot as plt

        kmf = KaplanMeierFitter()
        lcd_dataset = load_lcd()
        # ``.loc`` with a boolean mask replaces ``.ix``, which pandas removed in 1.0.
        alluvial_fan = lcd_dataset.loc[lcd_dataset['group'] == 'alluvial_fan']
        basin_trough = lcd_dataset.loc[lcd_dataset['group'] == 'basin_trough']
        kmf.fit(alluvial_fan['T'], alluvial_fan['C'], left_censorship=True, label='alluvial_fan')
        ax = kmf.plot()

        kmf.fit(basin_trough['T'], basin_trough['C'], left_censorship=True, label='basin_trough')
        ax = kmf.plot(ax=ax)
        plt.show()
        return
예제 #30
0
    def test_kmf_left_censorship_plots(self, block):
        """Draw left-censored fits of both lcd groups onto the same axes."""
        fitter = KaplanMeierFitter()
        lcd = load_lcd()
        fan_rows = lcd.loc[lcd["group"] == "alluvial_fan"]
        trough_rows = lcd.loc[lcd["group"] == "basin_trough"]

        fitter.fit(fan_rows["T"], fan_rows["C"], left_censorship=True, label="alluvial_fan")
        axes = fitter.plot()

        # The second curve is drawn onto the axes returned by the first plot
        fitter.fit(trough_rows["T"], trough_rows["C"], left_censorship=True, label="basin_trough")
        fitter.plot(ax=axes)

        self.plt.title("test_kmf_left_censorship_plots")
        self.plt.show(block=block)
예제 #31
0
    def test_kmf_survival_curve_output_against_R(self):
        """Check survival estimates on the g3 data, split by 'RIT' group, against reference values."""
        df = load_g3()
        ix = df['group'] == 'RIT'
        kmf = KaplanMeierFitter()

        # ``.loc`` with a boolean mask replaces ``.ix``, which pandas removed in 1.0.
        expected = np.array([[0.909, 0.779]]).T
        kmf.fit(df.loc[ix, 'time'], df.loc[ix, 'event'], timeline=[25, 53])
        npt.assert_array_almost_equal(kmf.survival_function_.values, expected, decimal=3)

        expected = np.array([[0.833, 0.667, 0.5, 0.333]]).T
        kmf.fit(df.loc[~ix, 'time'], df.loc[~ix, 'event'], timeline=[9, 19, 32, 34])
        npt.assert_array_almost_equal(kmf.survival_function_.values, expected, decimal=3)
예제 #32
0
def plot_KM(stime, censor, g1, pval, figname):
    """
    Plot the Kaplan-Meier curves of two groups and save the figure as EPS.

    Parameters
    ----------
    stime : indexable by ``g1`` and ``~g1``; passed to the fitter as durations.
    censor : indexable like ``stime``; passed as ``event_observed``.
    g1 : mask supporting ``~``; ``g1`` selects the curve labelled
        "high-risk group", ``~g1`` the one labelled "low-risk group".
    pval : number rendered on the plot as ``'pval = %.2e'``.
    figname : target passed to ``plt.savefig`` (written with ``format='eps'``).

    Side effects: sets the seaborn style to 'white', changes the global NumPy
    print options, writes ``figname`` and closes the current figure.
    """
    sns.set_style('white')
    kmf = KaplanMeierFitter()
    fig, ax = plt.subplots(figsize=(3, 3))
    np.set_printoptions(precision=2, suppress=False)

    # NOTE(review): the labels are passed as one-element lists, exactly as
    # before; confirm whether a plain string was intended.
    kmf.fit(stime[g1], event_observed=censor[g1], label=["high-risk group"])
    kmf.plot(ax=ax, ci_show=False, show_censors=True)
    kmf.fit(stime[~g1], event_observed=censor[~g1], label=["low-risk group"])
    kmf.plot(ax=ax, ci_show=False, show_censors=True)

    # Positional form: the ``b=`` keyword was renamed to ``visible=`` and later
    # removed from Matplotlib, while the first positional argument works on
    # every version.
    ax.grid(False)
    sns.despine()
    plt.ylim(0, 1)
    plt.xlabel("time", fontsize=14)
    plt.ylabel("survival", fontsize=14)
    plt.text(0.7, 0.85, 'pval = %.2e' % (pval), fontdict={'size': 12},
             horizontalalignment='center', verticalalignment='center',
             transform=ax.transAxes)
    plt.xticks(rotation=45)
    for item in (ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(10)
    plt.tight_layout()
    plt.savefig(figname, format='eps')
    plt.close()
예제 #33
0
    def test_shifting_durations_doesnt_affect_survival_function_values(self):
        """Shifting every duration by a constant must leave the survival values unchanged."""
        durations = np.random.exponential(10, size=100)
        fitter = KaplanMeierFitter()
        baseline = fitter.fit(durations).survival_function_.values

        shifted_up = fitter.fit(durations + 100).survival_function_.values
        npt.assert_almost_equal(baseline, shifted_up)

        # For the downward shifts the first baseline row is skipped in the comparison
        for offset in (50, 200):
            shifted_down = fitter.fit(durations - offset).survival_function_.values
            npt.assert_almost_equal(baseline[1:], shifted_down)
예제 #34
0
    def test_seaborn_doesnt_cause_kmf_plot_error(self, block, kmf, capsys):
        """With seaborn imported, plotting a fit of the waltons data must not write to stderr."""
        # The name is unused below; presumably the import alone is what the test exercises.
        import seaborn as sns

        waltons = load_waltons()
        durations, observed = waltons["T"], waltons["E"]

        kmf = KaplanMeierFitter()
        kmf.fit(durations, event_observed=observed)
        kmf.plot()

        self.plt.title("test_seaborn_doesnt_cause_kmf_plot_error")
        self.plt.show(block=block)
        captured = capsys.readouterr()
        assert captured[1] == ""
예제 #35
0
def survival(request, indel_id):
    """
    Return Kaplan-Meier survival curves for a gene as JSON.

    The gene is the ``related_gene`` of the ``Indel`` with primary key
    ``indel_id`` (404 if it does not exist). Clinical data is read from the
    tab-delimited file ``<SURVIVAL_ROOT>/<cancer>/<gene>.txt`` and split into
    rows whose absolute 'expression' is at or above the threshold
    ("alterations") and the rest ("no_alterations"). One estimator is fitted
    per group on 'OS_MONTHS' / 'EVENT', and a log-rank test compares them.

    Query parameters
    ----------------
    cancer    -- directory name below ``SURVIVAL_ROOT``.
    threshold -- float, defaults to 2.0.

    Responses
    ---------
    404 when the indel, the ``cancer`` value or the data file is missing or
    invalid; 400 when ``threshold`` is not a number; otherwise a
    ``JsonResponse`` with timeline, survival values, confidence bounds (NaN
    replaced by 1) for both groups, and the log-rank ``p_value``.
    """
    from django.http import HttpResponseBadRequest

    gene = get_object_or_404(Indel, pk=indel_id).related_gene.id

    # ``cancer`` is user-controlled and becomes a path component: refuse
    # anything that could escape SURVIVAL_ROOT instead of handing it to the
    # file system. A missing value was a 404 before (no such file) and still is.
    cancer = request.GET.get('cancer')
    if (not cancer or cancer in ('.', '..')
            or '/' in cancer or '\\' in cancer):
        return HttpResponseNotFound()

    # A malformed number is a client error, not a server crash.
    try:
        threshold = float(request.GET.get('threshold', '2.0'))
    except ValueError:
        return HttpResponseBadRequest()

    try:
        clinical = pd.read_table('{}/{}/{}.txt'.format(settings.SURVIVAL_ROOT,
                                                       cancer, gene))
    except FileNotFoundError:
        return HttpResponseNotFound()

    altered = clinical['expression'].abs() >= threshold
    alterations = clinical.loc[altered].copy()
    no_alterations = clinical.loc[
        clinical['expression'].abs() < threshold].copy()

    # One fitter per group: ``fit`` returns the fitter itself, so sharing a
    # single instance would make both results point at the last fit.
    alterations_kmf = KaplanMeierFitter().fit(alterations['OS_MONTHS'],
                                              alterations['EVENT'],
                                              label='alterations')
    no_alterations_kmf = KaplanMeierFitter().fit(no_alterations['OS_MONTHS'],
                                                 no_alterations['EVENT'],
                                                 label='no_alterations')
    summary = logrank_test(alterations['OS_MONTHS'],
                           no_alterations['OS_MONTHS'],
                           alterations['EVENT'],
                           no_alterations['EVENT'],
                           alpha=0.99)

    alt_ci = alterations_kmf.confidence_interval_
    no_alt_ci = no_alterations_kmf.confidence_interval_
    return JsonResponse(
        dict(
            alterations_time=alterations_kmf.survival_function_.index.tolist(),
            alterations_upper=alt_ci['alterations_upper_0.95'].fillna(1).tolist(),
            alterations_lower=alt_ci['alterations_lower_0.95'].fillna(1).tolist(),
            alterations_survival=alterations_kmf.
            survival_function_['alterations'].tolist(),
            no_alterations_time=no_alterations_kmf.survival_function_.index.
            tolist(),
            no_alterations_upper=no_alt_ci[
                'no_alterations_upper_0.95'].fillna(1).tolist(),
            no_alterations_lower=no_alt_ci[
                'no_alterations_lower_0.95'].fillna(1).tolist(),
            no_alterations_survival=no_alterations_kmf.
            survival_function_['no_alterations'].tolist(),
            p_value=summary.p_value))
예제 #36
0
 def test_kaplan_meier_with_censorship(self, sample_lifetimes):
     """The fitted survival values must match ``self.kaplan_meier`` on the same data."""
     durations, observed = sample_lifetimes
     fitter = KaplanMeierFitter()
     fitter.fit(durations, observed)
     fitted_values = fitter.survival_function_.values
     npt.assert_almost_equal(fitted_values, self.kaplan_meier(durations, observed))
예제 #37
0
def uw_tier_histplots():
    """
    Plot Kaplan-Meier curves of IPO duration per lead-underwriter tier.

    Reads the module-level ``sample`` frame (columns 'lead_underwriter_tier'
    and 'IPO_duration'), adds the display columns 'Underwriter Tier' and
    'IPO Duration' to it, fits one curve per tier ("-1", "0+", "7+", "9"),
    annotates the duration quartiles of the "-1" and "9" tiers, prints the
    Kruskal-Wallis statistic on the plot and saves the figure to
    'IPO_tiers_KP_survival.pdf'.
    """
    # Import from the package root: the ``lifelines.estimation`` namespace was
    # removed from lifelines, while the root export works on all releases.
    from lifelines import KaplanMeierFitter
    import matplotlib.pyplot as plt

    sample['Underwriter Tier'] = sample['lead_underwriter_tier']
    sample['IPO Duration'] = sample['IPO_duration']
    ranks = ["-1", "0+", "7+", "9"]
    ranklabels = ['No Underwriter', 'Low Rank', 'Mid Rank', 'Rank 9 (elite)']

    def uw_tier_duration(x):
        return sample[sample.lead_underwriter_tier==x]['IPO_duration']
    kwstat = kruskalwallis(*[uw_tier_duration(x) for x in ranks])

    # g = sb.FacetGrid(sample,
    #                 row="Underwriter Tier",
    #                 hue="Underwriter Tier",
    #                 palette=cp_four("cool_r"),
    #                 size=2, aspect=4,
    #                 hue_order=ranks, row_order=ranks,
    #                 legend=ranks, xlim=(0,1095))
    # g.map(sb.distplot, "IPO Duration")
    # plt.savefig("IPO_tiers_KP_survival.pdf", format='pdf', dpi=200)

    # Each mark pairs a percentile of the durations with the survival level at
    # which its arrow points: the "75%" label sits at S(t) = 0.25, and so on.
    marks = [("75%", 0.25), ("50%", 0.50), ("25%", 0.75)]
    # Label offsets (x, y) per mark; only these two tiers get annotations.
    label_offsets = {
        "-1": [(145, .04), (145, .04), (145, 0.04)],
        "9": [(415, .1), (290, .1), (165, 0.1)],
    }

    kmf = KaplanMeierFitter()

    # Success
    f, ax = plt.subplots(1,1,figsize=(12, 4), sharex=True)
    line_width = 1 # annotation line thickness

    for rank, rlabel, color in zip(ranks, ranklabels, cp_four("cool_r")):
        uw = sample[sample.lead_underwriter_tier==rank]

        kmf.fit(uw['IPO_duration'],
                label='{} N={}'.format(rlabel, len(uw)),
                alpha=0.9)
        kmf.plot(ax=ax, c=color, alpha=0.7)

        # 75th, 50th and 25th percentile of the fitted durations, in that order
        quartiles = [int(np.percentile(kmf.durations, q)) for q in (75, 50, 25)]
        aprops = dict(facecolor=color, width=line_width, headwidth=line_width)

        offsets = label_offsets.get(rank, ())
        for (text, level), days, (dx, dy) in zip(marks, quartiles, offsets):
            plt.annotate("{}: {} days".format(text, days),
                        (days, level),
                        xytext=(days+dx, level+dy),
                        arrowprops=aprops)

    plt.annotate("Kruskall Wallis\nH: {:.3f}\nprob: {:.3f}".format(*kwstat),
                (960, 0.1))
    plt.ylim(0,1)
    plt.xlim(0,1095)
    plt.title("Kaplan-Meier survival times by bank tier")
    plt.xlabel("IPO Duration (days)")
    plt.ylabel(r"$S(t)=Pr(T>t)$")
    plt.savefig("IPO_tiers_KP_survival.pdf", format='pdf', dpi=200)
예제 #38
0
 def test_predict_method_returns_gives_values_prior_to_the_value_in_the_survival_function(self):
     """``predict`` between two timeline points must return the value at the earlier point."""
     T = [1, 2, 3]
     kmf = KaplanMeierFitter()
     kmf.fit(T)
     # ``.loc`` replaces the ``.ix`` indexer, which pandas removed in 1.0;
     # the lookups are by timeline label, as they were with ``.ix``.
     assert abs(kmf.predict(0.5) - kmf.survival_function_.loc[0].values) < 10e-8
     assert abs(kmf.predict(1.9999) - kmf.survival_function_.loc[1].values) < 10e-8
def plot_kaplan_function(duration_key):
    """
    Plot Kaplan-Meier curves of a delay column for upward vs. downward price amendments.

    ``duration_key`` names the column read from the module-level ``above``
    and ``under`` frames. Previously the argument was overwritten with
    "days_to_first_price_change" unconditionally, so it had no effect; it is
    now honoured, and passing ``None`` selects that former hard-coded key.

    Both curves are drawn on one axes, and the 75th/50th/25th percentiles of
    each group's durations are annotated at survival levels 0.25/0.50/0.75.
    """
    # Import from the package root: the ``lifelines.estimation`` namespace was
    # removed from lifelines, while the root export works on all releases.
    from lifelines import KaplanMeierFitter
    import matplotlib.pyplot as plt


    duration_keys = ["days_from_priced_to_listing",
                    "days_to_final_price_revision",
                    # "days_to_first_price_update",
                    "days_from_s1_to_listing",
                    "days_to_first_price_change"]
    if duration_key is None:
        duration_key = duration_keys[-1]

    kmf = KaplanMeierFitter()
    f, ax = plt.subplots(1,1,figsize=(12, 4), sharex=True)
    line_width = 1 # annotation line thickness
    xoffset = 0.4 # annotation offset (x-axis)
    yoffset = 0.04
    # Each mark pairs a percentile of the durations with the survival level at
    # which its arrow points: the "75%" label sits at S(t) = 0.25, and so on.
    marks = [("75%", 0.25), ("50%", 0.50), ("25%", 0.75)]

    def annotate_quartiles(color, extra_y):
        """Annotate the quartiles of the most recent fit; ``extra_y`` lifts the labels."""
        quartiles = [int(np.percentile(kmf.durations, q)) for q in (75, 50, 25)]
        aprops = dict(facecolor=color, width=line_width, headwidth=line_width)
        for (text, level), days in zip(marks, quartiles):
            plt.annotate("{}: {} days".format(text, days),
                        (days, level),
                        xytext=(days+xoffset, level+yoffset+extra_y),
                        arrowprops=aprops)


    # Above filing price range
    kmf.fit(above[duration_key], label='Upward Price Amendment: N={}'.format(len(above)), alpha=0.9)
    kmf.plot(ax=ax, c=colors[5], alpha=0.7)
    annotate_quartiles(colors[5], 0)


    # Under filing price range (labels lifted by 0.05 so both sets stay apart)
    kmf.fit(under[duration_key], label='Downward Price Amendment: N={}'.format(len(under)),)
    kmf.plot(ax=ax, c=colors[2], alpha=0.7)
    annotate_quartiles(colors[2], 0.05)


    # log rank tests + general graph labels
    # (re-enabling this needs `from lifelines.statistics import logrank_test`)
    # summary, p_value, results = logrank_test(
    #                                 above[duration_key],
    #                                 within[duration_key],
    #                                 under[duration_key],
    #                                 alpha=0.95)
    # ax.annotate("Log-rank test: (prob={p:.3f})".format(p=p_value),
    #             xy=(1210, 0.08))

    plt.ylim(0,1)
    plt.xlim(0, max(np.percentile(above[duration_key], 90), np.percentile(under[duration_key],90)))
    plt.title("Kaplan-Meier Survival Functions")
    plt.xlabel("Delay (days) in {}".format(duration_key))
    plt.ylabel(r"$S(t)=Pr(T>t)$")
# print(df.head())

print(df['lenfol'].describe())

# Look at how long a patient lives: histogram of 'lenfol' for rows with fstat > 0
dead = df[df['fstat'] > 0]
dead.hist(bins=20, column='lenfol')
plt.show()

# Plot the cumulative, normalised histogram of 'lenfol' for the same rows
# (an empirical CDF, not a cumulative hazard).
# `density=True` replaces `normed=1`, which Matplotlib removed from `hist`.
dead.hist(bins=100, column='lenfol',
          cumulative=True, density=True)
plt.show()

# Plot the survival curve of all patients on a fixed 0..2500 timeline
kaplen_meier = KaplanMeierFitter()
time_of_event = df['lenfol']
event = df['fstat']
time = np.linspace(0, 2500, 100)

kaplen_meier.fit(time_of_event, timeline=time, event_observed=event, label='All patients')
kaplen_meier.plot()
plt.show()

# Stratify: patients with chf == 1 (congestive heart complications)
history = df['chf'] == 1

kaplen_meier = KaplanMeierFitter()
kaplen_meier.fit(time_of_event[history], timeline=time, event_observed=event[history], label='Congestive heart complications')
ax = kaplen_meier.plot()
예제 #41
0
print("White Medium Hazard: %.2f" % (math.exp(0.7736)))


# NOTE: an unterminated statement (`df5 = df3[df3['']`) used to stand here. It
# was a syntax error and its target was overwritten by the next line anyway,
# so it has been dropped.
df5 = df3[['duration', 'event']]

cph.fit(df = df5, duration_col = 'duration', event_col = 'event')
cph.predict_survival_function(X = df5).plot()




# Kaplan-Meier plots

# Import from the package root: the `lifelines.estimation` namespace was
# removed from lifelines, while the root export works on all releases.
from lifelines import KaplanMeierFitter
kmf = KaplanMeierFitter()


# Curve over all rows of df3
df6 = df3[['duration', 'event']]
kmf.fit(df6['duration'],df6['event'])
kmf.plot()


# Survival curve restricted to race_factor == 'African-American' and
# score_factor == 'Low'
df6a = df3[df3['race_factor'] == 'African-American']
df6a = df6a[df6a['score_factor'] == 'Low']
df6b = df6a[['duration', 'event']]
kmf.fit(df6b['duration'],df6b['event'])
kmf.plot()

get_ipython().magic(u'R -o p')

# Render HTML
HTML(p[0])


# The `y axis` represents the probability a patient is still alive at time $t$ weeks. We see a steep drop-off within the first 100 weeks, and then observe the curve flattening. The dotted lines represent the 95% confidence intervals.

# ### Using Python

# We will now replicate the above steps using Python. Above, we have already specified a variable `tongue` that holds the data in a pandas dataframe.

# In[15]:

# Import from the package root: the `lifelines.estimation` namespace was
# removed from lifelines, while the root export works on all releases.
from lifelines import KaplanMeierFitter
kmf = KaplanMeierFitter()


# The method takes the same parameters as its R counterpart: a time vector and a vector indicating which observations are observed or censored. The model fitting sequence is similar to the [scikit-learn](http://scikit-learn.org/stable/) API.

# In[16]:

# Restrict the data to rows with type == 1
f = tongue.type==1
T = tongue[f]['time']
C = tongue[f]['delta']

kmf.fit(T, event_observed=C)


# To get a plot with the confidence intervals, we can simply call `plot()` on our `kmf` object.
예제 #43
0
 def test_predict_methods_returns_a_scalar_or_a_array_depending_on_input(self, sample_lifetimes):
     """predict() gives a non-iterable for one time and an iterable for a list of times."""
     durations = sample_lifetimes[0]
     fitter = KaplanMeierFitter()
     fitter.fit(durations)

     single_time_result = fitter.predict(1)
     assert not isinstance(single_time_result, Iterable)

     many_times_result = fitter.predict([1, 2])
     assert isinstance(many_times_result, Iterable)
예제 #44
0
 def test_kaplan_meier_with_censorship(self, sample_lifetimes):
     """The fitted survival function must match self.kaplan_meier on the same inputs."""
     durations, observed = sample_lifetimes
     fitter = KaplanMeierFitter()
     fitter.fit(durations, observed)
     estimated = fitter.survival_function_.values
     expected = self.kaplan_meier(durations, observed)
     npt.assert_almost_equal(estimated, expected)
예제 #45
0
        def overall_survival_analysis(data):
            """Fit a Kaplan-Meier curve for overall survival and summarise it.

            Parameters
            ----------
            data : dict
                Maps a patient key to a record providing ``'VitalStatus'`` and
                ``'SurvivalTime'``.

            Returns
            -------
            dict
                ``'Coordinates'`` (list of ``[time, survival]`` pairs),
                ``'Median'`` (rounded median survival time) and ``'1Year'``,
                ``'3Year'``, ``'5Year'`` (rounded survival percentages taken
                at t=12, 36 and 60).

            Raises
            ------
            ValueError
                If no patient has both a recognised vital status and a
                non-string survival time.
            """
            list_patients = []
            # Iterate over the records without rebinding the `data` parameter.
            for record in data.values():
                v_status = record['VitalStatus']
                s_time = record['SurvivalTime']

                # The previous test `v_status == 'Alive' or 'Dead'` was always
                # true, so any other status was silently counted as a death.
                if v_status not in ('Alive', 'Dead'):
                    continue

                # A string time is treated as "no usable duration"; keeping
                # such a patient would yield a row without a Duration value.
                if isinstance(s_time, str):
                    continue

                # Event flag: 0 = alive at last contact, 1 = dead.
                event = 0 if v_status == 'Alive' else 1
                list_patients.append([event, s_time])

            if not list_patients:
                raise ValueError(
                    "no patients with a known vital status and a numeric survival time")

            df = pd.DataFrame(list_patients, columns=['Event', 'Duration'])
            kmf = KaplanMeierFitter()
            kmf.fit(durations=df.Duration, event_observed=df.Event)

            # One [time, survival] pair per row of the fitted survival function.
            survival_fx = kmf.survival_function_
            coordinates = [
                [x, y]
                for x, y in zip(survival_fx.index.tolist(),
                                survival_fx.values.flatten())
            ]

            # Survival probability at t=12, 36 and 60, reported as 1, 3 and 5
            # years (so durations are presumably in months - TODO confirm).
            surv_for_1 = kmf.predict(12)
            surv_for_3 = kmf.predict(36)
            surv_for_5 = kmf.predict(60)

            # NOTE(review): if the curve never falls to 0.5 the median is
            # presumably infinite and int() would raise - confirm and decide
            # how callers want that reported.
            surv_median = int(round(kmf.median_))
            year_1_surv = int(round(surv_for_1 * 100))
            year_3_surv = int(round(surv_for_3 * 100))
            year_5_surv = int(round(surv_for_5 * 100))

            overall_surv_stats = {
                'Coordinates': coordinates,
                'Median': surv_median,
                '1Year': year_1_surv,
                '3Year': year_3_surv,
                '5Year': year_5_surv,
            }

            return overall_surv_stats
예제 #46
0
        def progression_free_analysis(data):
            """Fit a Kaplan-Meier curve for progression-free survival and summarise it.

            Parameters
            ----------
            data : dict
                Maps a patient key to a record providing ``'VitalStatus'``,
                ``'DiseaseProgression'`` and ``'SurvivalTime'``.

            Returns
            -------
            dict
                ``'Coordinates'`` (list of ``[time, survival]`` pairs),
                ``'Median'`` (rounded median time) and ``'1Year'``,
                ``'3Year'``, ``'5Year'`` (rounded percentages taken at
                t=12, 36 and 60).

            Raises
            ------
            ValueError
                If no patient has a progression flag, a vital status and a
                non-string survival time.
            """
            list_patients = []
            # Iterate over the records without rebinding the `data` parameter.
            for record in data.values():
                v_status = record['VitalStatus']
                d_progression = record['DiseaseProgression']
                s_time = record['SurvivalTime']

                # Both fields are required to derive the event flag.
                if d_progression is None or v_status is None:
                    continue

                # A string time is treated as "no usable duration"; keeping
                # such a patient would yield a row without a Duration value.
                if isinstance(s_time, str):
                    continue

                # NOTE(review): the flag is compared with the *string*
                # 'False'; a boolean False would be counted as progression -
                # confirm the upstream type.
                if v_status == 'Alive' or d_progression == 'False':
                    # patient disease did not progress
                    progression_status = 0
                else:
                    # patient disease did progress
                    progression_status = 1

                list_patients.append([progression_status, s_time])

            if not list_patients:
                raise ValueError(
                    "no patients with progression data and a numeric survival time")

            df = pd.DataFrame(list_patients, columns=['Event', 'Duration'])
            kmf = KaplanMeierFitter()
            kmf.fit(durations=df.Duration, event_observed=df.Event)

            # One [time, survival] pair per row of the fitted survival function.
            survival_fx = kmf.survival_function_
            coordinates = [
                [x, y]
                for x, y in zip(survival_fx.index.tolist(),
                                survival_fx.values.flatten())
            ]

            # Progression-free probability at t=12, 36 and 60, reported as 1,
            # 3 and 5 years (durations presumably in months - TODO confirm).
            surv_for_1 = kmf.predict(12)
            surv_for_3 = kmf.predict(36)
            surv_for_5 = kmf.predict(60)

            # NOTE(review): if the curve never falls to 0.5 the median is
            # presumably infinite and int() would raise - confirm and decide
            # how callers want that reported.
            surv_median = int(round(kmf.median_))
            year_1_surv = int(round(surv_for_1 * 100))
            year_3_surv = int(round(surv_for_3 * 100))
            year_5_surv = int(round(surv_for_5 * 100))

            prog_free_stats = {
                'Coordinates': coordinates,
                'Median': surv_median,
                '1Year': year_1_surv,
                '3Year': year_3_surv,
                '5Year': year_5_surv,
            }

            return prog_free_stats
예제 #47
0
# Strip trailing whitespace from every raw cluster label.
cluster_list = [s.rstrip() for s in cluster_l]
# NOTE: the array built here is not assigned to anything; cluster_list stays a list.
np.array(cluster_list)
cluster_list



# In[25]:

# Load the tab-separated clinical table and attach one cluster label per row.
df = pd.read_csv(clinical_filename, delimiter='\t')
df['group'] = cluster_list
df.head()


# In[26]:

# A single fitter is re-fitted for each cluster; all curves share one axes.
kmf = KaplanMeierFitter()
ax = plt.subplot(111)
plt.rcParams['font.family'] = 'Arial'

for group in sorted(df['group'].unique()):
    # Boolean mask selecting the rows of the current cluster.
    g = df.group == group
    T = df[g]['days_to_last_followup']
    C = df[g]['event']
    # The legend label carries the cluster name and its row count.
    kmf.fit(T, event_observed=C, label='Cluster - ' + group + ' (' + str(len(T)) + ')')
    kmf.survival_function_.plot(ax=ax,  linewidth=4.0)
# Despite its name, kmf2 holds the current matplotlib figure, not a fitter.
kmf2 = plt.gcf()
plt.title(cancer_name,fontsize=30)
plt.xlabel('Time in Days',fontsize=30)
plt.ylabel('Survival Rate',fontsize=30)
# NOTE(review): these rc defaults are set after the axes were created and may
# not affect the tick labels of this figure - confirm.
plt.rc('xtick', labelsize=20)
plt.rc('ytick', labelsize=20)
예제 #48
0
 def test_sort_doesnt_affect_kmf(self, sample_lifetimes):
     """Fitting on sorted durations must give the same survival function as the given order."""
     durations, _unused = sample_lifetimes
     fitter = KaplanMeierFitter()
     # Capture the first curve before the same fitter is re-fitted.
     curve_as_given = fitter.fit(durations).survival_function_
     curve_sorted = fitter.fit(sorted(durations)).survival_function_
     assert_frame_equal(curve_as_given, curve_sorted)
예제 #49
0
 def test_sort_doesnt_affect_kmf(self, sample_lifetimes):
     """Input order must not change the estimated survival function."""
     lifetimes, _ignored = sample_lifetimes
     estimator = KaplanMeierFitter()
     # Read the first result before re-fitting the same estimator.
     original_order = estimator.fit(lifetimes).survival_function_
     ascending_order = estimator.fit(sorted(lifetimes)).survival_function_
     assert_frame_equal(original_order, ascending_order)
예제 #50
0
 def kmf(self):
     """Return a new KaplanMeierFitter instance on which fit() has not been called."""
     return KaplanMeierFitter()
예제 #51
0
 def test_predict_method_returns_exact_value_if_given_an_observed_time(self):
     """predict() at an observed time must equal the survival function at that index label."""
     T = [1, 2, 3]
     kmf = KaplanMeierFitter()
     kmf.fit(T)
     time = 1
     # Label-based lookup with .loc; the .ix indexer has been removed from pandas.
     assert abs(kmf.predict(time) - kmf.survival_function_.loc[time].values) < 10e-8
예제 #52
0
            else:
                v_status = 1

            patient_info.append(v_status)

            if type(s_time) != str:
                patient_info.append(s_time)

            list_patients.append(patient_info)

    df = pd.DataFrame(list_patients)
    num_patients = len(df)
    print(str(num_patients) + " patients used in analysis")

    df.columns = ['Event', 'Duration']
    kmf = KaplanMeierFitter()
    kmf.fit(durations=df.Duration, event_observed=df.Event)

    #median survival in months
    print("median survival: " + str(kmf.median_) + " months")

    #print(kmf.survival_function_)

    coordinates = []
    survival_fx = (kmf.survival_function_)
    coordinates_y = list(survival_fx.values.flatten())

    coordinates_x = []
    for row in survival_fx.iterrows():
        timeline, km_estimate = row
        coordinates_x.append(timeline.tolist())
# Compare Kaplan-Meier curves for rows with and without neoadjuvant treatment.
df = pd.read_table('clinical_data.tab',sep='\t')


survival_col = '_OS'
censor_col = '_OS_IND'
clinical_predictors = ['age_at_initial_pathologic_diagnosis']
# Drop rows that have no value in the survival column.
df = df[pd.notnull(df[survival_col])]


# Boolean mask: rows whose treatment-history column equals 'Yes'.
tx = df['history_of_neoadjuvant_treatment']=='Yes'
ax = plt.subplot(111)

# Selection uses .loc (boolean mask + column label); the .ix indexer that was
# used here has been removed from pandas.
# NOTE(review): `label` is passed as a one-element list rather than a string -
# confirm that the installed lifelines version accepts this.
kmf1 = KaplanMeierFitter(alpha=0.95)
kmf1.fit(durations=df.loc[tx, survival_col], event_observed=df.loc[tx, censor_col], label=['Tx==Yes'])
kmf1.plot(ax=ax, show_censors=True,  ci_show=False)


kmf2 = KaplanMeierFitter(alpha=0.95)
kmf2.fit(durations=df.loc[~tx, survival_col], event_observed=df.loc[~tx, censor_col], label=['Tx==No'])
kmf2.plot(ax=ax, show_censors=True,  ci_show=False )

add_at_risk_counts(kmf1, kmf2, ax=ax)
plt.title ('Acute myeloid leukemia survival analysis with Tx and without Tx')
plt.xlabel(survival_col)
plt.savefig('km.png')

# Log-rank test between the two groups: durations first, then event indicators.
results = logrank_test(df.loc[tx, survival_col], df.loc[~tx, survival_col], df.loc[tx, censor_col], df.loc[~tx, censor_col], alpha=.99 )
results.print_summary()
예제 #54
0
    def test_kmf_with_inverted_axis(self, block, kmf):
        """Draw two Kaplan-Meier curves from exponential samples on one axes with invert_y_axis=True."""
        # The incoming `kmf` argument is never read; fresh fitters are built below.
        default_scale_sample = np.random.exponential(size=100)
        first_fitter = KaplanMeierFitter()
        first_fitter.fit(default_scale_sample, label="t2")
        shared_axes = first_fitter.plot(invert_y_axis=True, at_risk_counts=True)

        scale_three_sample = np.random.exponential(3, size=100)
        second_fitter = KaplanMeierFitter()
        second_fitter.fit(scale_three_sample, label="t1")
        second_fitter.plot(invert_y_axis=True, ax=shared_axes, ci_force_lines=False)

        pyplot = self.plt
        pyplot.title("test_kmf_with_inverted_axis")
        pyplot.show(block=block)
예제 #55
0
 def test_predict_methods_returns_a_scalar_or_a_array_depending_on_input(self, sample_lifetimes):
     """A single time yields a non-iterable prediction; a list of times yields an iterable."""
     observed_durations = sample_lifetimes[0]
     estimator = KaplanMeierFitter()
     estimator.fit(observed_durations)

     at_one_time = estimator.predict(1)
     assert not isinstance(at_one_time, Iterable)

     at_two_times = estimator.predict([1, 2])
     assert isinstance(at_two_times, Iterable)
예제 #56
0
def survival_estimation(directory=tmp_dir):
    """Estimate and plot Kaplan-Meier survival curves for the lifetime data.

    Two figures are produced: one curve for all rows, and one figure that
    compares rows with ``top == 1`` against the rest. The median of each
    fitted curve is printed.

    When ``directory`` is None the figures are shown interactively; otherwise
    each is saved as PNG and PDF inside ``directory`` and then closed.

    see: https://github.com/CamDavidsonPilon/lifelines
    """
    from lifelines.estimation import KaplanMeierFitter

    def _decorate_and_emit(png_template, pdf_template):
        # Apply the shared axis cosmetics, then either show or save the figure.
        plt.ylabel('Survival probablility')
        plt.xlabel('Time in years')
        plt.ylim(0, 1)
        plt.grid()
        if directory is None:
            plt.ion()
            plt.show()
            return
        plt.savefig(png_template.format(directory))
        plt.savefig(pdf_template.format(directory))
        plt.close()

    frame = get_lifetime_data_frame(recompute=False)
    durations = frame['duration']
    flags = frame['censored']

    # --- Figure 1: every row in a single curve ---
    estimator = KaplanMeierFitter()
    estimator.fit(durations, event_observed=flags, label='All developers')
    print("Median survival time for all developers: {} years".format(
        estimator.median_))
    plt.figure(figsize=(10, 8))
    axes = plt.subplot(111)
    estimator.plot(ax=axes, color=color_map(2))
    _decorate_and_emit('{0}/survival_all.png', '{0}/survival_all.pdf')

    # --- Figure 2: rows with top == 1 versus all other rows ---
    in_top = frame['top'] == 1
    estimator = KaplanMeierFitter()
    plt.figure(figsize=(10, 8))
    axes = plt.subplot(111)

    estimator.fit(durations[in_top], event_observed=flags[in_top],
                  label="Top connectivity level")
    print("Median survival time for top developers: {} years".format(
        estimator.median_))
    estimator.plot(ax=axes, color=color_map(0))

    estimator.fit(durations[~in_top], event_observed=flags[~in_top],
                  label="Not in the top")
    print("Median survival time for not top developers: {} years".format(
        estimator.median_))
    estimator.plot(ax=axes, color=color_map(1))
    _decorate_and_emit('{0}/survival_top.png', '{0}/survival_top.pdf')
예제 #57
0
 def test_kmf_plotting(self, block):
     """Fit one estimator on three exponential samples in turn, drawing every curve on the same axes."""
     # All samples are drawn up front, in the same order as before.
     labelled_samples = [
         ('test label 1', np.random.exponential(10, size=(100))),
         ('test label 2', np.random.exponential(2, size=(200, 1))),
         ('test label 3', np.random.exponential(4, size=(500, 1))),
     ]
     fitter = KaplanMeierFitter()

     # The first curve creates the axes; the remaining curves reuse it.
     first_label, first_sample = labelled_samples[0]
     fitter.fit(first_sample, label=first_label)
     shared_axes = fitter.plot()
     for tag, sample in labelled_samples[1:]:
         fitter.fit(sample, label=tag)
         fitter.plot(ax=shared_axes)

     self.plt.title("test_kmf_plotting")
     self.plt.show(block=block)