Example #1
0
    def test_aalen_additive_fit_with_censor(self):
        """Visual check: overlay fitted cumulative hazards on the true ones.

        Simulated lifetimes are fitted with roughly 1% of rows marked as
        censored (event flags are drawn from Binomial(1, 0.99)).  One subplot
        is drawn per coefficient column, restricted to times up to 15.
        Skipped when matplotlib is not installed.
        """
        pytest.importorskip("matplotlib")
        from matplotlib import pyplot as plt

        n = 2500
        d = 6
        timeline = np.linspace(0, 70, 10000)
        hz, coef, X = generate_hazard_rates(n, d, timeline)
        X.columns = coef.columns
        # True cumulative hazards, obtained by integrating the coefficients
        # over the timeline.
        cumulative_hazards = pd.DataFrame(cumulative_integral(coef.values, timeline),
                                          index=timeline, columns=coef.columns)
        T = generate_random_lifetimes(hz, timeline)
        X['T'] = T
        X['E'] = np.random.binomial(1, 0.99, n)

        aaf = AalenAdditiveFitter()
        aaf.fit(X, 'T', 'E')

        for i in range(d + 1):
            ax = plt.subplot(d + 1, 1, i + 1)
            col = cumulative_hazards.columns[i]
            # ``.ix`` has been removed from pandas; ``.loc`` performs the same
            # label-based slice on the float timeline index.
            ax = cumulative_hazards[col].loc[:15].plot(legend=False, ax=ax)
            # NOTE(review): ``loc=`` matches the other plotting tests in this
            # file; confirm the installed lifelines no longer expects ``ix=``.
            ax = aaf.plot(loc=slice(0, 15), ax=ax, columns=[col], legend=False)
        plt.show()
Example #2
0
 def test_aaf_panel_dataset_with_no_censorship(self):
     """Fitting panel data without an event column marks ids 1..9 as observed."""
     fitter = AalenAdditiveFitter()
     fitter.fit(load_panel_test(), id_col='id', duration_col='t')

     all_observed = pd.Series([True] * 9, index=range(1, 10))
     all_observed.index.name = 'id'
     assert_series_equal(fitter.event_observed, all_observed)
Example #3
0
 def test_predict_cumulative_hazard_inputs(self, data_pred1):
     """Predictions from a DataFrame and from its raw ndarray must be equal."""
     aaf = AalenAdditiveFitter()
     aaf.fit(data_pred1, duration_col='t', event_col='E')
     # ``.ix`` has been removed from pandas.  ``.loc`` keeps the label-based,
     # end-inclusive slice (assumes the fixture has an integer index --
     # TODO confirm).
     x = data_pred1.loc[:5].drop(['t', 'E'], axis=1)
     y_df = aaf.predict_cumulative_hazard(x)
     y_np = aaf.predict_cumulative_hazard(x.values)
     assert_frame_equal(y_df, y_np)
Example #4
0
 def test_using_a_custom_timeline_in_static_fitting(self, rossi):
     """A timeline passed to ``fit`` must index every fitted result frame."""
     custom_timeline = np.arange(10)
     fitter = AalenAdditiveFitter()
     fitter.fit(rossi, event_col='arrest', duration_col='week', timeline=custom_timeline)

     for attr in ('hazards_', 'cumulative_hazards_', 'variance_'):
         npt.assert_array_equal(getattr(fitter, attr).index.values, custom_timeline)
     npt.assert_array_equal(fitter.timeline, custom_timeline)
Example #5
0
 def test_large_dimensions_for_recursion_error(self):
     """Fit 500 rows with 50 random covariates; passes if ``fit`` does not raise."""
     n_rows, n_covariates = 500, 50
     design = pd.DataFrame(np.random.randn(n_rows, n_covariates))
     design['T'] = np.random.exponential(size=n_rows)

     AalenAdditiveFitter().fit(design, duration_col='T')
Example #6
0
    def test_aaf_panel_dataset(self):
        """Smoke test: fit the panel dataset and plot; skipped without matplotlib."""
        pytest.importorskip("matplotlib")
        from matplotlib import pyplot as plt  # noqa: F401 -- `plt` is not referenced below

        fitter = AalenAdditiveFitter()
        fitter.fit(load_panel_test(), id_col='id', duration_col='t', event_col='E')
        fitter.plot()
Example #7
0
 def test_using_a_custom_timeline_in_varying_fitting(self):
     """A timeline passed to an id-based (panel) fit indexes every result frame."""
     custom_timeline = np.arange(10)
     fitter = AalenAdditiveFitter()
     fitter.fit(load_panel_test(), id_col='id', duration_col='t', timeline=custom_timeline)

     for attr in ('hazards_', 'cumulative_hazards_', 'variance_'):
         npt.assert_array_equal(getattr(fitter, attr).index.values, custom_timeline)
     npt.assert_array_equal(fitter.timeline, custom_timeline)
Example #8
0
    def test_fit_methods_require_duration_col(self):
        """``fit`` called with only the data frame must raise ``TypeError``."""
        data = load_regression_dataset()

        for fitter in (AalenAdditiveFitter(), CoxPHFitter()):
            with pytest.raises(TypeError):
                fitter.fit(data)
Example #9
0
    def test_predict_methods_in_regression_return_same_types(self):
        """Each Aalen prediction must be an instance of the Cox prediction's type."""
        data = load_regression_dataset()

        aaf = AalenAdditiveFitter()
        cph = CoxPHFitter()
        for model in (aaf, cph):
            model.fit(data, duration_col='T', event_col='E')

        predict_methods = (
            'predict_percentile',
            'predict_median',
            'predict_expectation',
            'predict_survival_function',
            'predict_cumulative_hazard',
        )
        for method_name in predict_methods:
            aaf_result = getattr(aaf, method_name)(data)
            cph_result = getattr(cph, method_name)(data)
            assert isinstance(aaf_result, type(cph_result))
Example #10
0
    def test_prediction_methods_respect_index(self, data_pred2):
        """Predictions must keep the (reversed) index of the input frame."""
        # ``.ix`` has been removed from pandas.  ``.loc[:3]`` is the label-based,
        # end-inclusive equivalent, which yields the four labels 0..3 expected
        # below.
        x = data_pred2[['x1', 'x2']].loc[:3].sort_index(ascending=False)
        expected_index = pd.Index(np.array([3, 2, 1, 0]))

        cph = CoxPHFitter()
        cph.fit(data_pred2, duration_col='t', event_col='E')
        npt.assert_array_equal(cph.predict_partial_hazard(x).index, expected_index)
        npt.assert_array_equal(cph.predict_percentile(x).index, expected_index)
        npt.assert_array_equal(cph.predict_expectation(x).index, expected_index)

        aaf = AalenAdditiveFitter()
        aaf.fit(data_pred2, duration_col='t', event_col='E')
        npt.assert_array_equal(aaf.predict_percentile(x).index, expected_index)
        npt.assert_array_equal(aaf.predict_expectation(x).index, expected_index)
Example #11
0
    def test_fit_methods_can_accept_optional_event_col_param(self):
        """``event_col`` is optional: when omitted, every row counts as observed."""
        data = load_regression_dataset()

        for fitter_cls in (AalenAdditiveFitter, CoxPHFitter):
            fitter = fitter_cls()

            # With an explicit event column the stored flags mirror that column.
            fitter.fit(data, 'T', event_col='E')
            assert_series_equal(fitter.event_observed.sort_index(),
                                data['E'].astype(bool),
                                check_names=False)

            # Without one, all flags are ones.
            fitter.fit(data, 'T')
            npt.assert_array_equal(fitter.event_observed.values, np.ones(data.shape[0]))
Example #12
0
    def test_aalen_additive_median_predictions_split_data(self):
        """Predicted medians should exceed the true lifetime about half the time.

        The share of rows whose predicted median is above the simulated
        lifetime must lie within 0.05 of 0.5.
        """
        # Simulate hazard rates and a matching survival data set.
        n, d = 2500, 5
        timeline = np.linspace(0, 70, 5000)
        hz, _, X = generate_hazard_rates(n, d, timeline)
        lifetimes = generate_random_lifetimes(hz, timeline)
        X['T'] = lifetimes

        # Fit Aalen's additive model (no event column given).
        fitter = AalenAdditiveFitter()
        fitter.fit(X, 'T')

        # Predict from the covariate columns labelled 0..5 only.
        covariate_columns = list(range(6))
        predicted_median = fitter.predict_median(X[covariate_columns])
        share_above = (predicted_median.values > lifetimes).mean()
        assert abs(share_above - 0.5) < 0.05
Example #13
0
    def test_nn_cumulative_hazard_will_set_cum_hazards_to_0(self, rossi):
        """``nn_cumulative_hazard=True`` must leave no negative predicted hazards."""

        def negative_share(nn_flag):
            # Fraction of predicted cumulative-hazard entries that are below zero.
            fitter = AalenAdditiveFitter(nn_cumulative_hazard=nn_flag)
            fitter.fit(rossi, event_col='arrest', duration_col='week')
            predicted = fitter.predict_cumulative_hazard(rossi)
            return (predicted < 0).stack().mean()

        assert negative_share(False) > 0
        assert negative_share(True) == 0
Example #14
0
    def test_aalen_additive_smoothed_plot(self, block):
        """Visual check of the smoothed hazards from a penalized fit."""
        n, d = 2500, 3
        timeline = np.linspace(0, 150, 5000)
        hz, _, X = generate_hazard_rates(n, d, timeline)

        # Lifetimes are drawn first, then a small uniform jitter is added.
        lifetimes = generate_random_lifetimes(hz, timeline)
        jitter = 0.1 * np.random.uniform(size=(n, 1))
        observed = np.random.binomial(1, 0.8, size=n)
        X['T'] = lifetimes + jitter
        X['E'] = observed

        # fit the aaf, no intercept as it is already built into X, X[2] is ones
        fitter = AalenAdditiveFitter(coef_penalizer=0.1, fit_intercept=False)
        fitter.fit(X, 'T', 'E')

        # Plot everything except the last 500 rows.
        smoothed = fitter.smoothed_hazards_(1)
        last_row = fitter.cumulative_hazards_.shape[0] - 500
        axis = smoothed.iloc[:last_row].plot()
        axis.set_xlabel("time")
        axis.set_title('test_aalen_additive_smoothed_plot')
        self.plt.show(block=block)
Example #15
0
    def test_aalen_additive_plot(self):
        """Visual check of the fitted cumulative hazards with no censoring."""
        n, d = 2500, 3
        timeline = np.linspace(0, 70, 10000)
        hz, _, X = generate_hazard_rates(n, d, timeline)
        X['T'] = generate_random_lifetimes(hz, timeline)
        # Binomial(1, 1.) always yields 1, so every row is an observed event.
        X['E'] = np.random.binomial(1, 1., size=n)

        # fit the aaf, no intercept as it is already built into X, X[2] is ones
        fitter = AalenAdditiveFitter(coef_penalizer=0.1, fit_intercept=False)
        fitter.fit(X, 'T', 'E')

        # Plot everything except the last 100 rows.
        last_row = fitter.cumulative_hazards_.shape[0] - 100
        axis = fitter.plot(iloc=slice(0, last_row))
        axis.set_xlabel("time")
        axis.set_title('test_aalen_additive_plot')
        self.plt.show()
Example #16
0
    def test_aalen_additive_fit_no_censor(self, block):
        """Visual check: fitted vs. true cumulative hazards with no censoring."""
        n, d = 2500, 6
        timeline = np.linspace(0, 70, 10000)
        hz, coef, X = generate_hazard_rates(n, d, timeline)
        X.columns = coef.columns
        true_cumulative = pd.DataFrame(
            cumulative_integral(coef.values, timeline),
            index=timeline,
            columns=coef.columns,
        )
        X['T'] = generate_random_lifetimes(hz, timeline)
        # Binomial(1, 1) always yields 1, so every row is an observed event.
        X['E'] = np.random.binomial(1, 1, n)

        fitter = AalenAdditiveFitter()
        fitter.fit(X, 'T', 'E')

        # One stacked subplot per column, restricted to times up to 15.
        n_panels = d + 1
        for panel in range(n_panels):
            axis = self.plt.subplot(n_panels, 1, panel + 1)
            column = true_cumulative.columns[panel]
            axis = true_cumulative[column].loc[:15].plot(legend=False, ax=axis)
            fitter.plot(loc=slice(0, 15), ax=axis, columns=[column], legend=False)
        self.plt.title("test_aalen_additive_fit_no_censor")
        self.plt.show(block=block)
Example #17
0
    def test_aaf_panel_dataset(self, block):
        """Visual check: fit the panel dataset and show the resulting plot."""
        fitter = AalenAdditiveFitter()
        fitter.fit(load_panel_test(), id_col='id', duration_col='t', event_col='E')
        fitter.plot()

        self.plt.title("test_aaf_panel_dataset")
        self.plt.show(block=block)
Example #18
0
    def test_duration_vector_can_be_normalized(self):
        """Standardizing the duration column must not change the fitted hazards."""
        raw = load_kidney_transplant()
        durations = raw['time']
        standardized = raw.copy()
        standardized['time'] = (standardized['time'] - durations.mean()) / durations.std()

        def fitted_hazards(fitter, frame):
            # The index is dropped because aaf will have a different "time" index.
            fitted = fitter.fit(frame, duration_col='time', event_col='death')
            return fitted.hazards_.reset_index(drop=True)

        for fitter in [CoxPHFitter(), AalenAdditiveFitter()]:
            hazards_raw = fitted_hazards(fitter, raw)
            hazards_standardized = fitted_hazards(fitter, standardized)
            assert_frame_equal(hazards_raw, hazards_standardized)
Example #19
0
    def test_aalen_additive_fit_no_censor(self, block):
        """Plot fitted against true cumulative hazards for fully observed data."""
        n, d = 2500, 6
        timeline = np.linspace(0, 70, 10000)
        hz, coef, X = generate_hazard_rates(n, d, timeline)
        X.columns = coef.columns

        integrated = cumulative_integral(coef.values, timeline)
        reference = pd.DataFrame(integrated, index=timeline, columns=coef.columns)

        X["T"] = generate_random_lifetimes(hz, timeline)
        # Every draw from Binomial(1, 1) is 1: all events are observed.
        X["E"] = np.random.binomial(1, 1, n)

        model = AalenAdditiveFitter()
        model.fit(X, "T", "E")

        # Draw row ``row`` of a (d + 1) x 1 grid for each column, times <= 15.
        for row in range(1, d + 2):
            name = reference.columns[row - 1]
            axes = self.plt.subplot(d + 1, 1, row)
            axes = reference[name].loc[:15].plot(legend=False, ax=axes)
            model.plot(loc=slice(0, 15), ax=axes, columns=[name], legend=False)

        self.plt.title("test_aalen_additive_fit_no_censor")
        self.plt.show(block=block)
Example #20
0
    def test_penalizer_reduces_norm_of_hazards(self, rossi):
        """Penalized fits must not have a larger cumulative-hazard norm."""
        from numpy.linalg import norm

        fit_kwargs = dict(event_col='arrest', duration_col='week')

        unpenalized = AalenAdditiveFitter(coef_penalizer=0., smoothing_penalizer=0.)
        assert unpenalized.coef_penalizer == unpenalized.smoothing_penalizer == 0.0
        unpenalized.fit(rossi, **fit_kwargs)

        penalized = AalenAdditiveFitter(coef_penalizer=10., smoothing_penalizer=10.)
        penalized.fit(rossi, **fit_kwargs)

        penalized_norm = norm(penalized.cumulative_hazards_)
        unpenalized_norm = norm(unpenalized.cumulative_hazards_)
        assert penalized_norm <= unpenalized_norm
Example #21
0
    def test_input_column_order_is_equal_to_output_hazards_order(self, rossi):
        """Hazard columns come out in the order the covariates went in."""
        covariates = ['fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio']

        # Default fit: a 'baseline' column is present and is ignored here.
        with_intercept = AalenAdditiveFitter()
        with_intercept.fit(rossi, event_col='arrest', duration_col='week')
        fitted_columns = with_intercept.cumulative_hazards_.columns.drop('baseline')
        assert list(fitted_columns) == covariates

        # Without an intercept the columns are compared as they are.
        without_intercept = AalenAdditiveFitter(fit_intercept=False)
        without_intercept.fit(rossi, event_col='arrest', duration_col='week')
        assert list(without_intercept.cumulative_hazards_.columns) == covariates
Example #22
0
    def test_crossval_for_aalen_add(self, data_pred2, data_pred1):
        """Mean 3-fold cross-validation score must exceed 0.90 on both fixtures.

        For each fixture the 3-fold cross-validation is repeated 20 times; the
        mean of the per-repeat mean scores is compared with the threshold.
        """
        aaf = AalenAdditiveFitter()
        expected = 0.90
        msg = "Expected min-mean c-index {:.2f} < {:.2f}"

        for data_pred in [data_pred1, data_pred2]:
            mean_scores = [
                np.mean(k_fold_cross_validation(aaf,
                                                data_pred,
                                                duration_col='t',
                                                event_col='E',
                                                k=3))
                for _ in range(20)
            ]

            # Report the quantity that is actually asserted on.  Previously the
            # message used the fold scores of the last repeat only (and called
            # ``.mean()`` on them, which fails if they are a plain list).
            overall_mean = np.mean(mean_scores)
            assert overall_mean > expected, msg.format(expected, overall_mean)
Example #23
0
 def test_predict_cumulative_hazard_inputs(self, data_pred1):
     """DataFrame input and plain ndarray input must give equal predictions."""
     aaf = AalenAdditiveFitter()
     aaf.fit(
         data_pred1,
         duration_col='t',
         event_col='E',
     )
     # ``.ix`` has been removed from pandas.  ``.loc`` keeps the label-based,
     # end-inclusive slice (assumes the fixture has an integer index --
     # TODO confirm).
     x = data_pred1.loc[:5].drop(['t', 'E'], axis=1)
     y_df = aaf.predict_cumulative_hazard(x)
     y_np = aaf.predict_cumulative_hazard(x.values)
     assert_frame_equal(y_df, y_np)
Example #24
0
    def test_prediction_methods_respect_index(self, data_pred2):
        """Every prediction method must keep the (reversed) input index."""
        # ``.ix`` has been removed from pandas.  ``.loc[:3]`` is the label-based,
        # end-inclusive equivalent, which yields the four labels 0..3 expected
        # below.
        x = data_pred2[['x1', 'x2']].loc[:3].sort_index(ascending=False)
        expected_index = pd.Index(np.array([3, 2, 1, 0]))

        cph = CoxPHFitter()
        cph.fit(data_pred2, duration_col='t', event_col='E')
        npt.assert_array_equal(cph.predict_partial_hazard(x).index, expected_index)
        npt.assert_array_equal(cph.predict_percentile(x).index, expected_index)
        npt.assert_array_equal(cph.predict(x).index, expected_index)
        npt.assert_array_equal(cph.predict_expectation(x).index, expected_index)

        aaf = AalenAdditiveFitter()
        aaf.fit(data_pred2, duration_col='t', event_col='E')
        npt.assert_array_equal(aaf.predict_percentile(x).index, expected_index)
        npt.assert_array_equal(aaf.predict(x).index, expected_index)
        npt.assert_array_equal(aaf.predict_expectation(x).index, expected_index)
Example #25
0
    def test_fit_methods_can_accept_optional_event_col_param(self):
        """Both fitters accept ``event_col`` and default to all-observed without it."""
        data = load_regression_dataset()
        n_rows = data.shape[0]

        def check(fitter):
            # Explicit event column: stored flags equal the boolean 'E' column.
            fitter.fit(data, 'T', event_col='E')
            stored = fitter.event_observed.sort_index()
            assert_series_equal(stored, data['E'].astype(bool), check_names=False)

            # No event column: stored flags are all ones.
            fitter.fit(data, 'T')
            npt.assert_array_equal(fitter.event_observed.values, np.ones(n_rows))

        check(AalenAdditiveFitter())
        check(CoxPHFitter())
Example #26
0
            # NOTE(review): this is a Python 2 fragment (``iteritems``, ``print``
            # statement) taken from inside a loop that is not visible here.
            # For every key with a positive count ``v``, append ``v`` zero-valued
            # events and ``v`` copies of the integer parsed from the second
            # '/'-separated field of the key (presumably a month -- confirm).
            for k,v in sales_dict.iteritems():
                # TODO: investigate why some valid dates have negative leftovers
                # (more repairs than sales?).
                print v
                if v>0:
                    data_events = np.append(data_events,np.zeros(v))
                    extra_var = np.append(extra_var,np.array([int(k.split('/')[1])]*v))

            t=[]
            # No events collected: record a row of 19 zeros and move on to the
            # next iteration of the enclosing loop.
            if len(data_events)==0:
                all_data.append([0]*19)
                continue

            # Zero entries are replaced by 60; C flags the entries below 60
            # (presumably 60 is the censoring horizon -- confirm).
            data_events[data_events==0] = 60
            C= data_events <60
            # NOTE(review): AAF is presumably an alias for lifelines'
            # AalenAdditiveFitter -- verify against the imports.
            aaf = AAF(penalizer=0.5)

            # Earlier season-based encoding, kept for reference:
            #extra_vars_df["winter"] = (extra_var == 1.0).astype(float)
            #extra_vars_df["spring"] = (extra_var == 2.0).astype(float)
            #extra_vars_df["summer"] = (extra_var == 3.0).astype(float)
            #extra_vars_df["fall"] = (extra_var == 4.0).astype(float)

            # One 0.0/1.0 indicator column per value 1..9 of extra_var, named by
            # the value's string form.
            extra_vars_df[str(1)] = (extra_var == 1).astype(float)
            extra_vars_df[str(2)] = (extra_var == 2).astype(float)
            extra_vars_df[str(3)] = (extra_var == 3).astype(float)
            extra_vars_df[str(4)] = (extra_var == 4).astype(float)
            extra_vars_df[str(5)] = (extra_var == 5).astype(float)
            extra_vars_df[str(6)] = (extra_var == 6).astype(float)
            extra_vars_df[str(7)] = (extra_var == 7).astype(float)
            extra_vars_df[str(8)] = (extra_var == 8).astype(float)
            extra_vars_df[str(9)] = (extra_var == 9).astype(float)
Example #27
0
 def regression_models(self):
     """Return new fitters: Cox PH, Aalen additive, and a Cox PH with strata."""
     strata_columns = ['race', 'paro', 'mar', 'wexp']
     cox = CoxPHFitter()
     aalen = AalenAdditiveFitter()
     stratified_cox = CoxPHFitter(strata=strata_columns)
     return [cox, aalen, stratified_cox]
Example #28
0
 def test_dataframe_input_with_nonstandard_index(self):
     """Fitting a frame indexed by strings must not raise."""
     rows = [(16, True, True), (1, True, True), (4, False, True)]
     frame = pd.DataFrame(rows,
                          columns=['duration', 'done_feeding', 'white'],
                          index=['a', 'b', 'c'])
     AalenAdditiveFitter().fit(frame, duration_col='duration', event_col='done_feeding')
Example #29
0
    def test_swapping_order_of_columns_in_a_df_is_okay(self, rossi):
        """Median predictions must not depend on the covariate column order."""
        misorder = ['age', 'race', 'wexp', 'mar', 'paro', 'prio', 'fin']
        natural_order = rossi.columns.drop(['week', 'arrest'])
        deleted_order = rossi.columns.difference(['week', 'arrest'])

        # Checked with the default fitter and with the intercept turned off.
        for init_kwargs in ({}, {'fit_intercept': False}):
            fitter = AalenAdditiveFitter(**init_kwargs)
            fitter.fit(rossi, event_col='arrest', duration_col='week')
            for reordered in (misorder, deleted_order):
                assert_frame_equal(fitter.predict_median(rossi[natural_order]),
                                   fitter.predict_median(rossi[reordered]))
Example #30
0
 def test_dataframe_input_with_nonstandard_index(self):
     """Smoke test: ``fit`` accepts a DataFrame whose index is 'a', 'b', 'c'."""
     labels = ['a', 'b', 'c']
     column_names = ['duration', 'done_feeding', 'white']
     records = [(16, True, True), (1, True, True), (4, False, True)]
     df = pd.DataFrame(records, columns=column_names, index=labels)

     fitter = AalenAdditiveFitter()
     fitter.fit(df, duration_col='duration', event_col='done_feeding')
# We will use Aalen's additive model to estimate the survival times in the testing set, using a model built from a training set with 100-fold sub-sampling of the data.

# In[33]:

# Go through each Monte Carlo train/test split: fit on the training rows and
# predict median survival for the testing rows.
predictions = []

# The design matrix does not depend on the split, so it is built once.
X = patsy.dmatrix('age + grade + stage -1', clinical, return_type='dataframe')
X['T'] = clinical['OS_OS']
X['C'] = clinical['OS_vital_status']

for i in range(trainLabels.shape[1]):
    # ``.ix`` has been removed from pandas; ``.loc`` is the label/boolean-mask
    # based replacement.
    # NOTE(review): assumes trainLabels[i] / testLabels[i] hold row labels or
    # boolean masks, not integer positions -- confirm.
    trainX = X.loc[trainLabels[i], :].reset_index()
    testX = X.loc[testLabels[i], :].reset_index()

    # Build model and train
    aaf = AalenAdditiveFitter(penalizer=1., fit_intercept=True)
    aaf.fit(trainX.drop(['index'], axis=1), duration_col='T', event_col='C', show_progress=False)
    # Predict on testing data, restoring the original row labels afterwards
    median = aaf.predict_median(testX.drop(['T', 'C', 'index'], axis=1))
    median.index = testX['index']
    # Infinite or missing predictions are replaced by 0
    predictions.append(median.replace([np.inf, -np.inf, np.nan], 0))


# ###Saving Results to Synapse and ask Synapse to evaluate our predictions
# To document what we have done we will start by storing this code in Synapse as a file Entity.

# In[34]:

# Wrap this script in a Synapse File entity and store it via ``syn``.
codeEntity = syn.store(
    synapseclient.File('tcga_survival_analysis.py', parentId='syn1720423'))
Example #32
0
# -*- coding: utf-8 -*-
# aalen additive

if __name__ == "__main__":
    import time

    import pandas as pd

    from lifelines.datasets import load_rossi
    from lifelines.estimation import AalenAdditiveFitter

    # Stack five copies of the Rossi data to get a larger workload to time.
    rossi_x5 = pd.concat([load_rossi()] * 5).reset_index(drop=True)
    print("Size: ", rossi_x5.shape)

    fitter = AalenAdditiveFitter()
    started = time.time()
    fitter.fit(rossi_x5, duration_col="week", event_col="arrest")
    elapsed = time.time() - started
    print("--- %s seconds ---" % elapsed)
    print(fitter.score_)
# In[33]:

# Go through each Monte Carlo train/test split: fit on the training rows and
# predict median survival for the testing rows.
predictions = []

# The design matrix does not depend on the split, so it is built once.
X = patsy.dmatrix('age + grade + stage -1',
                  clinical,
                  return_type='dataframe')
X['T'] = clinical['OS_OS']
X['C'] = clinical['OS_vital_status']

for i in range(trainLabels.shape[1]):
    # ``.ix`` has been removed from pandas; ``.loc`` is the label/boolean-mask
    # based replacement.
    # NOTE(review): assumes trainLabels[i] / testLabels[i] hold row labels or
    # boolean masks, not integer positions -- confirm.
    trainX = X.loc[trainLabels[i], :].reset_index()
    testX = X.loc[testLabels[i], :].reset_index()

    # Build model and train
    aaf = AalenAdditiveFitter(penalizer=1., fit_intercept=True)
    aaf.fit(trainX.drop(['index'], axis=1),
            duration_col='T',
            event_col='C',
            show_progress=False)
    # Predict on testing data, restoring the original row labels afterwards
    median = aaf.predict_median(testX.drop(['T', 'C', 'index'], axis=1))
    median.index = testX['index']
    # Infinite or missing predictions are replaced by 0
    predictions.append(median.replace([np.inf, -np.inf, np.nan], 0))

# ###Saving Results to Synapse and ask Synapse to evaluate our predictions
# To document what we have done we will start by storing this code in Synapse as a file Entity.

# In[34]:

codeEntity = synapseclient.File('tcga_survival_analysis.py',
Example #34
0
    def test_swapping_order_of_columns_in_a_df_is_okay(self):
        """Median predictions must not depend on the covariate column order.

        Checked for the default fitter and for ``fit_intercept=False``.
        """
        rossi = load_rossi()
        aaf = AalenAdditiveFitter()
        # 'week' is the duration and 'arrest' the event flag, as in the other
        # rossi fits in this file; the two had been passed the wrong way round.
        aaf.fit(rossi, event_col='arrest', duration_col='week')

        misorder = ['age', 'race', 'wexp', 'mar', 'paro', 'prio', 'fin']
        natural_order = rossi.columns.drop(['week', 'arrest'])
        # ``Index - list`` is no longer a set difference in pandas; use the
        # explicit method instead.
        deleted_order = rossi.columns.difference(['week', 'arrest'])
        assert_frame_equal(aaf.predict_median(rossi[natural_order]), aaf.predict_median(rossi[misorder]))
        assert_frame_equal(aaf.predict_median(rossi[natural_order]), aaf.predict_median(rossi[deleted_order]))

        aaf = AalenAdditiveFitter(fit_intercept=False)
        aaf.fit(rossi, event_col='arrest', duration_col='week')
        assert_frame_equal(aaf.predict_median(rossi[natural_order]), aaf.predict_median(rossi[misorder]))
        assert_frame_equal(aaf.predict_median(rossi[natural_order]), aaf.predict_median(rossi[deleted_order]))