Ejemplo n.º 1
0
    def test_baseline_cumulative_hazard_is_the_same_indp_of_location(self, regression_dataset):
        """Check that demeaning the covariates leaves the baseline cumulative hazard unchanged.

        One model is fit on a copy of ``regression_dataset``; a second is fit on a
        copy whose ``var1``/``var2``/``var3`` columns had their column means subtracted.
        """
        covariates = ['var1', 'var2', 'var3']

        original = regression_dataset.copy()
        fitter_original = CoxPHFitter()
        fitter_original.fit(original, event_col='E', duration_col='T')

        demeaned = regression_dataset.copy()
        column_means = demeaned[covariates].mean()
        demeaned[covariates] = demeaned[covariates] - column_means
        fitter_demeaned = CoxPHFitter()
        fitter_demeaned.fit(demeaned, event_col='E', duration_col='T')

        assert_frame_equal(
            fitter_demeaned.baseline_cumulative_hazard_,
            fitter_original.baseline_cumulative_hazard_
        )
Ejemplo n.º 2
0
    def test_baseline_survival_is_the_same_indp_of_scale(self, regression_dataset):
        """Check that dividing the covariates by their std leaves the baseline survival unchanged.

        One model is fit on a copy of ``regression_dataset``; a second is fit on a
        copy whose ``var1``/``var2``/``var3`` columns were divided by their column
        standard deviations.
        """
        covariates = ['var1', 'var2', 'var3']

        original = regression_dataset.copy()
        fitter_original = CoxPHFitter()
        fitter_original.fit(original, event_col='E', duration_col='T')

        rescaled = regression_dataset.copy()
        column_stds = rescaled[covariates].std()
        rescaled[covariates] = rescaled[covariates] / column_stds
        fitter_rescaled = CoxPHFitter()
        fitter_rescaled.fit(rescaled, event_col='E', duration_col='T')

        assert_frame_equal(
            fitter_rescaled.baseline_survival_,
            fitter_original.baseline_survival_
        )
Ejemplo n.º 3
0
    def test_crossval_for_cox_ph_normalized(self, data_pred2, data_pred1):
        """Cross-validate CoxPHFitter on standardized copies of both prediction datasets.

        The ``t`` and ``x1`` columns (and ``x2`` when present) are shifted to
        mean 0 and scaled to standard deviation 1 before a 3-fold cross
        validation; the resulting mean score must exceed 0.9.
        """
        fitter = CoxPHFitter()
        for dataset in (data_pred1, data_pred2):
            normalized = dataset.copy()

            # 't' and 'x1' are always standardized; 'x2' only if the dataset has it.
            to_standardize = ['t', 'x1']
            if 'x2' in normalized.columns:
                to_standardize.append('x2')

            for column in to_standardize:
                values = normalized[column]
                # Normalize to mean = 0 and standard deviation = 1
                values -= np.mean(values)
                values /= np.std(values)
                normalized[column] = values

            fold_scores = k_fold_cross_validation(
                fitter,
                normalized,
                duration_col='t',
                event_col='E',
                k=3,
                predictor='predict_partial_hazard')

            # this is because we are using predict_partial_hazard
            mean_score = 1 - np.mean(fold_scores)
            threshold = 0.9
            template = "Expected min-mean c-index {:.2f} < {:.2f}"
            assert mean_score > threshold, template.format(threshold, mean_score)
Ejemplo n.º 4
0
 def test_coxph_plotting_normalized(self, block):
     """Fit on the regression dataset and show ``plot(True)`` under a fixed title."""
     fitter = CoxPHFitter()
     fitter.fit(load_regression_dataset(), "T", "E")
     fitter.plot(True)

     pyplot = self.plt
     pyplot.title("test_coxph_plotting_normalized")
     pyplot.show(block=block)
Ejemplo n.º 5
0
 def test_coxph_plotting_with_subset_of_columns(self, block):
     """Fit on the regression dataset and show a plot restricted to ``var1`` and ``var2``."""
     fitter = CoxPHFitter()
     fitter.fit(load_regression_dataset(), "T", "E")
     fitter.plot(columns=["var1", "var2"])

     pyplot = self.plt
     pyplot.title("test_coxph_plotting_with_subset_of_columns")
     pyplot.show(block=block)
Ejemplo n.º 6
0
def test_cross_validator_returns_k_results():
    """Check that k-fold cross validation returns exactly ``k`` results for k=3 and k=5."""
    fitter = CoxPHFitter()
    for n_folds in (3, 5):
        scores = utils.k_fold_cross_validation(
            fitter,
            load_regression_dataset(),
            duration_col='T',
            event_col='E',
            k=n_folds)
        assert len(scores) == n_folds
Ejemplo n.º 7
0
    def test_crossval_for_cox_ph_with_normalizing_times(
            self, data_pred2, data_pred1):
        """Cross-validate CoxPHFitter after standardizing only the duration column.

        For each dataset the ``t`` column is shifted to mean 0 and scaled to
        standard deviation 1, then a 3-fold cross validation is run; the
        resulting mean score must exceed 0.9.
        """
        fitter = CoxPHFitter()

        for dataset in (data_pred1, data_pred2):
            normalized = dataset.copy()

            durations = normalized['t']
            # Normalize to mean = 0 and standard deviation = 1
            durations -= np.mean(durations)
            durations /= np.std(durations)
            normalized['t'] = durations

            fold_scores = k_fold_cross_validation(
                fitter, normalized,
                duration_col='t', event_col='E', k=3,
                predictor='predict_partial_hazard')

            mean_score = 1 - np.mean(fold_scores)

            threshold = 0.9
            template = "Expected min-mean c-index {:.2f} < {:.2f}"
            assert mean_score > threshold, template.format(threshold, mean_score)
Ejemplo n.º 8
0
 def test_log_likelihood_is_available_in_output(self, data_nus):
     """Fit with ``include_likelihood=True`` and compare ``_log_likelihood`` to a reference value."""
     reference_log_likelihood = -12.7601409152

     fitter = CoxPHFitter()
     fitter.fit(data_nus, duration_col='t', event_col='E', include_likelihood=True)

     # the stored value must be within 0.001 of the reference
     assert abs(fitter._log_likelihood - reference_log_likelihood) < 0.001
Ejemplo n.º 9
0
 def test_coxph_plotting(self, block):
     """Fit on the regression dataset and show the default plot under a fixed title."""
     fitter = CoxPHFitter()
     fitter.fit(load_regression_dataset(), "T", "E")
     fitter.plot()

     pyplot = self.plt
     pyplot.title('test_coxph_plotting')
     pyplot.show(block=block)
Ejemplo n.º 10
0
 def test_predict_log_hazard_relative_to_mean_without_normalization(self, rossi):
     """Check ``predict_log_hazard_relative_to_mean`` on a fitter built with ``normalize=False``.

     The result must equal the log of each row's partial hazard divided by
     the partial hazard evaluated at the column means of ``rossi``.
     """
     fitter = CoxPHFitter(normalize=False)
     fitter.fit(rossi, 'week', 'arrest')
     actual = fitter.predict_log_hazard_relative_to_mean(rossi)

     # single-row frame holding the mean of every column
     column_means = rossi.mean(0).to_frame().T
     assert fitter.predict_partial_hazard(column_means).values[0][0] != 1.0

     row_hazards = fitter.predict_partial_hazard(rossi)
     hazard_at_means = fitter.predict_partial_hazard(column_means).squeeze()
     assert_frame_equal(actual, np.log(row_hazards / hazard_at_means))
Ejemplo n.º 11
0
    def test_predict_log_hazard_relative_to_mean_with_normalization(self, rossi):
        """Check ``predict_log_hazard_relative_to_mean`` on a fitter built with ``normalize=True``."""
        fitter = CoxPHFitter(normalize=True)
        fitter.fit(rossi, 'week', 'arrest')

        relative_log_hazards = fitter.predict_log_hazard_relative_to_mean(rossi)
        log_partial_hazards = np.log(fitter.predict_partial_hazard(rossi))

        # They are equal because the data is normalized, so the mean of the covariates is all 0,
        # thus exp(beta * 0) == 1, so exp(beta * X) / exp(beta * 0) = exp(beta * X).
        assert_frame_equal(relative_log_hazards, log_partial_hazards)
Ejemplo n.º 12
0
    def test_survival_prediction_is_the_same_indp_of_scale(self, regression_dataset):
        """Check that multiplying the covariates by 10 does not change the predicted survival.

        One model is fit on a copy of ``regression_dataset`` and another on a
        copy whose ``var1``/``var2``/``var3`` columns are multiplied by 10.0.
        The survival function predicted for the row labelled 0 must be the
        same from both models.
        """
        covariates = ['var1', 'var2', 'var3']

        df = regression_dataset.copy()

        df_scaled = regression_dataset.copy()
        df_scaled[covariates] = df_scaled[covariates] * 10.0

        cp1 = CoxPHFitter()
        cp1.fit(df, event_col='E', duration_col='T')

        cp2 = CoxPHFitter()
        cp2.fit(df_scaled, event_col='E', duration_col='T')

        # `.ix` was deprecated and later removed from pandas; `.loc[[0]]` selects
        # the row labelled 0 and keeps the result as a one-row DataFrame.
        assert_frame_equal(
            cp1.predict_survival_function(df.loc[[0]][covariates]),
            cp2.predict_survival_function(df_scaled.loc[[0]][covariates])
        )
Ejemplo n.º 13
0
    def test_print_summary(self, rossi):

        import sys
        saved_stdout = sys.stdout
        try:
            out = StringIO()
            sys.stdout = out

            cp = CoxPHFitter()
            cp.fit(rossi, duration_col='week', event_col='arrest')
            cp.print_summary()
            output = out.getvalue().strip().split()
            expected = """n=432, number of events=114

           coef  exp(coef)  se(coef)          z         p  lower 0.95  upper 0.95
fin  -1.897e-01  8.272e-01 9.579e-02 -1.981e+00 4.763e-02  -3.775e-01  -1.938e-03   *
age  -3.500e-01  7.047e-01 1.344e-01 -2.604e+00 9.210e-03  -6.134e-01  -8.651e-02  **
race  1.032e-01  1.109e+00 1.012e-01  1.020e+00 3.078e-01  -9.516e-02   3.015e-01
wexp -7.486e-02  9.279e-01 1.051e-01 -7.124e-01 4.762e-01  -2.809e-01   1.311e-01
mar  -1.421e-01  8.675e-01 1.254e-01 -1.134e+00 2.570e-01  -3.880e-01   1.037e-01
paro -4.134e-02  9.595e-01 9.522e-02 -4.341e-01 6.642e-01  -2.280e-01   1.453e-01
prio  2.639e-01  1.302e+00 8.291e-02  3.182e+00 1.460e-03   1.013e-01   4.264e-01  **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Concordance = 0.640""".strip().split()
            for i in [0, 1, 2, -2, -1]:
                assert output[i] == expected[i]
        finally:
            sys.stdout = saved_stdout
Ejemplo n.º 14
0
 def test_strata_works_if_only_a_single_element_is_in_the_strata(self):
     """Fit with ``strata=['Stratum']`` on the holly/molly/polly data; passes if no error is raised."""
     data = load_holly_molly_polly()
     # remove the columns that are not passed to the fit
     for unused_column in ('Start(days)', 'Stop(days)', 'ID'):
         del data[unused_column]

     fitter = CoxPHFitter()
     fitter.fit(data, 'T', 'Status', strata=['Stratum'])
     assert True
Ejemplo n.º 15
0
def test_cross_validator_with_specific_loss_function():
    """Check that a custom ``evaluation_measure`` yields different scores than the default."""
    def square_loss(y_actual, y_pred):
        # mean of the squared differences between actual and predicted values
        residuals = y_actual - y_pred
        return (residuals ** 2).mean()

    fitter = CoxPHFitter()
    scores_square = utils.k_fold_cross_validation(
        fitter,
        load_regression_dataset(),
        evaluation_measure=square_loss,
        duration_col='T',
        event_col='E')
    scores_default = utils.k_fold_cross_validation(
        fitter,
        load_regression_dataset(),
        duration_col='T',
        event_col='E')
    assert list(scores_square) != list(scores_default)
Ejemplo n.º 16
0
    def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
        """Check that ``predict_partial_hazard`` gives the same frame for an array and a DataFrame."""
        fitter = CoxPHFitter()
        fitter.fit(data_pred2, 't', 'E')

        # select the columns held in the fitter's ``data`` attribute
        covariates = data_pred2[fitter.data.columns]
        from_array = fitter.predict_partial_hazard(np.array(covariates))
        from_frame = fitter.predict_partial_hazard(covariates)
        assert_frame_equal(from_array, from_frame)
Ejemplo n.º 17
0
def test_cross_validator_with_predictor_and_kwargs():
    """Run 3-fold cross validation with ``predict_percentile`` and ``p=0.6``; expect 3 results."""
    fitter = CoxPHFitter()
    percentile_kwargs = {'p': 0.6}
    scores = utils.k_fold_cross_validation(
        fitter,
        load_regression_dataset(),
        duration_col='T',
        k=3,
        predictor="predict_percentile",
        predictor_kwargs=percentile_kwargs)
    assert len(scores) == 3
Ejemplo n.º 18
0
def test_cross_validator_with_predictor():
    """Run 3-fold cross validation with ``predict_expectation``; expect 3 results."""
    fitter = CoxPHFitter()
    n_folds = 3
    scores = utils.k_fold_cross_validation(
        fitter,
        load_regression_dataset(),
        duration_col='T',
        event_col='E',
        k=n_folds,
        predictor="predict_expectation")
    assert len(scores) == n_folds
Ejemplo n.º 19
0
 def test_coxph_plotting_with_subset_of_columns_and_standardized(
         self, block):
     """Fit on the regression dataset and show ``plot(True, columns=['var1', 'var2'])``."""
     fitter = CoxPHFitter()
     fitter.fit(load_regression_dataset(), "T", "E")
     fitter.plot(True, columns=['var1', 'var2'])

     pyplot = self.plt
     pyplot.title('test_coxph_plotting_with_subset_of_columns_and_standardized')
     pyplot.show(block=block)
Ejemplo n.º 20
0
    def test_fit_methods_require_duration_col(self):
        """Check that calling ``fit`` with only the data raises ``TypeError`` for both fitters."""
        data = load_regression_dataset()

        for fitter in (AalenAdditiveFitter(), CoxPHFitter()):
            with pytest.raises(TypeError):
                fitter.fit(data)
Ejemplo n.º 21
0
    def test_p_value_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
        """Compare computed p-values on the larynx data with textbook values, to 2 decimals.

        See table 8.1 in Survival Analysis by John P. Klein and
        Melvin L. Moeschberger, Second Edition.
        """
        reference_p_values = np.array([0.1847, 0.7644, 0.0730, 0.00])

        fitter = CoxPHFitter()
        fitter.fit(load_larynx(), duration_col='time', event_col='death')

        computed_p_values = fitter._compute_p_values()
        npt.assert_array_almost_equal(computed_p_values, reference_p_values, decimal=2)
Ejemplo n.º 22
0
    def test_summary(self, rossi):
        """Check that ``summary`` contains every expected statistics column."""
        fitter = CoxPHFitter()
        fitter.fit(rossi, duration_col='week', event_col='arrest')

        summary_columns = fitter.summary.columns
        required_columns = (
            'coef', 'exp(coef)', 'se(coef)', 'z', 'p',
            'lower 0.95', 'upper 0.95'
        )
        assert all(name in summary_columns for name in required_columns)
Ejemplo n.º 23
0
def test_cross_validator_returns_fitters_k_results():
    """Check that a list of two fitters yields two result lists of ``k`` scores each."""
    fitter = CoxPHFitter()
    # the same fitter instance is passed twice
    fitter_pair = [fitter, fitter]

    for n_folds in (3, 5):
        per_fitter_scores = utils.k_fold_cross_validation(
            fitter_pair,
            load_regression_dataset(),
            duration_col='T',
            event_col='E',
            k=n_folds)
        assert len(per_fitter_scores) == 2
        assert len(per_fitter_scores[0]) == len(per_fitter_scores[1]) == n_folds
Ejemplo n.º 24
0
    def test_se_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
        """Compare computed standard errors on the larynx data with textbook values, to 2 decimals.

        See table 8.1 in Survival Analysis by John P. Klein and
        Melvin L. Moeschberger, Second Edition.
        """
        reference_se = np.array([[0.0143, 0.4623, 0.3561, 0.4222]])

        fitter = CoxPHFitter()
        fitter.fit(load_larynx(), duration_col='time', event_col='death')

        computed_se = fitter._compute_standard_errors().values
        npt.assert_array_almost_equal(computed_se, reference_se, decimal=2)
Ejemplo n.º 25
0
 def test_output_with_strata_against_R(self, rossi):
     """
     Compare stratified coefficients against reference values, to 3 decimals.

     R code given for the reference values:

     rossi <- read.csv('.../lifelines/datasets/rossi.csv')
     r = coxph(formula = Surv(week, arrest) ~ fin + age + strata(race,
                 paro, mar, wexp) + prio, data = rossi)
     """
     strata_columns = ['race', 'paro', 'mar', 'wexp']
     reference_coefs = np.array([[-0.3355, -0.0590, 0.1002]])

     fitter = CoxPHFitter()
     fitter.fit(rossi, duration_col='week', event_col='arrest', strata=strata_columns)

     npt.assert_array_almost_equal(fitter.hazards_.values, reference_coefs, decimal=3)
Ejemplo n.º 26
0
    def test_duration_vector_can_be_normalized(self):
        """Check that standardizing the ``time`` column leaves the fitted hazards unchanged.

        Run for both ``CoxPHFitter`` and ``AalenAdditiveFitter`` on the kidney
        transplant data.
        """
        raw = load_kidney_transplant()
        durations = raw['time']

        standardized = raw.copy()
        standardized['time'] = (standardized['time'] - durations.mean()) / durations.std()

        for fitter in (CoxPHFitter(), AalenAdditiveFitter()):
            # we drop indexes since aaf will have a different "time" index.
            fitted_raw = fitter.fit(raw, duration_col='time', event_col='death')
            hazards_raw = fitted_raw.hazards_.reset_index(drop=True)

            fitted_standardized = fitter.fit(standardized, duration_col='time', event_col='death')
            hazards_standardized = fitted_standardized.hazards_.reset_index(drop=True)

            assert_frame_equal(hazards_raw, hazards_standardized)
Ejemplo n.º 27
0
 def test_penalized_output_against_R(self, rossi):
     """Compare coefficients from a ``penalizer=1.0`` fit against reference values, to 3 decimals."""
     # R code:
     #
     # rossi <- read.csv('.../lifelines/datasets/rossi.csv')
     # mod.allison <- coxph(Surv(week, arrest) ~ ridge(fin, age, race, wexp, mar, paro, prio,
     #                                                 theta=1.0, scale=TRUE), data=rossi)
     # cat(round(mod.allison$coefficients, 4), sep=", ")
     reference_coefs = np.array(
         [[-0.3761, -0.0565, 0.3099, -0.1532, -0.4295, -0.0837, 0.0909]])

     fitter = CoxPHFitter(penalizer=1.0)
     fitter.fit(rossi, duration_col='week', event_col='arrest')

     npt.assert_array_almost_equal(fitter.hazards_.values, reference_coefs, decimal=3)
Ejemplo n.º 28
0
    def test_predict_methods_in_regression_return_same_types(self):
        """Check that each predict method returns the same type from both regression fitters."""
        data = load_regression_dataset()

        additive = AalenAdditiveFitter()
        cox = CoxPHFitter()
        additive.fit(data, duration_col='T', event_col='E')
        cox.fit(data, duration_col='T', event_col='E')

        predict_methods = (
            'predict_percentile',
            'predict_median',
            'predict_expectation',
            'predict_survival_function',
            'predict_cumulative_hazard',
        )
        for method_name in predict_methods:
            additive_result = getattr(additive, method_name)(data)
            cox_result = getattr(cox, method_name)(data)
            assert isinstance(additive_result, type(cox_result))
Ejemplo n.º 29
0
 def test_hazard_works_as_intended_with_strata_against_R_output(self, rossi):
     """
     Compare stratified baseline cumulative hazards with reference values, to 2 decimals.

     R code given for the reference values:

     > library(survival)
     > rossi = read.csv('.../lifelines/datasets/rossi.csv')
     > r = coxph(formula = Surv(week, arrest) ~ fin + age + strata(race,
         paro, mar, wexp) + prio, data = rossi)
     > basehaz(r, centered=TRUE)
     """
     cp = CoxPHFitter()
     cp.fit(rossi, 'week', 'arrest', strata=['race', 'paro', 'mar', 'wexp'])
     baseline = cp.baseline_cumulative_hazard_

     # `.ix` was deprecated and later removed from pandas; the listed values
     # are index labels, so label-based `.loc` is the replacement.
     npt.assert_almost_equal(
         baseline[(0, 0, 0, 0)].loc[[14, 35, 37, 43, 52]].values,
         [0.076600555, 0.169748261, 0.272088807, 0.396562717, 0.396562717],
         decimal=2)
     npt.assert_almost_equal(
         baseline[(0, 0, 0, 1)].loc[[27, 43, 48, 52]].values,
         [0.095499001, 0.204196905, 0.338393113, 0.338393113],
         decimal=2)
Ejemplo n.º 30
0
    def test_coef_output_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
        """Compare fitted coefficients on the kidney transplant data with textbook values, to 4 decimals.

        See example 8.3 in Survival Analysis by John P. Klein and
        Melvin L. Moeschberger, Second Edition.
        """
        wanted_columns = ['time', 'death', 'black_male', 'white_male', 'black_female']
        reference_coefs = np.array([[0.1596, 0.2484, 0.6567]])

        data = load_kidney_transplant(usecols=wanted_columns)
        fitter = CoxPHFitter()
        fitter.fit(data, duration_col='time', event_col='death')

        npt.assert_array_almost_equal(fitter.hazards_.values, reference_coefs, decimal=4)