def test_predict_log_hazard_relative_to_mean_with_normalization(self, rossi):
    """With ``normalize=True`` the relative log hazard equals the log partial hazard."""
    fitter = CoxPHFitter(normalize=True)
    fitter.fit(rossi, 'week', 'arrest')

    # The data is normalized, so the mean of every covariate is 0. Hence
    # exp(beta * 0) == 1 and exp(beta * X) / exp(beta * 0) == exp(beta * X).
    relative_to_mean = fitter.predict_log_hazard_relative_to_mean(rossi)
    log_partial_hazard = np.log(fitter.predict_partial_hazard(rossi))
    assert_frame_equal(relative_to_mean, log_partial_hazard)
def test_coxph_plotting_normalized(self, block):
    """Fit a Cox model on the regression dataset and display ``plot(True)``."""
    dataset = load_regression_dataset()

    fitter = CoxPHFitter()
    fitter.fit(dataset, "T", "E")
    fitter.plot(True)

    # `block` is forwarded to matplotlib's show() unchanged.
    self.plt.title('test_coxph_plotting')
    self.plt.show(block=block)
def test_print_summary(self, rossi):
    """Capture ``print_summary`` output and compare its first and last tokens."""
    import sys

    original_stdout = sys.stdout
    try:
        captured = StringIO()
        sys.stdout = captured

        fitter = CoxPHFitter()
        fitter.fit(rossi, duration_col='week', event_col='arrest')
        fitter.print_summary()
        printed_tokens = captured.getvalue().strip().split()

        expected_tokens = """n=432, number of events=114 coef exp(coef) se(coef) z p lower 0.95 upper 0.95 fin -1.897e-01 8.272e-01 9.579e-02 -1.981e+00 4.763e-02 -3.775e-01 -1.938e-03 * age -3.500e-01 7.047e-01 1.344e-01 -2.604e+00 9.210e-03 -6.134e-01 -8.651e-02 ** race 1.032e-01 1.109e+00 1.012e-01 1.020e+00 3.078e-01 -9.516e-02 3.015e-01 wexp -7.486e-02 9.279e-01 1.051e-01 -7.124e-01 4.762e-01 -2.809e-01 1.311e-01 mar -1.421e-01 8.675e-01 1.254e-01 -1.134e+00 2.570e-01 -3.880e-01 1.037e-01 paro -4.134e-02 9.595e-01 9.522e-02 -4.341e-01 6.642e-01 -2.280e-01 1.453e-01 prio 2.639e-01 1.302e+00 8.291e-02 3.182e+00 1.460e-03 1.013e-01 4.264e-01 ** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Concordance = 0.640""".strip().split()

        # Only the first three and the last two whitespace-separated
        # tokens are compared; the numeric table body is not checked.
        for position in (0, 1, 2, -2, -1):
            assert printed_tokens[position] == expected_tokens[position]
    finally:
        # Always restore the real stdout, even if an assertion failed.
        sys.stdout = original_stdout
def test_strata_works_if_only_a_single_element_is_in_the_strata(self):
    """Smoke test: a fit stratified on ``Stratum`` must complete without raising."""
    frame = load_holly_molly_polly()
    # Drop the columns that should not enter the model as covariates.
    for unused_column in ('Start(days)', 'Stop(days)', 'ID'):
        del frame[unused_column]

    fitter = CoxPHFitter()
    fitter.fit(frame, 'T', 'Status', strata=['Stratum'])
    # Reaching this line (i.e. no exception from fit) is the whole test.
    assert True
def test_p_value_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
    """Compare p-values fitted on the larynx data with published reference values."""
    # see table 8.1 in Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition
    expected_p = np.array([0.1847, 0.7644, 0.0730, 0.00])

    larynx = load_larynx()
    fitter = CoxPHFitter()
    fitter.fit(larynx, duration_col='time', event_col='death')

    # p-values, compared to two decimal places
    actual_p = fitter._compute_p_values()
    npt.assert_array_almost_equal(actual_p, expected_p, decimal=2)
def test_fit_methods_require_duration_col(self):
    """Calling ``fit`` with only the dataset must raise ``TypeError`` for both fitters."""
    X = load_regression_dataset()
    aaf = AalenAdditiveFitter()
    cph = CoxPHFitter()

    for fitter in (aaf, cph):
        with pytest.raises(TypeError):
            fitter.fit(X)
def test_se_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
    """Compare standard errors fitted on the larynx data with published reference values."""
    # see table 8.1 in Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition
    expected_se = np.array([[0.0143, 0.4623, 0.3561, 0.4222]])

    larynx = load_larynx()
    fitter = CoxPHFitter(normalize=False)
    fitter.fit(larynx, duration_col='time', event_col='death')

    # standard errors, compared to two decimal places
    actual_se = fitter._compute_standard_errors().values
    npt.assert_array_almost_equal(actual_se, expected_se, decimal=2)
def test_output_with_strata_against_R(self, rossi):
    """
    Compare stratified coefficients with the values recorded from this R session:

    rossi <- read.csv('.../lifelines/datasets/rossi.csv')
    r = coxph(formula = Surv(week, arrest) ~ fin + age + strata(race, paro, mar, wexp) + prio, data = rossi)
    """
    strata_columns = ['race', 'paro', 'mar', 'wexp']
    fitter = CoxPHFitter(normalize=False)
    fitter.fit(rossi, duration_col='week', event_col='arrest', strata=strata_columns)

    r_coefficients = np.array([[-0.335, -0.059, 0.100]])
    npt.assert_array_almost_equal(fitter.hazards_.values, r_coefficients, decimal=3)
def test_penalized_output_against_R(self, rossi):
    """Compare coefficients fitted with ``penalizer=1.0`` against recorded R output."""
    # R code:
    #
    # rossi <- read.csv('.../lifelines/datasets/rossi.csv')
    # mod.allison <- coxph(Surv(week, arrest) ~ ridge(fin, age, race, wexp, mar, paro, prio,
    #                                                 theta=1.0, scale=FALSE), data=rossi)
    # cat(round(mod.allison$coefficients, 4), sep=", ")
    r_coefficients = np.array([[-0.3641, -0.0580, 0.2894, -0.1496, -0.3837, -0.0822, 0.0913]])

    fitter = CoxPHFitter(normalize=False, penalizer=1.0)
    fitter.fit(rossi, duration_col='week', event_col='arrest')

    fitted = fitter.hazards_.values
    npt.assert_array_almost_equal(fitted, r_coefficients, decimal=3)
def test_predict_methods_in_regression_return_same_types(self):
    """Each shared predict method must return the same type from both fitters."""
    X = load_regression_dataset()

    aaf = AalenAdditiveFitter()
    cph = CoxPHFitter()
    aaf.fit(X, duration_col='T', event_col='E')
    cph.fit(X, duration_col='T', event_col='E')

    shared_predict_methods = (
        'predict_percentile',
        'predict_median',
        'predict_expectation',
        'predict_survival_function',
        'predict_cumulative_hazard',
    )
    for method_name in shared_predict_methods:
        aalen_result = getattr(aaf, method_name)(X)
        cox_result = getattr(cph, method_name)(X)
        assert isinstance(aalen_result, type(cox_result))
def test_output_against_R(self, rossi):
    """Compare unpenalized rossi coefficients against recorded R output."""
    # from http://cran.r-project.org/doc/contrib/Fox-Companion/appendix-cox-regression.pdf
    # Link is now broken, but this is the code:
    #
    # rossi <- read.csv('.../lifelines/datasets/rossi.csv')
    # mod.allison <- coxph(Surv(week, arrest) ~ fin + age + race + wexp + mar + paro + prio,
    #                      data=rossi)
    # cat(round(mod.allison$coefficients, 4), sep=", ")
    r_coefficients = np.array([[-0.3794, -0.0574, 0.3139, -0.1498, -0.4337, -0.0849, 0.0915]])

    fitter = CoxPHFitter(normalize=False)
    fitter.fit(rossi, duration_col='week', event_col='arrest')

    fitted = fitter.hazards_.values
    npt.assert_array_almost_equal(fitted, r_coefficients, decimal=3)
def test_coef_output_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
    """Compare coefficients fitted on the kidney transplant data with published values."""
    # see example 8.3 in Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition
    columns = ['time', 'death', 'black_male', 'white_male', 'black_female']
    transplants = load_kidney_transplant(usecols=columns)

    fitter = CoxPHFitter(normalize=False)
    fitter.fit(transplants, duration_col='time', event_col='death')

    # coefs, compared to four decimal places
    expected_coefs = np.array([[0.1596, 0.2484, 0.6567]])
    actual_coefs = fitter.hazards_.values
    npt.assert_array_almost_equal(actual_coefs, expected_coefs, decimal=4)
def test_hazard_works_as_intended_with_strata_against_R_output(self, rossi):
    """
    Compare per-stratum baseline cumulative hazards with this R session:

    > library(survival)
    > ross = read.csv('rossi.csv')
    > r = coxph(formula = Surv(week, arrest) ~ fin + age + strata(race, paro, mar, wexp) + prio, data = rossi)
    > basehaz(r, centered=FALSE)
    """
    cp = CoxPHFitter(normalize=False)
    cp.fit(rossi, 'week', 'arrest', strata=['race', 'paro', 'mar', 'wexp'])

    baseline = cp.baseline_cumulative_hazard_

    # `.loc` selects by time label; it replaces `.ix`, which pandas has
    # removed. Every requested time has an expected value, so all labels
    # are expected to be present in the index.
    npt.assert_almost_equal(
        baseline[(0, 0, 0, 0)].loc[[14, 35, 37, 43, 52]].values,
        [0.28665890, 0.63524149, 1.01822603, 1.48403930, 1.48403930],
        decimal=2,
    )
    npt.assert_almost_equal(
        baseline[(0, 0, 0, 1)].loc[[27, 43, 48, 52]].values,
        [0.35738173, 0.76415714, 1.26635373, 1.26635373],
        decimal=2,
    )
def test_strata_against_r_output(self, rossi):
    """
    Compare stratified coefficients and log-likelihood with this R session:

    > r = coxph(formula = Surv(week, arrest) ~ fin + age + strata(race, paro, mar, wexp) + prio, data = rossi)
    > r
    > r$loglik
    """
    fitter = CoxPHFitter(normalize=False)
    fitter.fit(
        rossi,
        'week',
        'arrest',
        strata=['race', 'paro', 'mar', 'wexp'],
        include_likelihood=True,
    )

    coefficients = fitter.summary['coef'].values
    npt.assert_almost_equal(coefficients, [-0.335, -0.059, 0.100], decimal=3)

    # The log-likelihood only has to match within 1% relative error.
    relative_error = abs(fitter._log_likelihood - -436.9339) / 436.9339
    assert relative_error < 0.01
def test_summary(self, rossi):
    """The ``summary`` frame must expose every expected statistics column."""
    fitter = CoxPHFitter()
    fitter.fit(rossi, duration_col='week', event_col='arrest')

    summary_frame = fitter.summary
    required_columns = ['coef', 'exp(coef)', 'se(coef)', 'z', 'p', 'lower 0.95', 'upper 0.95']
    missing = [name for name in required_columns if name not in summary_frame.columns]
    assert not missing
def test_warning_is_raised_if_df_has_a_near_constant_column(self, rossi):
    """Fitting data with a constant column must emit one ``RuntimeWarning`` about variance."""
    cox = CoxPHFitter()

    # Work on a copy so the added column does not leak into the fixture
    # object, in case it is shared with other tests.
    df = rossi.copy()
    df['constant'] = 1.0

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        try:
            cox.fit(df, 'week', 'arrest')
        except Exception:
            # The fit itself may fail on the degenerate column; only the
            # warning matters here. `Exception` (rather than a bare
            # `except`) lets KeyboardInterrupt/SystemExit propagate.
            pass

        assert len(w) == 1
        assert issubclass(w[-1].category, RuntimeWarning)
        assert "variance" in str(w[-1].message)
def test_fit_methods_can_accept_optional_event_col_param(self):
    """``event_col`` is optional: when omitted, every row is recorded as observed."""
    X = load_regression_dataset()
    expected_events = X['E'].astype(bool)

    # Same checks for both regression fitters, Aalen first and then Cox.
    for fitter_class in (AalenAdditiveFitter, CoxPHFitter):
        fitter = fitter_class()

        # With event_col, the stored flags mirror the column.
        fitter.fit(X, 'T', event_col='E')
        assert_series_equal(fitter.event_observed.sort_index(), expected_events, check_names=False)

        # Without event_col, all flags are ones.
        fitter.fit(X, 'T')
        npt.assert_array_equal(fitter.event_observed.values, np.ones(X.shape[0]))
def test_data_normalization(self, data_pred2):
    """Concordance on the stored training data must equal concordance on the raw input."""
    # During fit, CoxPH copies the training data and normalizes it.
    # Future calls should be normalized in the same way and
    # internal training set should not be saved in a normalized state.
    fitter = CoxPHFitter(normalize=True)
    fitter.fit(data_pred2, duration_col='t', event_col='E')

    # Internal training set
    stored_scores = -fitter.predict_partial_hazard(fitter.data).values
    ci_trn = concordance_index(fitter.durations, stored_scores, fitter.event_observed)

    # New data should normalize in the exact same way
    raw_scores = -fitter.predict_partial_hazard(data_pred2[['x1', 'x2']]).values
    ci_org = concordance_index(data_pred2['t'], raw_scores, data_pred2['E'])

    assert ci_org == ci_trn
def test_efron_computed_by_hand_examples(self, data_nus):
    """Step ``_get_efron_values`` three times and compare with hand-computed values."""
    cox = CoxPHFitter()

    # Enforce numpy arrays; the event flags are wanted as bools.
    X = np.array(data_nus['x'][:, None])
    T = np.array(data_nus['t'])
    E = np.array(data_nus['E']).astype(bool)

    # tests from http://courses.nus.edu.sg/course/stacar/internet/st3242/handouts/notes3.pdf
    # One row per iteration:
    # (expected -l[0][0], tolerance, expected u[0], tolerance, expected beta, tolerance)
    hand_computed_steps = [
        (77.13, 0.05, -2.51, 0.05, -0.0326, 0.05),
        (72.83, 0.05, -0.069, 0.05, -0.0325, 0.01),
        (72.70, 0.01, -0.000061, 0.01, -0.0335, 0.01),
    ]

    beta = np.array([[0]])
    for want_l, tol_l, want_u, tol_u, want_beta, tol_beta in hand_computed_steps:
        first_value, u = cox._get_efron_values(X, beta, T, E)
        negated = -first_value
        assert np.abs(negated[0][0] - want_l) < tol_l
        assert np.abs(u[0] - want_u) < tol_u

        # Update beta from the two returned quantities before the next step.
        beta = beta + u / negated
        assert np.abs(beta - want_beta) < tol_beta
def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
    """Partial hazards must be the same for a DataFrame and its numpy-array copy."""

    def frame_and_array_agree(fitter, covariates):
        # Predict from the frame first, then from the equivalent array.
        from_frame = fitter.predict_partial_hazard(covariates)
        from_array = fitter.predict_partial_hazard(np.array(covariates))
        return np.all(from_frame == from_array)

    # First without normalization
    plain = CoxPHFitter(normalize=False)
    plain.fit(data_pred2, 't', 'E')
    X = data_pred2[plain.data.columns]
    assert frame_and_array_agree(plain, X)

    # Now with normalization, reusing the same covariate frame
    normalized = CoxPHFitter(normalize=True)
    normalized.fit(data_pred2, 't', 'E')
    assert frame_and_array_agree(normalized, X)
def test_predict_log_hazard_relative_to_mean_without_normalization(self, rossi):
    """Without normalization the relative log hazard is log(hazard / hazard at the mean row)."""
    fitter = CoxPHFitter(normalize=False)
    fitter.fit(rossi, 'week', 'arrest')

    log_relative_hazards = fitter.predict_log_hazard_relative_to_mean(rossi)

    # A one-row frame holding the column means of the data.
    mean_row = rossi.mean(0).to_frame().T

    # The hazard at the mean row must not be exactly 1 here, otherwise
    # the division below would not be exercised.
    assert fitter.predict_partial_hazard(mean_row).values[0][0] != 1.0

    hazard_ratio = fitter.predict_partial_hazard(rossi) / fitter.predict_partial_hazard(mean_row).squeeze()
    assert_frame_equal(log_relative_hazards, np.log(hazard_ratio))
def test_concordance_index_fast_is_same_as_slow():
    """``fast_cindex`` must agree exactly with ``slow_cindex`` on random and fitted data."""
    size = 100
    T = np.random.normal(size=size)
    P = np.random.normal(size=size)
    C = np.random.choice([0, 1], size=size)
    Z = np.zeros_like(T)

    # Z (all zeros) and T (the durations themselves) are hard to imagine
    # failing; the independent random predictions P are the real test.
    for predicted in (Z, T, P):
        assert slow_cindex(T, predicted, C) == fast_cindex(T, predicted, C)

    # Repeat the comparison with the output of a fitted Cox model.
    fitter = CoxPHFitter()
    rossi = load_rossi()
    fitter.fit(rossi, duration_col='week', event_col='arrest')

    durations = fitter.durations.values.ravel()
    scores = -fitter.predict_partial_hazard(fitter.data).values.ravel()
    events = fitter.event_observed.values.ravel()
    assert slow_cindex(durations, scores, events) == fast_cindex(durations, scores, events)
def test_prediction_methods_respect_index(self, data_pred2):
    """Predictions must keep the (reversed) index of the frame they were given."""
    # `.loc[:3]` is a label-based, end-inclusive slice, giving the four
    # rows in the expected index below. It replaces `.ix`, which pandas
    # has removed.
    x = data_pred2[['x1', 'x2']].loc[:3].sort_index(ascending=False)
    expected_index = pd.Index(np.array([3, 2, 1, 0]))

    cph = CoxPHFitter()
    cph.fit(data_pred2, duration_col='t', event_col='E')
    npt.assert_array_equal(cph.predict_partial_hazard(x).index, expected_index)
    npt.assert_array_equal(cph.predict_percentile(x).index, expected_index)
    npt.assert_array_equal(cph.predict_expectation(x).index, expected_index)

    aaf = AalenAdditiveFitter()
    aaf.fit(data_pred2, duration_col='t', event_col='E')
    npt.assert_array_equal(aaf.predict_percentile(x).index, expected_index)
    npt.assert_array_equal(aaf.predict_expectation(x).index, expected_index)
def test_cross_validator_with_specific_loss_function():
    """A custom ``evaluation_measure`` must give different scores from the default one."""

    def square_loss(y_actual, y_pred):
        residual = y_actual - y_pred
        return (residual ** 2).mean()

    fitter = CoxPHFitter()
    column_kwargs = dict(duration_col="T", event_col="E")

    results_sq = utils.k_fold_cross_validation(
        fitter,
        load_regression_dataset(),
        evaluation_measure=square_loss,
        **column_kwargs
    )
    results_con = utils.k_fold_cross_validation(fitter, load_regression_dataset(), **column_kwargs)

    assert list(results_sq) != list(results_con)
def test_prediction_methods_respect_index(self, data_pred2):
    """Predictions (including plain ``predict``) must keep the index of their input frame."""
    # `.loc[:3]` is a label-based, end-inclusive slice, giving the four
    # rows in the expected index below. It replaces `.ix`, which pandas
    # has removed.
    x = data_pred2[['x1', 'x2']].loc[:3].sort_index(ascending=False)
    expected_index = pd.Index(np.array([3, 2, 1, 0]))

    cph = CoxPHFitter()
    cph.fit(data_pred2, duration_col='t', event_col='E')
    npt.assert_array_equal(cph.predict_partial_hazard(x).index, expected_index)
    npt.assert_array_equal(cph.predict_percentile(x).index, expected_index)
    npt.assert_array_equal(cph.predict(x).index, expected_index)
    npt.assert_array_equal(cph.predict_expectation(x).index, expected_index)

    aaf = AalenAdditiveFitter()
    aaf.fit(data_pred2, duration_col='t', event_col='E')
    npt.assert_array_equal(aaf.predict_percentile(x).index, expected_index)
    npt.assert_array_equal(aaf.predict(x).index, expected_index)
    npt.assert_array_equal(aaf.predict_expectation(x).index, expected_index)
def test_cross_validator_returns_fitters_k_results():
    """Given a list of fitters, cross-validation returns one list of k scores per fitter."""
    fitter = CoxPHFitter()
    fitters = [fitter, fitter]

    for folds in (3, 5):
        results = utils.k_fold_cross_validation(
            fitters,
            load_regression_dataset(),
            duration_col='T',
            event_col='E',
            k=folds,
        )
        assert len(results) == 2
        assert len(results[0]) == len(results[1]) == folds
def test_crossval_for_cox_ph(self, data_pred2, data_pred1):
    """Cross-validated scores on both datasets must clear the 0.9 threshold."""
    fitter = CoxPHFitter()
    expected = 0.9
    msg = "Expected min-mean c-index {:.2f} < {:.2f}"

    for dataset in (data_pred1, data_pred2):
        scores = k_fold_cross_validation(
            fitter,
            dataset,
            duration_col='t',
            event_col='E',
            k=3,
            predictor='predict_partial_hazard',
        )
        # this is because we are using predict_partial_hazard
        mean_score = 1 - np.mean(scores)
        assert mean_score > expected, msg.format(expected, mean_score)
def test_cox_ph_prediction_monotonicity(self, data_pred2):
    """All prediction methods must yield the same concordance index."""
    # Concordance wise, all prediction methods should be monotonic versions
    # of one-another, unless numerical factors screw it up.
    durations = data_pred2['t']
    events = data_pred2['E']
    covariates = data_pred2[['x1', 'x2']]

    fitter = CoxPHFitter()
    fitter.fit(data_pred2, duration_col='t', event_col='E')

    # Base comparison is partial_hazards (negated before scoring).
    ci_ph = concordance_index(durations, -fitter.predict_partial_hazard(covariates).values, events)

    ci_med = concordance_index(durations, fitter.predict_median(covariates).ravel(), events)
    assert ci_ph == ci_med

    ci_exp = concordance_index(durations, fitter.predict_expectation(covariates).ravel(), events)
    assert ci_ph == ci_exp
def test_cox_ph_prediction_monotonicity(self, data_pred2):
    """All prediction methods must yield the same concordance, with and without normalizing."""
    # Concordance wise, all prediction methods should be monotonic versions
    # of one-another, unless numerical factors screw it up.
    durations = data_pred2['t']
    events = data_pred2['E']
    covariates = data_pred2[['x1', 'x2']]

    for normalize in (True, False):
        msg = ("Predict methods should get the same concordance" +
               " when {}normalizing".format('' if normalize else 'not '))

        fitter = CoxPHFitter(normalize=normalize)
        fitter.fit(data_pred2, duration_col='t', event_col='E')

        # Base comparison is partial_hazards (negated before scoring).
        ci_ph = concordance_index(durations, -fitter.predict_partial_hazard(covariates).values, events)

        ci_med = concordance_index(durations, fitter.predict_median(covariates).ravel(), events)
        assert ci_ph == ci_med, msg

        ci_exp = concordance_index(durations, fitter.predict_expectation(covariates).ravel(), events)
        assert ci_ph == ci_exp, msg
def test_crossval_for_cox_ph_with_normalizing_times(self, data_pred2, data_pred1):
    """Cross-validated scores must still clear 0.9 after standardizing the durations."""
    fitter = CoxPHFitter()
    expected = 0.9
    msg = "Expected min-mean c-index {:.2f} < {:.2f}"

    for dataset in (data_pred1, data_pred2):
        # NOTE(review): the original carried an unfinished comment
        # ("why does this") at this point; its intent is unknown.
        standardized = dataset.copy()
        durations = standardized['t']

        # Normalize to mean = 0 and standard deviation = 1
        durations -= np.mean(durations)
        durations /= np.std(durations)
        standardized['t'] = durations

        scores = k_fold_cross_validation(
            fitter,
            standardized,
            duration_col='t',
            event_col='E',
            k=3,
            predictor='predict_partial_hazard',
        )
        mean_score = 1 - np.mean(scores)
        assert mean_score > expected, msg.format(expected, mean_score)
def test_data_normalization(self, data_pred2):
    """Concordance on the stored training data must equal concordance on the raw input."""
    # During fit, CoxPH copies the training data and normalizes it.
    # Future calls should be normalized in the same way and
    # internal training set should not be saved in a normalized state.
    fitter = CoxPHFitter()
    fitter.fit(data_pred2, duration_col='t', event_col='E')

    # Internal training set
    stored_scores = -fitter.predict_partial_hazard(fitter.data).values
    ci_trn = concordance_index(fitter.durations, stored_scores, fitter.event_observed)

    # New data should normalize in the exact same way
    raw_scores = -fitter.predict_partial_hazard(data_pred2[['x1', 'x2']]).values
    ci_org = concordance_index(data_pred2['t'], raw_scores, data_pred2['E'])

    assert ci_org == ci_trn
def test_print_summary(self):
    """Capture ``print_summary`` output for the rossi data and compare its edge tokens."""
    import sys

    # Python 2 ships StringIO as its own module; fall back to io on
    # Python 3. Only ImportError is expected here, so nothing else
    # (e.g. KeyboardInterrupt) is swallowed.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    saved_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out

        cp = CoxPHFitter()
        df = load_rossi()
        cp.fit(df, duration_col='week', event_col='arrest')
        cp.print_summary()
        output = out.getvalue().strip().split()

        expected = """n=432, number of events=114 coef exp(coef) se(coef) z p lower 0.95 upper 0.95 fin -1.897e-01 8.272e-01 9.579e-02 -1.981e+00 4.763e-02 -3.775e-01 -1.938e-03 * age -3.500e-01 7.047e-01 1.344e-01 -2.604e+00 9.210e-03 -6.134e-01 -8.651e-02 ** race 1.032e-01 1.109e+00 1.012e-01 1.020e+00 3.078e-01 -9.516e-02 3.015e-01 wexp -7.486e-02 9.279e-01 1.051e-01 -7.124e-01 4.762e-01 -2.809e-01 1.311e-01 mar -1.421e-01 8.675e-01 1.254e-01 -1.134e+00 2.570e-01 -3.880e-01 1.037e-01 paro -4.134e-02 9.595e-01 9.522e-02 -4.341e-01 6.642e-01 -2.280e-01 1.453e-01 prio 2.639e-01 1.302e+00 8.291e-02 3.182e+00 1.460e-03 1.013e-01 4.264e-01 ** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Concordance = 0.640""".strip().split()

        # Only the first three and the last two whitespace-separated
        # tokens are compared; the numeric table body is not checked.
        for i in [0, 1, 2, -2, -1]:
            assert output[i] == expected[i]
    finally:
        # Always restore the real stdout, even if an assertion failed.
        sys.stdout = saved_stdout
def test_cross_validator_with_stratified_cox_model():
    """Smoke test: cross-validating a Cox model built with ``strata`` must not raise."""
    stratified = CoxPHFitter(strata=["race"])
    rossi = load_rossi()
    utils.k_fold_cross_validation(stratified, rossi, duration_col="week", event_col="arrest")
def test_predict_log_hazard_relative_to_mean(self, rossi):
    """The relative log hazard equals log(hazard / hazard at the mean row)."""
    fitter = CoxPHFitter()
    fitter.fit(rossi, 'week', 'arrest')

    log_relative_hazards = fitter.predict_log_hazard_relative_to_mean(rossi)

    # A one-row frame holding the column means of the data.
    mean_row = rossi.mean(0).to_frame().T

    hazard_ratio = fitter.predict_partial_hazard(rossi) / fitter.predict_partial_hazard(mean_row).squeeze()
    assert_frame_equal(log_relative_hazards, np.log(hazard_ratio))
def test_log_likelihood_is_available_in_output(self, data_nus):
    """Fitting with ``include_likelihood=True`` must populate ``_log_likelihood``."""
    fitter = CoxPHFitter()
    fitter.fit(data_nus, duration_col='t', event_col='E', include_likelihood=True)

    deviation = abs(fitter._log_likelihood - -12.7601409152)
    assert deviation < 0.001
def test_input_column_order_is_equal_to_output_hazards_order(self, rossi):
    """The columns of ``hazards_`` must follow the covariate order of the input frame."""
    cp = CoxPHFitter()
    expected = ['fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio']

    # 'week' is the duration and 'arrest' the event flag, as in every
    # other rossi fit in this file. The two keyword arguments were
    # previously swapped, so the model was fitted on the wrong roles.
    cp.fit(rossi, duration_col='week', event_col='arrest')
    assert list(cp.hazards_.columns) == expected
def test_strata_removes_variable_from_summary_output(self, rossi):
    """A column used in ``strata`` must not appear in the summary index."""
    fitter = CoxPHFitter()
    fitter.fit(rossi, 'week', 'arrest', strata=['race'])

    reported_variables = fitter.summary.index
    assert 'race' not in reported_variables
def test_cross_validator_with_predictor():
    """Cross-validation with a named ``predictor`` returns one score per fold."""
    fitter = CoxPHFitter()
    dataset = load_regression_dataset()

    results = utils.k_fold_cross_validation(
        fitter,
        dataset,
        duration_col='T',
        event_col='E',
        k=3,
        predictor="predict_expectation",
    )
    assert len(results) == 3
def test_fit_method(self, data_nus):
    """The first fitted coefficient (``normalize=False``) must be -0.0335 within 1e-4."""
    cf = CoxPHFitter(normalize=False)
    cf.fit(data_nus, duration_col='t', event_col='E')

    # Positional access to the first coefficient. `.iloc` replaces
    # `.ix`, which pandas has removed.
    first_coefficient = cf.hazards_.iloc[0, 0]
    assert np.abs(first_coefficient - -0.0335) < 0.0001
def test_fit_method(self, data_nus):
    """The first fitted coefficient must be -0.0335 within 1e-4."""
    cf = CoxPHFitter()
    cf.fit(data_nus, duration_col='t', event_col='E')

    # Positional access to the first coefficient. `.iloc` replaces
    # `.ix`, which pandas has removed.
    first_coefficient = cf.hazards_.iloc[0, 0]
    assert np.abs(first_coefficient - -0.0335) < 0.0001
def test_strata_removes_variable_from_summary_output(self):
    """A column used in ``strata`` must not appear in the summary index."""
    rossi = load_rossi()

    fitter = CoxPHFitter()
    fitter.fit(rossi, 'week', 'arrest', strata=['race'])

    reported_variables = fitter.summary.index
    assert 'race' not in reported_variables
def test_efron_newtons_method(self, data_nus):
    """``_newton_rhaphson`` must return -0.0335 (within 1e-4) as its first entry."""
    solver = CoxPHFitter()._newton_rhaphson

    covariates = data_nus[['x']]
    durations = data_nus['t']
    events = data_nus['E']

    solution = solver(covariates, durations, events)
    assert np.abs(solution[0][0] - -0.0335) < 0.0001
def regression_models(self):
    """Return fresh fitters: a plain Cox model, an Aalen model and a stratified Cox model."""
    plain_cox = CoxPHFitter()
    aalen = AalenAdditiveFitter()
    stratified_cox = CoxPHFitter(strata=['race', 'paro', 'mar', 'wexp'])
    return [plain_cox, aalen, stratified_cox]
# Kaplan-Meier fit over all rows, labelled 'All patients'.
# NOTE(review): kaplen_meier, time_of_event, time, event, df and plt are
# defined earlier in the script, outside this fragment.
kaplen_meier.fit(time_of_event, timeline=time, event_observed=event, label='All patients')
kaplen_meier.plot()
plt.show()

# Stratify by congestive heart complications: rows where df['chf'] == 1
# versus the rest, drawn on the same axes.
history = df['chf'] == 1;
kaplen_meier = KaplanMeierFitter()
kaplen_meier.fit(time_of_event[history], timeline=time, event_observed=event[history], label='Congestive heart complications')
ax = kaplen_meier.plot()
kaplen_meier.fit(time_of_event[~history], timeline=time, event_observed=event[~history], label='No congestive heart complications')
kaplen_meier.plot(ax=ax, c="b")
plt.show()

# Cox proportional hazard model with 'lenfol' as the duration column and
# 'fstat' as the event column; the remaining columns are covariates.
ph_data = df[["fstat", "lenfol", "bmi", "age"]]
ph = CoxPHFitter()
ph.fit(ph_data, 'lenfol', event_col='fstat')
ph.print_summary()
print(ph.baseline_hazard_.head())

# Use predict_survival_function on a few covariate rows and plot the result.
# NOTE(review): `.ix` has been removed from pandas; whether `.loc[23:25]`
# or `.iloc[23:25]` is the right replacement depends on df's index - confirm.
x = ph_data[ph_data.columns.difference(['lenfol', 'fstat'])].ix[23:25]
print(x)
ph.predict_survival_function(x).plot()
plt.show()
def test_cross_validator_with_predictor_and_kwargs():
    """Cross-validation forwards ``predictor_kwargs`` and returns one score per fold."""
    fitter = CoxPHFitter()
    dataset = load_regression_dataset()

    # No event_col is passed here; only the duration column is named.
    results_06 = utils.k_fold_cross_validation(
        fitter,
        dataset,
        duration_col='T',
        k=3,
        predictor="predict_percentile",
        predictor_kwargs={'p': 0.6},
    )
    assert len(results_06) == 3
def test_cross_validator_with_stratified_cox_model():
    """Smoke test: cross-validating a Cox model built with ``strata`` must not raise."""
    stratified = CoxPHFitter(strata=['race'])
    rossi = load_rossi()
    utils.k_fold_cross_validation(stratified, rossi, duration_col='week', event_col='arrest')
def test_input_column_order_is_equal_to_output_hazards_order(self):
    """The columns of ``hazards_`` must follow the covariate order of the input frame."""
    rossi = load_rossi()
    cp = CoxPHFitter()
    expected = ['fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio']

    # 'week' is the duration and 'arrest' the event flag, as in every
    # other rossi fit in this file. The two keyword arguments were
    # previously swapped, so the model was fitted on the wrong roles.
    cp.fit(rossi, duration_col='week', event_col='arrest')
    assert list(cp.hazards_.columns) == expected
#cox regression
if __name__ == "__main__":
    # Times a single CoxPHFitter.fit call on the rossi dataset repeated
    # 20 times over, and prints the elapsed wall-clock seconds.
    import pandas as pd
    import time

    from lifelines.estimation import CoxPHFitter
    from lifelines.datasets import load_rossi

    rossi = load_rossi()
    enlarged = pd.concat([rossi] * 20)

    fitter = CoxPHFitter()
    started_at = time.time()
    fitter.fit(enlarged, duration_col='week', event_col="arrest")
    elapsed = time.time() - started_at
    print("--- %s seconds ---" % (elapsed,))