def test_predict_log_hazard_relative_to_mean_without_normalization(self, rossi):
    """Without normalization, the relative log-hazard must divide by the
    partial hazard evaluated at the covariate means — which is not 1."""
    fitter = CoxPHFitter(normalize=False)
    fitter.fit(rossi, 'week', 'arrest')

    mean_row = rossi.mean(0).to_frame().T
    hazard_at_mean = fitter.predict_partial_hazard(mean_row)
    # With unnormalized data the hazard at the mean is not trivially 1.
    assert hazard_at_mean.values[0][0] != 1.0

    expected = np.log(fitter.predict_partial_hazard(rossi) / hazard_at_mean.squeeze())
    assert_frame_equal(fitter.predict_log_hazard_relative_to_mean(rossi), expected)
def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
    """predict_partial_hazard must return identical frames whether it is
    given a DataFrame or the equivalent raw numpy array."""
    model = CoxPHFitter()
    model.fit(data_pred2, 't', 'E')

    covariates = data_pred2[model.data.columns]
    from_array = model.predict_partial_hazard(np.array(covariates))
    from_frame = model.predict_partial_hazard(covariates)
    assert_frame_equal(from_array, from_frame)
def test_predict_log_hazard_relative_to_mean_with_normalization(self, rossi):
    """With normalization the covariate means map to the zero vector, so
    exp(beta * 0) == 1 and exp(beta * X) / exp(beta * 0) == exp(beta * X):
    the relative log-hazard equals the plain log partial hazard."""
    fitter = CoxPHFitter(normalize=True)
    fitter.fit(rossi, 'week', 'arrest')

    expected = np.log(fitter.predict_partial_hazard(rossi))
    assert_frame_equal(fitter.predict_log_hazard_relative_to_mean(rossi), expected)
def test_data_normalization(self, data_pred2):
    """During fit, CoxPH copies the training data and normalizes it.

    Future calls should be normalized in the same way, and the internal
    training set should not be saved in a normalized state.
    """
    # Pass normalize=True explicitly: this test's premise is that the fitter
    # normalizes, so it must not depend on the constructor's default. This
    # also matches the sibling test of the same name that already does so.
    cf = CoxPHFitter(normalize=True)
    cf.fit(data_pred2, duration_col='t', event_col='E')

    # Concordance on the internally stored training set.
    ci_trn = concordance_index(cf.durations,
                               -cf.predict_partial_hazard(cf.data).values,
                               cf.event_observed)
    # New data should normalize in the exact same way, giving the same ranking.
    ci_org = concordance_index(data_pred2['t'],
                               -cf.predict_partial_hazard(data_pred2[['x1', 'x2']]).values,
                               data_pred2['E'])

    assert ci_org == ci_trn
def test_data_normalization(self, data_pred2):
    """Fitting normalizes a copy of the training data; predictions on new
    data must be normalized identically, and the stored training set must
    remain un-normalized — so both rankings agree.

    NOTE(review): another definition with this exact name exists in this
    file; within one class the later definition silently shadows the earlier.
    """
    model = CoxPHFitter(normalize=True)
    model.fit(data_pred2, duration_col='t', event_col='E')

    # Concordance computed from the internally stored training data.
    internal_hazards = -model.predict_partial_hazard(model.data).values
    ci_internal = concordance_index(model.durations, internal_hazards, model.event_observed)

    # Concordance from the same rows supplied as fresh external data.
    external_hazards = -model.predict_partial_hazard(data_pred2[['x1', 'x2']]).values
    ci_external = concordance_index(data_pred2['t'], external_hazards, data_pred2['E'])

    assert ci_external == ci_internal
def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
    """DataFrame and numpy-array inputs must produce equal partial hazards,
    both without and with normalization."""
    # First without normalization.
    model = CoxPHFitter(normalize=False)
    model.fit(data_pred2, 't', 'E')
    frame_input = data_pred2[model.data.columns]
    assert np.all(model.predict_partial_hazard(frame_input) ==
                  model.predict_partial_hazard(np.array(frame_input)))

    # Now with normalization.
    model = CoxPHFitter(normalize=True)
    model.fit(data_pred2, 't', 'E')
    assert np.all(model.predict_partial_hazard(frame_input) ==
                  model.predict_partial_hazard(np.array(frame_input)))
def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
    """Partial hazards from a DataFrame and from its ndarray form must agree,
    whether or not the fitter normalizes.

    NOTE(review): an identical definition of this test appears elsewhere in
    this file; the duplicate silently shadows the earlier one — consider
    deleting one copy.
    """
    for normalize in (False, True):
        cf = CoxPHFitter(normalize=normalize)
        cf.fit(data_pred2, 't', 'E')
        X = data_pred2[cf.data.columns]
        assert np.all(cf.predict_partial_hazard(X) ==
                      cf.predict_partial_hazard(np.array(X)))
def test_prediction_methods_respect_index(self, data_pred2):
    """Every prediction method must carry the input DataFrame's index
    through to its output, including a reverse-sorted index."""
    # .ix was deprecated in pandas 0.20 and removed in 1.0; .loc is the
    # label-based (inclusive) replacement, selecting rows with labels 0..3
    # exactly as .ix[:3] did on this default integer index.
    x = data_pred2[['x1', 'x2']].loc[:3].sort_index(ascending=False)
    expected_index = pd.Index(np.array([3, 2, 1, 0]))

    cph = CoxPHFitter()
    cph.fit(data_pred2, duration_col='t', event_col='E')
    npt.assert_array_equal(cph.predict_partial_hazard(x).index, expected_index)
    npt.assert_array_equal(cph.predict_percentile(x).index, expected_index)
    npt.assert_array_equal(cph.predict_expectation(x).index, expected_index)

    aaf = AalenAdditiveFitter()
    aaf.fit(data_pred2, duration_col='t', event_col='E')
    npt.assert_array_equal(aaf.predict_percentile(x).index, expected_index)
    npt.assert_array_equal(aaf.predict_expectation(x).index, expected_index)
def test_cox_ph_prediction_monotonicity(self, data_pred2):
    """Concordance-wise, all prediction methods should be monotonic versions
    of one another (unless numerical factors screw it up), so each must
    yield the same concordance index."""
    durations = data_pred2['t']
    events = data_pred2['E']
    covariates = data_pred2[['x1', 'x2']]

    model = CoxPHFitter()
    model.fit(data_pred2, duration_col='t', event_col='E')

    # Partial hazards provide the baseline ranking (negated: higher hazard,
    # shorter survival).
    ci_hazard = concordance_index(durations,
                                  -model.predict_partial_hazard(covariates).values,
                                  events)

    assert ci_hazard == concordance_index(durations,
                                          model.predict_median(covariates).ravel(),
                                          events)
    assert ci_hazard == concordance_index(durations,
                                          model.predict_expectation(covariates).ravel(),
                                          events)
def test_cox_ph_prediction_monotonicity(self, data_pred2):
    """All prediction methods are monotone transforms of the partial hazard
    (barring numerical issues), so the concordance index must match for
    both normalize settings."""
    durations = data_pred2['t']
    events = data_pred2['E']
    X = data_pred2[['x1', 'x2']]

    for normalize in [True, False]:
        msg = ("Predict methods should get the same concordance" +
               " when {}normalizing".format('' if normalize else 'not '))
        model = CoxPHFitter(normalize=normalize)
        model.fit(data_pred2, duration_col='t', event_col='E')

        # Partial hazards are the baseline ranking.
        baseline = concordance_index(durations,
                                     -model.predict_partial_hazard(X).values,
                                     events)
        assert baseline == concordance_index(durations,
                                             model.predict_median(X).ravel(),
                                             events), msg
        assert baseline == concordance_index(durations,
                                             model.predict_expectation(X).ravel(),
                                             events), msg
def test_cox_ph_prediction_monotonicity(self, data_pred2):
    """Every prediction method should rank subjects identically to the
    partial hazard, with and without normalization.

    NOTE(review): an identical definition of this test appears elsewhere in
    this file; the duplicate silently shadows the earlier one.
    """
    t, e = data_pred2['t'], data_pred2['E']
    covs = data_pred2[['x1', 'x2']]

    for normalize in [True, False]:
        msg = ("Predict methods should get the same concordance" +
               " when {}normalizing".format('' if normalize else 'not '))
        fitter = CoxPHFitter(normalize=normalize)
        fitter.fit(data_pred2, duration_col='t', event_col='E')

        # Base comparison is the (negated) partial hazard ranking.
        ci_base = concordance_index(t, -fitter.predict_partial_hazard(covs).values, e)
        ci_median = concordance_index(t, fitter.predict_median(covs).ravel(), e)
        assert ci_base == ci_median, msg
        ci_expect = concordance_index(t, fitter.predict_expectation(covs).ravel(), e)
        assert ci_base == ci_expect, msg
def test_concordance_index_fast_is_same_as_slow():
    """The fast concordance implementation must agree with the slow one on
    degenerate predictions, random data, and a real fitted model."""
    size = 100
    times = np.random.normal(size=size)
    preds = np.random.normal(size=size)
    censor = np.random.choice([0, 1], size=size)
    zeros = np.zeros_like(times)

    # Degenerate predictions — hard to imagine these failing.
    assert slow_cindex(times, zeros, censor) == fast_cindex(times, zeros, censor)
    assert slow_cindex(times, times, censor) == fast_cindex(times, times, censor)
    # This is the real test though: arbitrary predictions.
    assert slow_cindex(times, preds, censor) == fast_cindex(times, preds, censor)

    # Cross-check against hazards from a model fit on real data.
    cp = CoxPHFitter()
    cp.fit(load_rossi(), duration_col='week', event_col='arrest')
    T = cp.durations.values.ravel()
    P = -cp.predict_partial_hazard(cp.data).values.ravel()
    E = cp.event_observed.values.ravel()
    assert slow_cindex(T, P, E) == fast_cindex(T, P, E)
def test_concordance_index_fast_is_same_as_slow():
    """Fast and slow concordance-index implementations must give identical
    results, on synthetic data and on a fitted CoxPH model.

    NOTE(review): an identical definition of this test appears elsewhere in
    this file; the duplicate silently shadows the earlier one.
    """
    n = 100
    event_times = np.random.normal(size=n)
    scores = np.random.normal(size=n)
    observed = np.random.choice([0, 1], size=n)
    flat = np.zeros_like(event_times)

    # Trivial score vectors first — hard to imagine these failing.
    assert slow_cindex(event_times, flat, observed) == fast_cindex(event_times, flat, observed)
    assert slow_cindex(event_times, event_times, observed) == fast_cindex(event_times, event_times, observed)
    # The real test: independent random scores.
    assert slow_cindex(event_times, scores, observed) == fast_cindex(event_times, scores, observed)

    df = load_rossi()
    cp = CoxPHFitter()
    cp.fit(df, duration_col='week', event_col='arrest')
    T = cp.durations.values.ravel()
    P = -cp.predict_partial_hazard(cp.data).values.ravel()
    E = cp.event_observed.values.ravel()
    assert slow_cindex(T, P, E) == fast_cindex(T, P, E)
def test_predict_log_hazard_relative_to_mean(self, rossi):
    """predict_log_hazard_relative_to_mean must equal the log of the partial
    hazard divided by the partial hazard at the covariate means."""
    fitter = CoxPHFitter()
    fitter.fit(rossi, 'week', 'arrest')

    mean_row = rossi.mean(0).to_frame().T
    hazard_ratio = (fitter.predict_partial_hazard(rossi) /
                    fitter.predict_partial_hazard(mean_row).squeeze())
    assert_frame_equal(fitter.predict_log_hazard_relative_to_mean(rossi),
                       np.log(hazard_ratio))