Ejemplo n.º 1
0
 def test_predict_log_hazard_relative_to_mean_without_normalization(self, rossi):
     cox = CoxPHFitter(normalize=False)
     cox.fit(rossi, 'week', 'arrest')
     log_relative_hazards = cox.predict_log_hazard_relative_to_mean(rossi)
     means = rossi.mean(0).to_frame().T
     assert cox.predict_partial_hazard(means).values[0][0] != 1.0  
     assert_frame_equal(log_relative_hazards, np.log(cox.predict_partial_hazard(rossi) / cox.predict_partial_hazard(means).squeeze()))
Ejemplo n.º 2
0
    def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
        cf = CoxPHFitter()
        cf.fit(data_pred2, 't', 'E')

        X = data_pred2[cf.data.columns]
        assert_frame_equal(
            cf.predict_partial_hazard(np.array(X)),
            cf.predict_partial_hazard(X)
        )
Ejemplo n.º 3
0
    def test_predict_log_hazard_relative_to_mean_with_normalization(self, rossi):
        cox = CoxPHFitter(normalize=True)
        cox.fit(rossi, 'week', 'arrest')

        # they are equal because the data is normalized, so the mean of the covarites is all 0,
        # thus exp(beta * 0) == 1, so exp(beta * X)/exp(beta * 0) = exp(beta * X)
        assert_frame_equal(cox.predict_log_hazard_relative_to_mean(rossi), np.log(cox.predict_partial_hazard(rossi)))
Ejemplo n.º 4
0
    def test_data_normalization(self, data_pred2):
        # During fit, CoxPH copies the training data and normalizes it.
        # Future calls should be normalized in the same way and
        # internal training set should not be saved in a normalized state.

        cf = CoxPHFitter()
        cf.fit(data_pred2, duration_col='t', event_col='E')

        # Internal training set
        ci_trn = concordance_index(cf.durations,
                                   -cf.predict_partial_hazard(cf.data).values,
                                   cf.event_observed)
        # New data should normalize in the exact same way
        ci_org = concordance_index(data_pred2['t'],
                                   -cf.predict_partial_hazard(data_pred2[['x1', 'x2']]).values,
                                   data_pred2['E'])

        assert ci_org == ci_trn
Ejemplo n.º 5
0
    def test_data_normalization(self, data_pred2):
        # During fit, CoxPH copies the training data and normalizes it.
        # Future calls should be normalized in the same way and
        # internal training set should not be saved in a normalized state.

        cf = CoxPHFitter(normalize=True)
        cf.fit(data_pred2, duration_col='t', event_col='E')

        # Internal training set
        ci_trn = concordance_index(cf.durations,
                                   -cf.predict_partial_hazard(cf.data).values,
                                   cf.event_observed)
        # New data should normalize in the exact same way
        ci_org = concordance_index(data_pred2['t'],
                                   -cf.predict_partial_hazard(data_pred2[['x1', 'x2']]).values,
                                   data_pred2['E'])

        assert ci_org == ci_trn
Ejemplo n.º 6
0
    def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
        # First without normalization
        cf = CoxPHFitter(normalize=False)
        cf.fit(data_pred2, 't', 'E')

        X = data_pred2[cf.data.columns]
        hazards = cf.predict_partial_hazard(X)

        # A Numpy array should return the same result
        hazards_n = cf.predict_partial_hazard(np.array(X))
        assert np.all(hazards == hazards_n)

        # Now with normalization
        cf = CoxPHFitter(normalize=True)
        cf.fit(data_pred2, 't', 'E')

        hazards = cf.predict_partial_hazard(X)

        # Compare with array argument
        hazards_n = cf.predict_partial_hazard(np.array(X))
        assert np.all(hazards == hazards_n)
Ejemplo n.º 7
0
    def test_using_dataframes_vs_numpy_arrays(self, data_pred2):
        # First without normalization
        cf = CoxPHFitter(normalize=False)
        cf.fit(data_pred2, 't', 'E')

        X = data_pred2[cf.data.columns]
        hazards = cf.predict_partial_hazard(X)

        # A Numpy array should return the same result
        hazards_n = cf.predict_partial_hazard(np.array(X))
        assert np.all(hazards == hazards_n)

        # Now with normalization
        cf = CoxPHFitter(normalize=True)
        cf.fit(data_pred2, 't', 'E')

        hazards = cf.predict_partial_hazard(X)

        # Compare with array argument
        hazards_n = cf.predict_partial_hazard(np.array(X))
        assert np.all(hazards == hazards_n)
Ejemplo n.º 8
0
    def test_prediction_methods_respect_index(self, data_pred2):
        x = data_pred2[['x1', 'x2']].ix[:3].sort_index(ascending=False)
        expected_index = pd.Index(np.array([3, 2, 1, 0]))

        cph = CoxPHFitter()
        cph.fit(data_pred2, duration_col='t', event_col='E')
        npt.assert_array_equal(cph.predict_partial_hazard(x).index, expected_index)
        npt.assert_array_equal(cph.predict_percentile(x).index, expected_index)
        npt.assert_array_equal(cph.predict_expectation(x).index, expected_index)

        aaf = AalenAdditiveFitter()
        aaf.fit(data_pred2, duration_col='t', event_col='E')
        npt.assert_array_equal(aaf.predict_percentile(x).index, expected_index)
        npt.assert_array_equal(aaf.predict_expectation(x).index, expected_index)
Ejemplo n.º 9
0
    def test_cox_ph_prediction_monotonicity(self, data_pred2):
        # Concordance wise, all prediction methods should be monotonic versions
        # of one-another, unless numerical factors screw it up.
        t = data_pred2['t']
        e = data_pred2['E']
        X = data_pred2[['x1', 'x2']]

        cf = CoxPHFitter()
        cf.fit(data_pred2, duration_col='t', event_col='E')

        # Base comparison is partial_hazards
        ci_ph = concordance_index(t, -cf.predict_partial_hazard(X).values, e)

        ci_med = concordance_index(t, cf.predict_median(X).ravel(), e)
        assert ci_ph == ci_med

        ci_exp = concordance_index(t, cf.predict_expectation(X).ravel(), e)
        assert ci_ph == ci_exp
Ejemplo n.º 10
0
    def test_cox_ph_prediction_monotonicity(self, data_pred2):
        # Concordance wise, all prediction methods should be monotonic versions
        # of one-another, unless numerical factors screw it up.
        t = data_pred2['t']
        e = data_pred2['E']
        X = data_pred2[['x1', 'x2']]

        for normalize in [True, False]:
            msg = ("Predict methods should get the same concordance" +
                   " when {}normalizing".format('' if normalize else 'not '))
            cf = CoxPHFitter(normalize=normalize)
            cf.fit(data_pred2, duration_col='t', event_col='E')

            # Base comparison is partial_hazards
            ci_ph = concordance_index(t, -cf.predict_partial_hazard(X).values, e)

            ci_med = concordance_index(t, cf.predict_median(X).ravel(), e)
            assert ci_ph == ci_med, msg

            ci_exp = concordance_index(t, cf.predict_expectation(X).ravel(), e)
            assert ci_ph == ci_exp, msg
Ejemplo n.º 11
0
    def test_cox_ph_prediction_monotonicity(self, data_pred2):
        # Concordance wise, all prediction methods should be monotonic versions
        # of one-another, unless numerical factors screw it up.
        t = data_pred2['t']
        e = data_pred2['E']
        X = data_pred2[['x1', 'x2']]

        for normalize in [True, False]:
            msg = ("Predict methods should get the same concordance" +
                   " when {}normalizing".format('' if normalize else 'not '))
            cf = CoxPHFitter(normalize=normalize)
            cf.fit(data_pred2, duration_col='t', event_col='E')

            # Base comparison is partial_hazards
            ci_ph = concordance_index(t, -cf.predict_partial_hazard(X).values, e)

            ci_med = concordance_index(t, cf.predict_median(X).ravel(), e)
            assert ci_ph == ci_med, msg

            ci_exp = concordance_index(t, cf.predict_expectation(X).ravel(), e)
            assert ci_ph == ci_exp, msg
Ejemplo n.º 12
0
def test_concordance_index_fast_is_same_as_slow():
    size = 100
    T = np.random.normal(size=size)
    P = np.random.normal(size=size)
    C = np.random.choice([0, 1], size=size)
    Z = np.zeros_like(T)

    # Hard to imagine these failing
    assert slow_cindex(T, Z, C) == fast_cindex(T, Z, C)
    assert slow_cindex(T, T, C) == fast_cindex(T, T, C)
    # This is the real test though
    assert slow_cindex(T, P, C) == fast_cindex(T, P, C)

    cp = CoxPHFitter()
    df = load_rossi()
    cp.fit(df, duration_col='week', event_col='arrest')

    T = cp.durations.values.ravel()
    P = -cp.predict_partial_hazard(cp.data).values.ravel()
    E = cp.event_observed.values.ravel()

    assert slow_cindex(T, P, E) == fast_cindex(T, P, E)
Ejemplo n.º 13
0
def test_concordance_index_fast_is_same_as_slow():
    size = 100
    T = np.random.normal(size=size)
    P = np.random.normal(size=size)
    C = np.random.choice([0, 1], size=size)
    Z = np.zeros_like(T)

    # Hard to imagine these failing
    assert slow_cindex(T, Z, C) == fast_cindex(T, Z, C)
    assert slow_cindex(T, T, C) == fast_cindex(T, T, C)
    # This is the real test though
    assert slow_cindex(T, P, C) == fast_cindex(T, P, C)

    cp = CoxPHFitter()
    df = load_rossi()
    cp.fit(df, duration_col='week', event_col='arrest')

    T = cp.durations.values.ravel()
    P = -cp.predict_partial_hazard(cp.data).values.ravel()
    E = cp.event_observed.values.ravel()

    assert slow_cindex(T, P, E) == fast_cindex(T, P, E)
Ejemplo n.º 14
0
 def test_predict_log_hazard_relative_to_mean(self, rossi):
     cox = CoxPHFitter()
     cox.fit(rossi, 'week', 'arrest')
     log_relative_hazards = cox.predict_log_hazard_relative_to_mean(rossi)
     means = rossi.mean(0).to_frame().T
     assert_frame_equal(log_relative_hazards, np.log(cox.predict_partial_hazard(rossi) / cox.predict_partial_hazard(means).squeeze()))