def test_incidence():
    """Check cumulative incidence estimates against R/Stata reference values.

    Reference computation in R:
        ftime = c(1, 1, 2, 4, 4, 4, 6, 6, 7, 8, 9, 9, 9, 1, 2, 2, 4, 4)
        fstat = c(1, 1, 1, 2, 2, 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0)
        cuminc(ftime, fstat)

    The standard errors agree with Stata, not with R (cmprisk
    package), which uses a different SE formula from Aalen (1978).

    To check with Stata:
        stset ftime failure(fstat==1)
        stcompet ci=ci, compet1(2)
    """
    ftime = np.asarray(
        [1, 1, 2, 4, 4, 4, 6, 6, 7, 8, 9, 9, 9, 1, 2, 2, 4, 4])
    fstat = np.asarray(
        [1, 1, 1, 2, 2, 2, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0])

    ci = CumIncidenceRight(ftime, fstat)

    # Expected cumulative incidence functions, one per competing risk.
    expected_cinc = [
        np.array([0.11111111, 0.17037037, 0.17037037, 0.17037037,
                  0.17037037, 0.17037037, 0.17037037]),
        np.array([0., 0., 0.20740741, 0.20740741,
                  0.20740741, 0.20740741, 0.20740741]),
        np.array([0., 0., 0., 0.17777778,
                  0.26666667, 0.26666667, 0.26666667]),
    ]
    for expected, actual in zip(expected_cinc, ci.cinc):
        assert_allclose(expected, actual)

    # Expected standard errors (Stata formula, not Aalen 1978).
    expected_se = [
        np.array([0.07407407, 0.08976251, 0.08976251, 0.08976251,
                  0.08976251, 0.08976251, 0.08976251]),
        np.array([0., 0., 0.10610391, 0.10610391,
                  0.10610391, 0.10610391, 0.10610391]),
        np.array([0., 0., 0., 0.11196147,
                  0.12787781, 0.12787781, 0.12787781]),
    ]
    for expected, actual in zip(expected_se, ci.cinc_se):
        assert_allclose(expected, actual)

    # Simple check for frequency weights: unit weights must reproduce
    # the unweighted estimates exactly.
    unit_weights = np.ones(len(ftime))
    ciw = CumIncidenceRight(ftime, fstat, freq_weights=unit_weights)
    for k in range(3):
        assert_allclose(ci.cinc[k], ciw.cinc[k])
def test_kernel_cumincidence2():
    """Smoke test: kernel cumulative incidence runs with tied event times."""
    np.random.seed(3434)
    n = 100
    exog = np.random.normal(size=(n, 3))
    # Integer times guarantee ties; statuses drawn from {0, 1, 2}.
    time = np.random.randint(0, 10, size=n)
    status = np.random.randint(0, 3, size=n)
    CumIncidenceRight(time, status, exog=exog, bw_factor=10000)
def test_kernel_cumincidence1():
    """With a very large bandwidth, the kernel procedure should agree with
    the standard cumulative incidence calculation.

    (Note: the results do not agree perfectly when there are tied times,
    so continuous event times are used here.)
    """
    np.random.seed(3434)
    n = 100
    exog = np.random.normal(size=(n, 3))
    time = np.random.uniform(0, 10, size=n)
    status = np.random.randint(0, 3, size=n)

    baseline = CumIncidenceRight(time, status)

    for dimred in (False, True):
        kernel = CumIncidenceRight(time, status, exog=exog,
                                   bw_factor=10000, dimred=dimred)
        assert_allclose(baseline.times, kernel.times)
        for k in (0, 1):
            assert_allclose(baseline.cinc[k], kernel.cinc[k], rtol=1e-5)
def test_incidence2():
    """The cumulative incidence functions for all competing risks must sum
    to the complementary survival function (1 - S(t))."""
    np.random.seed(2423)
    n = 200
    time = -np.log(np.random.uniform(size=n))
    status = np.random.randint(0, 3, size=n)

    # Sort by event time so positions line up across the two estimators.
    order = np.argsort(time)
    time, status = time[order], status[order]

    ci = CumIncidenceRight(time, status)

    # Collapse competing risks into a single "any event" indicator.
    any_event = 1 * (status >= 1)
    sf = SurvfuncRight(time, any_event)

    complement = 1 - sf.surv_prob
    total_cinc = (ci.cinc[0] + ci.cinc[1])[np.flatnonzero(any_event)]
    assert_allclose(complement, total_cinc)