Esempio n. 1
0
def test_leukemia():
    """Compare Nelson-Aalen predictions on the leukemia data with a textbook.

    The estimator is built with ``var_type="aalen"`` and
    ``tie_break="discrete"`` and fitted with ``group`` as the grouping column;
    only the ``treatment`` column of the predictions is checked.
    """
    data = datasets.leukemia()
    estimator = NelsonAalen(var_type="aalen", tie_break="discrete")
    estimator.fit("time", status="status", group="group", data=data)

    # Reference values for the treatment group, taken from Table 4.2 on p. 94
    # of Klein & Moeschberger (2003), which prints the Nelson-Aalen cumulative
    # hazard estimates and their standard errors to 4 decimal places.
    reference = [
        # (time, cumulative hazard, standard error)
        (0, 0., 0.),
        (6, 0.1428, 0.0825),
        (7, 0.2017, 0.1015),
        (10, 0.2683, 0.1212),
        (13, 0.3517, 0.1473),
        (16, 0.4426, 0.1729),
        (22, 0.5854, 0.2243),
        (23, 0.7521, 0.2795),
    ]
    times = np.array([row[0] for row in reference])
    expected_cum_haz = [row[1] for row in reference]
    expected_std_err = [row[2] for row in reference]

    # Query exactly at the tabulated times and again slightly after them: the
    # estimates should be right-continuous and piecewise constant, so both
    # queries must reproduce the same tabulated values.
    for shift in (0., 0.5):
        predicted = estimator.predict(times + shift, return_se=True)
        cum_haz, std_err = predicted

        np.testing.assert_almost_equal(
            cum_haz.treatment, expected_cum_haz, decimal=3)
        np.testing.assert_almost_equal(
            std_err.treatment, expected_std_err, decimal=3)
Esempio n. 2
0
def test_init_with_dataframe():
    """Build ``SurvivalData`` objects while passing a ``data`` argument.

    For each dataset the object is constructed twice: once referring to the
    columns by name and once passing the columns themselves, with the
    dataset supplied as ``data`` both times. The test passes if no
    constructor call raises.
    """
    # --- Leukemia dataset -------------------------------------------------
    leuk = datasets.leukemia()

    # Columns referenced by name
    SurvivalData("time", status="status", group="group", data=leuk)

    # Columns passed directly (``data`` is still supplied)
    leuk_time = leuk.time
    leuk_status = leuk.status
    leuk_group = leuk.group
    SurvivalData(leuk_time, status=leuk_status, group=leuk_group, data=leuk)

    # --- Channing House dataset (also has entry times) --------------------
    chan = datasets.channing()

    # Columns referenced by name
    SurvivalData(
        "exit",
        entry="entry",
        status="status",
        group="sex",
        data=chan,
        warn=False,
    )

    # Columns passed directly (``data`` is still supplied)
    chan_exit = chan.exit
    chan_entry = chan.entry
    chan_status = chan.status
    chan_sex = chan.sex
    SurvivalData(
        chan_exit,
        entry=chan_entry,
        status=chan_status,
        group=chan_sex,
        data=chan,
        warn=False,
    )
Esempio n. 3
0
def test_formatting():
    """Exercise ``SurvivalData.set_format`` with valid and invalid options."""
    survival_data = SurvivalData(
        "time",
        status="status",
        group="group",
        data=datasets.leukemia(),
    )

    # A recognized formatting option must be accepted without error
    survival_data.set_format(censor_marker="!")

    # An unrecognized option name is expected to raise a RuntimeError
    with pytest.raises(RuntimeError):
        survival_data.set_format(invalid_option="??")
Esempio n. 4
0
def test_fit_predict_summary():
    """Smoke-test ``Breslow`` over every combination in ``NA_PARAMETERS``.

    For each (conf_type, var_type, tie_break) combination the estimator is
    fitted to the leukemia data, then prediction (with and without
    confidence intervals) and the summary table are invoked. Nothing is
    asserted about the results; the test only checks that no call raises.
    """
    survival_data = SurvivalData(
        "time",
        status="status",
        group="group",
        data=datasets.leukemia(),
    )

    for combination in product(*NA_PARAMETERS):
        conf_type, var_type, tie_break = combination
        estimator = Breslow(
            conf_type=conf_type,
            var_type=var_type,
            tie_break=tie_break,
        )
        estimator.fit(survival_data)

        # TODO: figure out better tests here
        estimator.predict([0, 1, 2])
        estimator.predict([0, 1, 2], return_ci=True)
        estimator.summary.table("treatment")
Esempio n. 5
0
def test_init_with_arrays():
    """Build ``SurvivalData`` objects directly from dataset columns.

    No ``data`` argument is given; every input is passed as a column taken
    from the dataset. The test passes if no constructor call raises.
    """
    # Leukemia dataset: time, status and group only
    leuk = datasets.leukemia()
    leuk_time = leuk.time
    leuk_status = leuk.status
    leuk_group = leuk.group
    SurvivalData(leuk_time, status=leuk_status, group=leuk_group)

    # Channing House dataset: additionally supplies entry times
    chan = datasets.channing()
    chan_exit = chan.exit
    chan_entry = chan.entry
    chan_status = chan.status
    chan_sex = chan.sex
    SurvivalData(
        chan_exit,
        entry=chan_entry,
        status=chan_status,
        group=chan_sex,
        warn=False,
    )
Esempio n. 6
0
def test_leukemia():
    """Check Kaplan-Meier computed values on the leukemia dataset.

    Three things are compared against published reference values:

    1. survival estimates and Greenwood standard errors for the treatment
       group,
    2. survival estimates for the control group,
    3. treatment-group confidence interval bounds for the "log", "log-log"
       and "linear" interval types.

    Every comparison is made twice: at the tabulated times and at those times
    shifted forward by 0.5, to ensure that the estimates are right-continuous
    and piecewise constant.
    """
    leukemia = datasets.leukemia()
    kaplan_meier = KaplanMeier(var_type="greenwood")
    kaplan_meier.fit("time", status="status", group="group", data=leukemia)

    # Forward perturbations applied to the query times in every check below
    shifts = (0., 0.5)

    # Table 4.1 on p. 49 in Cox & Oakes (1984) displays the Kaplan-Meier
    # estimates for the treatment group to 4 decimal places, and Table 4.1B on
    # p. 93 in Klein & Moeschberger (2003) lists their Greenwood's formula-based
    # standard errors to 3 decimal places
    times = np.array([0, 6, 7, 10, 13, 16, 22, 23])
    survival_treatment = \
        [1., 0.8571, 0.8067, 0.7529, 0.6902, 0.6275, 0.5378, 0.4482]
    std_err_treatment = [0., 0.076, 0.087, 0.096, 0.107, 0.114, 0.128, 0.135]

    for eps in shifts:
        survival_pred, std_err_pred = \
            kaplan_meier.predict(times + eps, return_se=True)

        np.testing.assert_almost_equal(survival_pred.treatment,
                                       survival_treatment,
                                       decimal=3)

        np.testing.assert_almost_equal(std_err_pred.treatment,
                                       std_err_treatment,
                                       decimal=3)

    # The Example in the left margin of p. 53 in Kleinbaum & Klein (2005) lists
    # the Kaplan-Meier estimates for the control group. Since there is no
    # censoring in the control group, this is the same as the empirical survival
    # function
    times = np.asarray([0, 1, 2, 3, 4, 5, 8, 11, 12, 15, 17, 22, 23])
    survival_control = [
        1., 19 / 21, 17 / 21, 16 / 21, 14 / 21, 12 / 21, 8 / 21, 6 / 21,
        4 / 21, 3 / 21, 2 / 21, 1 / 21, 0.
    ]

    for eps in shifts:
        # The standard errors are still requested (as before) so the same
        # code path is exercised, but they are not compared for this group
        survival_pred, _ = kaplan_meier.predict(times + eps, return_se=True)

        np.testing.assert_almost_equal(survival_pred.control,
                                       survival_control,
                                       decimal=3)

    # Page 27 in http://www.math.ucsd.edu/~rxu/math284/slect2.pdf lists
    # Kaplan-Meier summary tables from R's survfit function (in the survival
    # package) with three different types of confidence intervals for the
    # treatment group
    times = np.array([6, 7, 10, 13, 16, 22, 23])

    # One entry per interval type: (conf_type, lower bounds, upper bounds)
    expected_intervals = [
        (
            "log",
            [0.7198171, 0.6531242, 0.5859190, 0.5096131, 0.4393939,
             0.3370366, 0.2487882],
            [1.0000000, 0.9964437, 0.9675748, 0.9347692, 0.8959949,
             0.8582008, 0.8073720],
        ),
        (
            "log-log",
            [0.6197180, 0.5631466, 0.5031995, 0.4316102, 0.3675109,
             0.2677789, 0.1880520],
            [0.9515517, 0.9228090, 0.8893618, 0.8490660, 0.8049122,
             0.7467907, 0.6801426],
        ),
        (
            # In R this interval type is called "plain"
            "linear",
            [0.7074793, 0.6363327, 0.5640993, 0.4808431, 0.4039095,
             0.2864816, 0.1843849],
            [1.0000000, 0.9771127, 0.9417830, 0.8995491, 0.8509924,
             0.7891487, 0.7119737],
        ),
    ]

    for conf_type, ci_lower, ci_upper in expected_intervals:
        kaplan_meier = KaplanMeier(conf_type=conf_type, var_type="greenwood")
        kaplan_meier.fit("time", status="status", group="group",
                         data=leukemia)

        for eps in shifts:
            # Identify the failing case, since all interval types now share
            # the same assertion lines
            label = "conf_type={!r}, eps={}".format(conf_type, eps)

            _, ci_lower_pred, ci_upper_pred = \
                kaplan_meier.predict(times + eps, return_ci=True)

            np.testing.assert_almost_equal(ci_lower_pred.treatment,
                                           ci_lower,
                                           decimal=7,
                                           err_msg=label)
            np.testing.assert_almost_equal(ci_upper_pred.treatment,
                                           ci_upper,
                                           decimal=7,
                                           err_msg=label)