def test_peto_weighted_logrank_on_leukemia_dataset():
    """
    Check the Peto-weighted log-rank test on the leukemia dataset.

    The expected statistic is taken from "Survival Analysis: A Self-learning
    Text" by Kleinbaum & Klein, 3rd edition, 2012.
    """
    leukemia = load_leukemia()

    # Split the rows into the two arms identified by the "Rx" column.
    arm_rx0 = leukemia[leukemia["Rx"] == 0]
    arm_rx1 = leukemia[leukemia["Rx"] == 1]

    outcome = stats.logrank_test(
        arm_rx0["t"],
        arm_rx1["t"],
        arm_rx0["status"],
        arm_rx1["status"],
        weightings="peto",
    )

    # Compare against the textbook value within the test's absolute tolerance.
    deviation = abs(outcome.test_statistic - 14.084139)
    assert deviation < 10e-6
    assert outcome.test_name == "Peto_test"
# * a duration of time for the observation
# * a binary column indicating censoring (`1` if the death event was observed, `0` if the death event was not observed)
#
# Sometimes, you will have to engineer these features. How might you go about that? What information would you need?
# -

# # Example 1: Leukemia
#
# `lifelines` comes with some datasets to get you started playing around with it.
#
# Most of the datasets are cleaned-up versions of real datasets. Here we will use their Leukemia dataset comparing 2 different treatments taken from http://web1.sph.emory.edu/dkleinb/allDatasets/surv2datasets/anderson.dat

# + {"id": "d51G4sPqsgww", "colab_type": "code", "outputId": "6704a8e2-79ce-4fa5-d596-88c7bec6d818", "colab": {"base_uri": "https://localhost:8080/", "height": 206}}
from lifelines.datasets import load_leukemia

leukemia = load_leukemia()
leukemia.head()

# + {"id": "DQ936c5tsgw-", "colab_type": "code", "outputId": "94d35943-a551-46c5-975e-3862872cce1e", "colab": {"base_uri": "https://localhost:8080/", "height": 191}}
leukemia.info()

# + {"id": "MDvA8Z9rsgxL", "colab_type": "code", "outputId": "0ce5b40c-0a8c-4508-990f-25ad0d0a1810", "colab": {"base_uri": "https://localhost:8080/", "height": 300}}
leukemia.describe()

# + {"id": "tDasOEocsgxQ", "colab_type": "code", "outputId": "fa606f2c-3531-4d30-d453-55f257b85a9f", "colab": {"base_uri": "https://localhost:8080/", "height": 376}}
# Pull the `t` and `status` columns out as plain arrays for plotting.
durations = leukemia["t"].values
events = leukemia["status"].values

ax = lifelines.plotting.plot_lifetimes(
    durations=durations,
    event_observed=events,
)