Ejemplo n.º 1
0
def test_peto_weighted_logrank_on_leukemia_dataset():
    """
    Check the Peto-weighted log-rank test on the leukemia dataset.

    The rows are split on the "Rx" column (0 versus 1) and the "t" and
    "status" columns of each half are handed to ``stats.logrank_test`` with
    ``weightings="peto"``.

    The expected statistic comes from "Survival Analysis: A Self-learning Text"
    by Kleinbaum & Klein, 3rd edition, 2012.
    """
    expected_statistic = 14.084139
    tolerance = 10e-6  # i.e. 1e-5

    leukemia = load_leukemia()
    rx_column = leukemia["Rx"]
    rx_zero = leukemia[rx_column == 0]
    rx_one = leukemia[rx_column == 1]

    outcome = stats.logrank_test(
        rx_zero["t"],
        rx_one["t"],
        rx_zero["status"],
        rx_one["status"],
        weightings="peto",
    )

    deviation = abs(outcome.test_statistic - expected_statistic)
    assert deviation < tolerance
    assert outcome.test_name == "Peto_test"
Ejemplo n.º 2
0
# * a duration of time for the observation
# * a binary column regarding censorship (`1` if the death event was observed, `0` if the death event was not observed)
#
# Sometimes, you will have to engineer these features. How might you go about that? What information would you need?
# -

# # Example 1: Leukemia
#
# `lifelines` comes with some datasets to get you started playing around with it.
#
# Most of the datasets are cleaned-up versions of real datasets. Here we will use the leukemia dataset, which compares 2 different treatments; the data is taken from http://web1.sph.emory.edu/dkleinb/allDatasets/surv2datasets/anderson.dat

# + {"id": "d51G4sPqsgww", "colab_type": "code", "outputId": "6704a8e2-79ce-4fa5-d596-88c7bec6d818", "colab": {"base_uri": "https://localhost:8080/", "height": 206}}
# Load the leukemia example dataset that ships with lifelines.
from lifelines.datasets import load_leukemia

leukemia = load_leukemia()

# Bare expression: in a notebook its value is rendered as the cell output.
# NOTE(review): `leukemia` is presumably a pandas DataFrame (it is used with
# .head()/.info()/.describe() below) -- confirm against the lifelines docs.
leukemia.head()

# + {"id": "DQ936c5tsgw-", "colab_type": "code", "outputId": "94d35943-a551-46c5-975e-3862872cce1e", "colab": {"base_uri": "https://localhost:8080/", "height": 191}}
# Inspect the dataset's structure.
leukemia.info()

# + {"id": "MDvA8Z9rsgxL", "colab_type": "code", "outputId": "0ce5b40c-0a8c-4508-990f-25ad0d0a1810", "colab": {"base_uri": "https://localhost:8080/", "height": 300}}
# Inspect summary statistics of the dataset.
leukemia.describe()

# + {"id": "tDasOEocsgxQ", "colab_type": "code", "outputId": "fa606f2c-3531-4d30-d453-55f257b85a9f", "colab": {"base_uri": "https://localhost:8080/", "height": 376}}
# Pull out the two survival-analysis inputs: the `t` column is used as the
# observation durations and the `status` column as the event-observed flags.
durations = leukemia.t.values
events = leukemia.status.values

# Plot one lifetime per observation.
# NOTE(review): only `lifelines.datasets.load_leukemia` is imported in the
# cells visible here -- confirm an earlier cell runs `import lifelines` (and
# makes `lifelines.plotting` available), otherwise this raises NameError.
ax = lifelines.plotting.plot_lifetimes(durations=durations,
                                       event_observed=events)