Python examples of datalab_cohorts.StudyDefinition.make_df_from_expectations

Example #1

0

Show file

File: test_expectations.py Project: jamesscottbrown/opensafely-research-template

def test_make_df_from_expectations_with_categories_expression_validation():
    """Expect ``ValueError`` when expectation ratios name an undefined category.

    The ``categorised_as`` mapping defines only "A" and "B", whereas the
    expected ratios also include "C".
    """
    category_definitions = {"A": "sex = 'F'", "B": "sex = 'M'"}
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        # "C" has no matching entry in ``category_definitions``.
        "category": {"ratios": {"A": 0.3, "B": 0.6, "C": 0.1}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_definitions,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )
    # Only the dataframe generation is expected to raise, not the definition.
    with pytest.raises(ValueError):
        study.make_df_from_expectations(10000)

Example #2

0

Show file

File: test_expectations.py Project: jamesscottbrown/opensafely-research-template

def test_make_df_from_expectations_with_categories_in_codelist_validation():
    """Expect ``ValueError`` when ratios use categories absent from the codelist.

    The codelist holds the single pair ("X", "Y"), whereas the expected
    ratios are given for "A" and "B".
    """
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    codes = codelist([("X", "Y")], system="ctv3")
    codes.has_categories = True
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            codes,
            returning="category",
            return_expectations=expectations,
            find_last_match_in_period=True,
            include_date_of_match=False,
        ),
    )
    # Only the dataframe generation is expected to raise, not the definition.
    with pytest.raises(ValueError):
        study.make_df_from_expectations(10000)

Example #3

0

Show file

def test_make_df_no_categories_validation_when_no_categories_in_definition():
    """Smoke test: ``patients.sex`` with category ratios generates cleanly."""
    sex_expectations = {
        "rate": "universal",
        "category": {"ratios": {"M": 0.49, "F": 0.51}},
    }
    study = StudyDefinition(
        population=patients.all(),
        sex=patients.sex(return_expectations=sex_expectations),
    )
    # No assertion on purpose: completing without an exception is the check.
    study.make_df_from_expectations(10000)

Example #4

0

Show file

File: test_expectations.py Project: jamesscottbrown/opensafely-research-template

def test_make_df_from_expectations_with_distribution_and_date():
    """Generate a BMI value with its measurement date and check they line up.

    The resulting frame must contain exactly the ``bmi`` and
    ``bmi_date_measured`` columns, and a row must be null in one column
    exactly when it is null in the other.
    """
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
            include_measurement_date=True,
            include_month=True,
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.6,
                "float": {
                    "distribution": "normal",
                    "mean": 35,
                    "stddev": 10
                },
                "date": {
                    "earliest": "1900-01-01",
                    "latest": "today"
                },
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    assert sorted(result.columns) == ["bmi", "bmi_date_measured"]

    # Check that the null-valued rows are aligned with each other.  Compare
    # the null masks row by row: comparing the zero-filled null subsets (as
    # this test used to) only ever compares 0 with 0 and detects a mismatch
    # solely through pandas refusing to compare differently-labelled Series.
    bmi_missing = pd.isnull(result["bmi"])
    date_missing = pd.isnull(result["bmi_date_measured"])
    assert (bmi_missing == date_missing).all()

Example #5

0

Show file

File: test_expectations.py Project: jamesscottbrown/opensafely-research-template

def test_make_df_from_expectations_with_date_filter():
    """Check generated dates respect the upper bound of a ``between`` filter.

    The variable is restricted to ``between=["2001-12-01", "2002-06-01"]``
    while the expectations allow dates up to "today"; the latest generated
    non-null date must not exceed "2002-06-01".
    """
    study = StudyDefinition(
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            between=["2001-12-01", "2002-06-01"],
            returning="date",
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.2,
                "date": {
                    "earliest": "1900-01-01",
                    "latest": "today"
                },
            },
            find_first_match_in_period=True,
            include_month=True,
            include_day=True,
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list so an unexpected column set fails the assertion
    # instead of raising from an elementwise Index comparison.
    assert list(result.columns) == ["asthma_condition"]
    # Select the column by label; positional ``[0]`` on a label-indexed
    # Series is deprecated in recent pandas.
    latest_date = result["asthma_condition"].dropna().max()
    assert latest_date <= "2002-06-01"

Example #6

0

Show file

File: test_expectations.py Project: jamesscottbrown/opensafely-research-template

def test_make_df_from_expectations_with_categories_expression():
    """Check ``categorised_as`` output follows the expected category ratios.

    With ratios of 0.3 for "A" and 0.7 for "B", "A" must occur less often
    than "B" in the generated ``category`` column.
    """
    category_definitions = {"A": "sex = 'F'", "B": "sex = 'M'"}
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_definitions,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )
    generated = study.make_df_from_expectations(10000)
    frequencies = generated.category.value_counts()
    assert frequencies["A"] < frequencies["B"]

Example #7

0

Show file

File: test_expectations.py Project: jamesscottbrown/opensafely-research-template

def test_make_df_from_expectations_with_categories():
    """Check a categorised codelist variable follows the expected ratios.

    The frame must contain only the ``ethnicity`` column and, with ratios of
    0.3 for "A" and 0.7 for "B", "A" must occur less often than "B".
    """
    categorised_codelist = codelist([("1", "A"), ("2", "B")], system="ctv3")
    categorised_codelist.has_categories = True
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            categorised_codelist,
            returning="category",
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.2,
                "category": {
                    "ratios": {
                        "A": 0.3,
                        "B": 0.7
                    }
                },
                "date": {
                    "earliest": "1900-01-01",
                    "latest": "today"
                },
            },
            find_last_match_in_period=True,
            include_date_of_match=False,
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list so an unexpected column set fails the assertion
    # instead of raising from an elementwise Index comparison.
    assert list(result.columns) == ["ethnicity"]

    # Count rows per category by label; the previous groupby/count followed
    # by positional ``[0]`` relied on deprecated positional Series indexing.
    category_counts = result["ethnicity"].value_counts()
    assert category_counts["A"] < category_counts["B"]

Example #8

0

Show file

def test_make_df_from_expectations_doesnt_alter_date_defaults():
    """Regression test: per-variable overrides must not leak into defaults.

    Two variables override the incidence and the date range respectively;
    a third passes an empty ``date`` override and must still see the study
    defaults (earliest date 1900-01-01, incidence 1.0).
    """
    defaults = {
        "rate": "exponential_increase",
        "incidence": 1.0,
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "category": {"ratios": {"M": 0.5, "F": 0.5}},
    }
    study = StudyDefinition(
        default_expectations=defaults,
        population=patients.all(),
        with_different_incidence=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={"incidence": 0.2},
            include_day=True,
        ),
        with_different_date=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={
                "date": {"earliest": "2015-01-01", "latest": "today"}
            },
            include_day=True,
        ),
        with_defaults=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={"date": {}},
            include_day=True,
        ),
    )
    generated = study.make_df_from_expectations(10000)
    defaulted = generated["with_defaults"]

    # The default earliest date (1900) must still apply, not the 2015
    # override given to the sibling variable.
    assert defaulted.min() < "2015-01-01"
    # The default incidence of 1.0 must still apply: no missing values.
    assert pd.isnull(defaulted).sum() == 0

Example #9

0

Show file

File: test_expectations.py Project: hotelzululima/ics-research

def test_make_df_from_expectations_partial_default_overrides():
    """Check a partial ``date`` override is combined with the study defaults.

    Only ``latest`` is overridden (to 2000-01-01); with ``date_format="YYYY"``
    the largest generated value, read as a number, must be 2000.
    """
    defaults = {
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "rate": "exponential_increase",
        "incidence": 0.2,
    }
    study = StudyDefinition(
        default_expectations=defaults,
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            find_first_match_in_period=True,
            date_format="YYYY",
            return_expectations={"date": {"latest": "2000-01-01"}},
        ),
    )

    generated = study.make_df_from_expectations(10000)
    latest_year = generated["asthma_condition"].astype("float").max()
    assert latest_year == 2000

Example #10

0

Show file

File: test_expectations.py Project: hotelzululima/ics-research

def test_make_df_from_expectations_with_number_of_episodes():
    """Check a ``number_of_episodes`` variable yields a single named column.

    The variable is defined with an integer distribution expectation; the
    generated frame must contain only the ``episode_count`` column.
    """
    study = StudyDefinition(
        population=patients.all(),
        episode_count=patients.with_these_clinical_events(
            codelist(["A", "B", "C"], system="ctv3"),
            ignore_days_where_these_codes_occur=codelist(["D", "E"],
                                                         system="ctv3"),
            returning="number_of_episodes",
            episode_defined_as="series of events each <= 14 days apart",
            return_expectations={
                "int": {
                    "distribution": "normal",
                    "mean": 4,
                    "stddev": 2
                },
                "date": {
                    "earliest": "1900-01-01",
                    "latest": "today"
                },
                "incidence": 0.2,
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list so an unexpected column set fails the assertion
    # instead of raising from an elementwise Index comparison.
    assert list(result.columns) == ["episode_count"]

Example #11

0

Show file

File: test_expectations.py Project: backToBayesics/openSafely-sandpit

def test_make_df_from_expectations_doesnt_alter_defaults():
    """Check overriding incidence on one variable leaves the default intact.

    ``sex_altered`` lowers the incidence to 0.1; ``sex_default`` does not set
    one, so it must use the study default of 1.0 and have no missing values.
    """
    # Each expectation is a separate dict literal (not one shared object)
    # so the objects handed to the code under test stay independent.
    defaults = {
        "rate": "exponential_increase",
        "incidence": 1.0,
        "category": {"ratios": {"M": 0.5, "F": 0.5}},
    }
    altered_expectations = {
        "incidence": 0.1,
        "category": {"ratios": {"M": 0.5, "F": 0.5}},
    }
    default_expectations_only = {"category": {"ratios": {"M": 0.5, "F": 0.5}}}
    study = StudyDefinition(
        default_expectations=defaults,
        population=patients.all(),
        sex_altered=patients.sex(return_expectations=altered_expectations),
        sex_default=patients.sex(return_expectations=default_expectations_only),
    )
    generated = study.make_df_from_expectations(10000)
    # With the default incidence of 1.0 still in force, nothing is missing.
    assert pd.isnull(generated["sex_default"]).sum() == 0

Example #12

0

Show file

def test_make_df_from_binary_default_outcome():
    """Check the number of non-null ``died`` values matches the incidence.

    Only an incidence of 0.1 is given, so exactly a tenth of the generated
    rows are expected to hold a non-null value.
    """
    population_size = 10000
    died_expectations = {"incidence": 0.1}
    study = StudyDefinition(
        population=patients.all(),
        died=patients.died_from_any_cause(return_expectations=died_expectations),
    )
    generated = study.make_df_from_expectations(population_size)
    recorded = generated[pd.notnull(generated.died)]
    assert len(recorded) == 0.1 * population_size

Example #13

0

Show file

def test_make_df_from_expectations_with_mean_recorded_value():
    """Check ``mean_recorded_value`` output is centred near the expected mean.

    The expectation is a normal distribution with mean 35; the truncated
    sample mean must lie within 5 of that value.
    """
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.6,
        "float": {"distribution": "normal", "mean": 35, "stddev": 10},
    }
    study = StudyDefinition(
        population=patients.all(),
        drug_x=patients.mean_recorded_value(
            codelist(["X"], system="ctv3"),
            on_most_recent_day_of_measurement=True,
            return_expectations=expectations,
        ),
    )
    generated = study.make_df_from_expectations(10000)
    sample_mean = int(generated["drug_x"].mean())
    assert abs(35 - sample_mean) < 5

Example #14

0

Show file

def test_make_df_from_expectations_returning_date_using_defaults():
    """Check a date variable picks up the date range from the study defaults.

    The variable itself only sets an incidence; the default range starts at
    1900-01-01, so the earliest generated non-null date is expected to fall
    before 1960-01-01.
    """
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "1900-01-01", "latest": "today"},
            "rate": "exponential_increase",
        },
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={"incidence": 0.2},
            find_first_match_in_period=True,
            date_format="YYYY-MM-DD",
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Select the column by label; positional ``[0]`` on a label-indexed
    # Series is deprecated in recent pandas.
    earliest_date = result["asthma_condition"].dropna().min()
    assert earliest_date < "1960-01-01"

Example #15

0

Show file

File: test_expectations.py Project: hotelzululima/ics-research

def test_make_df_from_expectations_with_care_home_status():
    """Check ``care_home_status_as_of`` in its boolean and categorised forms.

    For the categorised variable the expected ratios are 0.1 for "PN" and
    0.7 for "U", so "PN" must occur less often than "U".
    """
    flag_expectations = {
        "rate": "exponential_increase",
        "incidence": 0.3,
        "date": {"earliest": "1900-01-01", "latest": "2020-01-01"},
        "bool": True,
    }
    type_definitions = {
        "PN": "IsPotentialCareHome AND LocationRequiresNursing='Y'",
        "PC": "IsPotentialCareHome",
        "U": "DEFAULT",
    }
    type_expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"PN": 0.1, "PC": 0.2, "U": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        is_in_care_home=patients.care_home_status_as_of(
            "2020-01-01",
            return_expectations=flag_expectations,
        ),
        care_home_type=patients.care_home_status_as_of(
            "2020-01-01",
            categorised_as=type_definitions,
            return_expectations=type_expectations,
        ),
    )
    generated = study.make_df_from_expectations(10000)
    frequencies = generated.care_home_type.value_counts()
    assert frequencies["PN"] < frequencies["U"]

Example #16

0

Show file

File: test_expectations.py Project: hotelzululima/ics-research

def test_make_df_from_expectations_with_satisfying():
    """Check a ``patients.satisfying`` variable yields a single named column.

    The nested ``condition_a`` / ``condition_b`` variables are passed to
    ``satisfying`` itself; the generated frame must contain only the
    ``has_condition`` column.
    """
    study = StudyDefinition(
        population=patients.all(),
        has_condition=patients.satisfying(
            "condition_a OR condition_b",
            condition_a=patients.with_these_clinical_events(
                codelist(["A", "B", "C"], system="ctv3")),
            condition_b=patients.with_these_clinical_events(
                codelist(["X", "Y", "Z"], system="ctv3")),
            return_expectations={
                "date": {
                    "earliest": "2001-01-01",
                    "latest": "2020-03-01"
                },
                "incidence": 0.95,
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list so an unexpected column set fails the assertion
    # instead of raising from an elementwise Index comparison.
    assert list(result.columns) == ["has_condition"]