Beispiel #1
0
# Study definition for a cohort of patients registered with one practice
# between 2019-03-01 and 2020-03-01.  Columns are declared in this order:
# outcomes (ICU admission and deaths), demographics, IMID diagnoses,
# comorbidities, geography, smoking, GP consultations and medications.
# `first_diagnosis_in_period` and `medication_counts_and_dates_all` are
# helpers defined elsewhere in the project.
study = StudyDefinition(
    # Configure the expectations framework (defaults for generated dummy data)
    default_expectations={
        "date": {
            "earliest": "1900-01-01",
            "latest": "today"
        },
        "rate": "uniform",
        "incidence": 0.1,
    },
    # This line defines the study population
    population=patients.registered_with_one_practice_between(
        "2019-03-01", "2020-03-01"),
    # Outcomes
    icu_date_admitted=patients.admitted_to_icu(
        on_or_after="2020-03-01",
        include_day=True,
        returning="date_admitted",
        find_first_match_in_period=True,
        return_expectations={
            "date": {
                "earliest": "2020-03-01"
            },
            "incidence": 0.1
        },
    ),
    died_date_cpns=patients.with_death_recorded_in_cpns(
        on_or_before="2020-06-01",
        returning="date_of_death",
        include_month=True,
        include_day=True,
        return_expectations={"date": {
            "earliest": "2020-03-01"
        }},
    ),
    died_ons_covid_flag_any=patients.with_these_codes_on_death_certificate(
        covid_identification,
        on_or_after="2020-03-01",
        match_only_underlying_cause=False,
        return_expectations={
            "date": {
                "earliest": "2020-03-01"
            },
            "incidence": 0.1
        },
    ),
    died_ons_covid_flag_underlying=patients.with_these_codes_on_death_certificate(
        covid_identification,
        on_or_after="2020-03-01",
        match_only_underlying_cause=True,
        return_expectations={
            "date": {
                "earliest": "2020-03-01"
            },
            "incidence": 0.1
        },
    ),
    # NOTE(review): this query window starts at 2020-06-01, whereas the
    # dummy-data expectation below starts at 2020-03-01 and the CPNS death
    # variable above uses on_or_before="2020-06-01" -- confirm that
    # on_or_after="2020-06-01" is really intended here.
    died_date_ons=patients.died_from_any_cause(
        on_or_after="2020-06-01",
        returning="date_of_death",
        include_month=True,
        include_day=True,
        return_expectations={
            "date": {
                "earliest": "2020-03-01"
            },
            "incidence": 0.1
        },
    ),
    # The rest of the lines define the covariates with associated GitHub issues
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/33
    age=patients.age_as_of(
        "2020-03-01",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages"
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/46
    sex=patients.sex(return_expectations={
        "rate": "universal",
        "category": {
            "ratios": {
                "M": 0.49,
                "F": 0.51
            }
        },
    }),
    ethnicity=patients.with_these_clinical_events(
        ethnicity_codes,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=True,
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.8,
                    "5": 0.1,
                    "3": 0.1
                }
            },
            "incidence": 0.75,
        },
    ),
    # IMID disease codes
    crohns_disease=first_diagnosis_in_period(crohns_disease_codes),
    ulcerative_colitis=first_diagnosis_in_period(ulcerative_colitis_codes),
    inflammatory_bowel_disease_unclassified=first_diagnosis_in_period(
        inflammatory_bowel_disease_unclassified_codes),
    psoriasis=first_diagnosis_in_period(psoriasis_codes),
    hidradenitis_suppurativa=first_diagnosis_in_period(
        hidradenitis_suppurativa_codes),
    psoriatic_arthritis=first_diagnosis_in_period(psoriatic_arthritis_codes),
    rheumatoid_arthritis=first_diagnosis_in_period(rheumatoid_arthritis_codes),

    # Comorbidities
    chronic_cardiac_disease=first_diagnosis_in_period(
        chronic_cardiac_disease_codes),
    diabetes=first_diagnosis_in_period(diabetes_codes),
    hba1c_new=first_diagnosis_in_period(hba1c_new_codes),
    hba1c_old=first_diagnosis_in_period(hba1c_old_codes),
    # Most recent HbA1c value on or before 2020-02-29 from the "new" codelist
    hba1c_mmol_per_mol=patients.with_these_clinical_events(
        hba1c_new_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-29",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-02-29"
            },
            "float": {
                "distribution": "normal",
                "mean": 40.0,
                "stddev": 20
            },
            "incidence": 0.95,
        },
    ),
    # Most recent HbA1c value on or before 2020-02-29 from the "old" codelist
    hba1c_percentage=patients.with_these_clinical_events(
        hba1c_old_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-29",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-02-29"
            },
            "float": {
                "distribution": "normal",
                "mean": 5,
                "stddev": 2
            },
            "incidence": 0.95,
        },
    ),
    hypertension=first_diagnosis_in_period(hypertension_codes),
    chronic_respiratory_disease=first_diagnosis_in_period(
        chronic_respiratory_disease_codes),
    copd=first_diagnosis_in_period(copd_codes),
    chronic_liver_disease=first_diagnosis_in_period(
        chronic_liver_disease_codes),
    stroke=first_diagnosis_in_period(stroke_codes),
    lung_cancer=first_diagnosis_in_period(lung_cancer_codes),
    haem_cancer=first_diagnosis_in_period(haem_cancer_codes),
    other_cancer=first_diagnosis_in_period(other_cancer_codes),
    # CKD
    creatinine=patients.with_these_clinical_events(
        creatinine_codes,
        find_last_match_in_period=True,
        between=["2018-12-01", "2020-02-29"],
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 150.0,
                "stddev": 200.0
            },
            "date": {
                "earliest": "2018-12-01",
                "latest": "2020-02-29"
            },
            "incidence": 0.95,
        },
    ),
    # End stage renal disease codes incl. dialysis / transplant
    # NOTE(review): this variable is built from `ckd_codes`, the same codelist
    # as `ckd` below -- confirm whether a dedicated ESRF codelist was intended.
    esrf=patients.with_these_clinical_events(
        ckd_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    ckd=first_diagnosis_in_period(ckd_codes),
    organ_transplant=first_diagnosis_in_period(organ_transplant_codes),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/10
    bmi=patients.most_recent_bmi(
        on_or_after="2010-02-01",
        minimum_age_at_measurement=16,
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "incidence": 0.6,
            "float": {
                "distribution": "normal",
                "mean": 35,
                "stddev": 10
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/54
    stp=patients.registered_practice_as_of(
        "2020-03-01",
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "STP1": 0.5,
                    "STP2": 0.5
                }
            },
        },
    ),
    msoa=patients.registered_practice_as_of(
        "2020-03-01",
        returning="msoa_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "MSOA1": 0.5,
                    "MSOA2": 0.5
                }
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/52
    imd=patients.address_as_of(
        "2020-03-01",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "100": 0.1,
                    "200": 0.2,
                    "300": 0.7
                }
            },
        },
    ),
    rural_urban=patients.address_as_of(
        "2020-03-01",
        returning="rural_urban_classification",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "rural": 0.1,
                    "urban": 0.9
                }
            },
        },
    ),
    # SMOKING
    # Categories are derived from the two helper variables declared below:
    # the most recent smoking category and whether any "S"/"E" code was ever
    # recorded.  "M" is the fallback when none of the other rules match.
    smoking_status=patients.categorised_as(
        {
            "S": "most_recent_smoking_code = 'S'",
            "E": """
                     most_recent_smoking_code = 'E' OR (    
                       most_recent_smoking_code = 'N' AND ever_smoked   
                     )  
                """,
            "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
            "M": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "S": 0.6,
                    "E": 0.1,
                    "N": 0.2,
                    "M": 0.1
                }
            }
        },
        most_recent_smoking_code=patients.with_these_clinical_events(
            clear_smoking_codes,
            find_last_match_in_period=True,
            on_or_before="2020-02-29",
            returning="category",
        ),
        ever_smoked=patients.with_these_clinical_events(
            filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
            on_or_before="2020-02-29",
        ),
    ),
    smoking_status_date=patients.with_these_clinical_events(
        clear_smoking_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # GP CONSULTATION RATE
    gp_consult_count=patients.with_gp_consultations(
        between=["2019-03-01", "2020-02-29"],
        returning="number_of_matches_in_period",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 4,
                "stddev": 2
            },
            "date": {
                "earliest": "2019-03-01",
                "latest": "2020-02-29"
            },
            "incidence": 0.7,
        },
    ),
    has_consultation_history=patients.with_complete_gp_consultation_history_between(
        "2019-03-01",
        "2020-02-29",
        return_expectations={"incidence": 0.9},
    ),
    # Medications
    # (name, codelist) pairs handed to `medication_counts_and_dates_all`;
    # the mapping it returns is unpacked into additional study columns.
    # Each pair is listed exactly once.
    # NOTE(review): "anti_il12-23" contains a hyphen, unlike every other name
    # in this list -- confirm the resulting column names are acceptable.
    **medication_counts_and_dates_all([
        ("oral_prednisolone",
         "opensafely-asthma-oral-prednisolone-medication"),
        ("anti_tnf", "crossimid-anti-tnf-medication"),
        ("anti_il6", "crossimid-anti-il6-medication"),
        ("anti_il12-23", "crossimid-anti-il12-23-medication"),
        ("anti_il1", "crossimid-anti-il1-medication"),
        ("anti_il4", "crossimid-anti-il4-medication"),
        ("jak_inhibitors", "crossimid-jak-inhibitors-medication"),
        ("rituximab", "crossimid-rituximab-medication"),
        ("anti_integrin", "crossimid-anti-integrin-medication"),
        ("azathioprine", "crossimid-azathioprine-medication"),
        ("ciclosporin", "crossimid-ciclosporin-medication"),
        ("gold", "crossimid-gold-medication"),
        ("leflunomide", "crossimid-leflunomide-medication"),
        ("mercaptopurine", "crossimid-mercaptopurine-medication"),
        ("methotrexate", "crossimid-methotrexate-medication"),
        ("mycophenolate", "crossimid-mycophenolate-medication"),
        ("penicillamine", "crossimid-penicillamine-medication"),
        ("sulfasalazine", "crossimid-sulfasalazine-medication"),
    ]))
Beispiel #2
0
# Study definition anchored on `index_date` (a value defined elsewhere in the
# project).  The population is patients aged 16 or over who have not died,
# are registered, and have a `cns_code` event within one month of the index
# date; the remaining columns are age, age band, STP and two dose flags.
study = StudyDefinition(
    # Configure the expectations framework (defaults for generated dummy data)
    default_expectations={
        "date": {
            "earliest": "2020-01-01",
            "latest": "today"
        },
        "rate": "universal",
    },

    # define the study index date
    index_date=index_date,

    # This line defines the study population.
    # `age` in the expression refers to the study-level `age` column declared
    # further down; `died`, `registered` and `cns` are the helper variables
    # declared inline here.
    population=patients.satisfying(
        """
        age >= 16 AND 
        (NOT died) AND
        (registered) AND
        (cns)
        """,
        died=patients.died_from_any_cause(on_or_before=index_date,
                                          returning="binary_flag"),
        registered=patients.registered_as_of(index_date),
        cns=patients.with_these_clinical_events(
            cns_code,
            between=["index_date", "index_date + 1 month"],
            returning="binary_flag",
            return_expectations={"incidence": 0.6},
        )),
    age=patients.age_as_of(index_date,
                           return_expectations={
                               "rate": "universal",
                               "int": {
                                   "distribution": "population_ages"
                               }
                           }),
    # Age bands derived from `age`; "0" is the fallback category and is not
    # given a share in the dummy-data ratios.
    age_group=patients.categorised_as(
        {
            "0": "DEFAULT",
            "16 - under 40": " age >= 16 AND age < 40",
            "40 - under 50": " age >= 40 AND age < 50",
            "50 - under 65": " age >= 50 AND age < 65",
            "65 plus": " age >= 65",
        },
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "16 - under 40": 0.5,
                    "40 - under 50": 0.125,
                    "50 - under 65": 0.25,
                    "65 plus": 0.125,
                }
            },
        },
    ),
    stp=patients.registered_practice_as_of(
        "index_date",
        returning="stp_code",
        return_expectations={
            "category": {
                "ratios": {
                    "STP1": 0.5,
                    "STP2": 0.5
                }
            },
        },
    ),
    # Flags for a matching dose code within one month of the index date
    first_dose=patients.with_these_clinical_events(
        first_dose_code,
        returning="binary_flag",
        between=["index_date", "index_date + 1 month"],
        return_expectations={"incidence": 0.4}),
    second_dose=patients.with_these_clinical_events(
        second_dose_code,
        returning="binary_flag",
        between=["index_date", "index_date + 1 month"],
        return_expectations={"incidence": 0.4}),
)
Beispiel #3
0
# Study definition for patients registered with one practice between
# `from_date` and `to_date` (both defined elsewhere in the project).
# Columns: demographics and geography, the variables produced by the
# `date_X` / `sgss_X` helpers (defined elsewhere; `n` and `m` are their
# count arguments), and ONS death outcomes.
study = StudyDefinition(
    index_date=from_date,
    # Defaults for generated dummy data
    default_expectations={
        "date": {
            "earliest": "1970-01-01",
            "latest": "today"
        },
        "rate": "uniform",
        "incidence": 0.2,
    },
    # This line defines the study population
    population=patients.registered_with_one_practice_between(
        from_date, to_date),
    # demographic info
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/33
    age=patients.age_as_of(
        from_date,
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages"
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/46
    sex=patients.sex(return_expectations={
        "rate": "universal",
        "category": {
            "ratios": {
                "M": 0.49,
                "F": 0.51
            }
        },
    }),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/52
    imd=patients.address_as_of(
        from_date,
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "100": 0.1,
                    "200": 0.2,
                    "300": 0.7
                }
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/54
    stp=patients.registered_practice_as_of(
        from_date,
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "STP1": 0.1,
                    "STP2": 0.1,
                    "STP3": 0.1,
                    "STP4": 0.1,
                    "STP5": 0.1,
                    "STP6": 0.1,
                    "STP7": 0.1,
                    "STP8": 0.1,
                    "STP9": 0.1,
                    "STP10": 0.1,
                }
            },
        },
    ),
    # region - one of NHS England 9 regions
    # NOTE(review): only eight regions appear in the dummy-data ratios below
    # (there is no "South West" entry) -- confirm this is intended.
    region=patients.registered_practice_as_of(
        from_date,
        returning="nuts1_region_name",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "North East": 0.1,
                    "North West": 0.1,
                    "Yorkshire and the Humber": 0.1,
                    "East Midlands": 0.1,
                    "West Midlands": 0.1,
                    "East of England": 0.1,
                    "London": 0.2,
                    "South East": 0.2,
                },
            },
        },
    ),
    # covid-related code dates
    **date_X("codes_antigen_negative", n=m),
    **date_X("codes_exposure_to_disease", n=n),
    **date_X("codes_historic_covid", n=n),
    **date_X("codes_potential_historic_covid", n=m),
    **date_X("codes_probable_covid", n=m),
    **date_X("codes_probable_covid_pos_test", n=n),
    **date_X("codes_probable_covid_sequelae", n=n),
    **date_X("codes_suspected_covid_advice", n=m),
    **date_X("codes_suspected_covid_had_test", n=m),
    **date_X("codes_suspected_covid_isolation", n=n),
    **date_X("codes_suspected_covid_nonspecific", n=n),
    **date_X("codes_suspected_covid", n=m),
    **date_X("codes_covid_unrelated_to_case_status", n=m),
    **date_X("codes_suspected_covid_had_antigen_test", n=n),
    **sgss_X(n=n),
    # Outcomes
    died_ons_covid=patients.with_these_codes_on_death_certificate(
        codes_covid_death,
        returning="binary_flag",
        on_or_after=from_date,
        match_only_underlying_cause=False,
        return_expectations={"date": {
            "earliest": from_date
        }},
    ),
    # The dummy-data window starts at `from_date`, matching the query window
    # and the other death variables in this definition.
    died_ons_covid_underlying=patients.with_these_codes_on_death_certificate(
        codes_covid_death,
        returning="binary_flag",
        on_or_after=from_date,
        match_only_underlying_cause=True,
        return_expectations={"date": {
            "earliest": from_date
        }},
    ),
    died_ons=patients.died_from_any_cause(
        returning="binary_flag",
        on_or_after=from_date,
        return_expectations={"date": {
            "earliest": from_date
        }},
    ),
    # Died (any cause) without a covid code anywhere on the death certificate
    died_ons_noncovid=patients.satisfying(
        """(NOT died_ons_covid) AND died_ons""",
        return_expectations={"incidence": 0.15},
    ),
    # Single categorical summary of the death flags above; "unknown" is the
    # fallback category and is not given a share in the dummy-data ratios.
    death_category=patients.categorised_as(
        {
            "alive": "NOT died_ons",
            "covid-death": "died_ons_covid",
            "non-covid-death": "died_ons_noncovid",
            "unknown": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "alive": 0.8,
                    "covid-death": 0.1,
                    "non-covid-death": 0.1
                }
            }
        },
    ),
    date_died_ons=patients.died_from_any_cause(
        returning="date_of_death",
        on_or_after=from_date,
        date_format="YYYY-MM-DD",
        return_expectations={"date": {
            "earliest": from_date
        }},
    ),
)
Beispiel #4
0
study = StudyDefinition(
    # Configure the expectations framework
    default_expectations={
        "date": {
            "earliest": "1900-01-01",
            "latest": "today"
        },
        "rate": "uniform",
        "incidence": 0.5,
    },
    # STUDY POPULATION
    population=patients.satisfying(
        """
            has_follow_up AND
            (age >=18 AND age <= 110) AND
            (rheumatoid OR osteoarthritis) AND
            imd >0 AND NOT (
            (has_asthma AND saba_single) OR
            aspirin_ten_years OR
            stroke OR
            mi OR
            gi_bleed_ulcer
            )
            """,
        has_follow_up=patients.registered_with_one_practice_between(
            "2019-02-28", "2020-02-29"),
        has_asthma=patients.with_these_clinical_events(
            current_asthma_codes,
            between=["2017-02-28", "2020-02-29"],
        ),
    ),
    # The rest of the lines define the covariates with from the protocol with associated GitHub issues
    # OUTCOMES
    died_ons_covid_flag_any=patients.with_these_codes_on_death_certificate(
        covid_identification,
        on_or_after="2020-03-01",
        match_only_underlying_cause=False,
        return_expectations={"date": {
            "earliest": "2020-03-01"
        }},
    ),
    died_ons_covid_flag_underlying=patients.
    with_these_codes_on_death_certificate(
        covid_identification,
        on_or_after="2020-03-01",
        match_only_underlying_cause=True,
        return_expectations={"date": {
            "earliest": "2020-03-01"
        }},
    ),
    died_date_ons=patients.died_from_any_cause(
        on_or_after="2020-03-01",
        returning="date_of_death",
        include_month=True,
        include_day=True,
        return_expectations={"date": {
            "earliest": "2020-03-01"
        }},
    ),
    # PLACEHOLDER - SECONDARY OUTCOME:PRESENTING AT ED - this is a wip placeholder
    aande_attendance_with_covid=patients.attended_emergency_care(
        on_or_after="2020-03-01",
        returning="date_arrived",
        date_format="YYYY-MM-DD",
        with_these_diagnoses=
        ics_codes,  # placeholder https://github.com/opensafely/cohort-extractor/issues/182#issuecomment-651782064
        return_expectations={"date": {
            "earliest": "2020-03-01"
        }},
    ),
    # MEDICATIONS
    # NSAID
    nsaid_last_three_years=patients.with_these_medications(
        nsaid_codes,
        between=["2017-02-28", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2017-02-28",
                "latest": "2020-02-29"
            }
        },
    ),
    nsaid_after_march=patients.with_these_medications(
        nsaid_codes,
        on_or_after="2020-03-01",
        returning="date",
        find_first_match_in_period=True,
        include_month=True,
        include_day=True,
        return_expectations={
            "date": {
                "earliest": "2020-03-01",
                "latest": "2020-05-29"
            }
        },
    ),
    nsaid_last_four_months=patients.with_these_medications(
        nsaid_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2017-02-28",
                "latest": "2020-02-29"
            }
        },
    ),
    nsaid_last_two_months=patients.with_these_medications(
        nsaid_codes,
        between=["2020-01-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2020-01-01",
                "latest": "2020-02-29"
            }
        },
    ),
    nsaid_last_month=patients.with_these_medications(
        nsaid_codes,
        between=["2020-02-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2020-02-01",
                "latest": "2020-02-29"
            }
        },
    ),

    # naproxen - high dose
    naproxen_high=patients.with_these_medications(
        naproxen_high_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    # naproxen low dose
    naproxen_low=patients.with_these_medications(
        naproxen_low_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    # naproxen - other
    naproxen_other=patients.with_these_medications(
        naproxen_other_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    # COX2 SPECIFIC
    cox_medication=patients.with_these_medications(
        cox_medication,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    # ASPIRIN
    aspirin_ten_years=patients.with_these_medications(
        aspirin_med_codes,
        between=["2010-02-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2010-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    aspirin_ever=patients.with_these_medications(
        aspirin_med_codes,
        on_or_before="2020-02-29",
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2010-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    # IBUPROFEN
    ibuprofen=patients.with_these_medications(
        ibuprofen_med_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    # indometacin
    indometacin=patients.with_these_medications(
        indometacin_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2017-02-28",
                "latest": "2020-02-29"
            }
        },
    ),
    # Oral steroid - prednisolone
    steroid_prednisolone=patients.with_these_medications(
        prednisolone_med_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2017-02-28",
                "latest": "2020-02-29"
            }
        },
    ),
    # hydroxychloqoquine
    hydroxychloroquine=patients.with_these_medications(
        hcq_med_code,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2017-02-28",
                "latest": "2020-02-29"
            }
        },
    ),
    # dmards
    dmards_primary_care=patients.with_these_medications(
        dmards_med_code,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2017-02-28",
                "latest": "2020-02-29"
            }
        },
    ),
    # The rest of the lines define the covariates with from the protocol with associated GitHub issues
    # https://github.com/opensafely/nsaids-research/issues/1
    # PATIENT DEMOGRAPHICS
    age=patients.age_as_of(
        "2020-03-01",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages"
            },
        },
    ),
    sex=patients.sex(return_expectations={
        "rate": "universal",
        "category": {
            "ratios": {
                "M": 0.49,
                "F": 0.51
            }
        },
    }),
    stp=patients.registered_practice_as_of(
        "2020-02-29",
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "STP1": 0.1,
                    "STP2": 0.1,
                    "STP3": 0.1,
                    "STP4": 0.1,
                    "STP5": 0.1,
                    "STP6": 0.1,
                    "STP7": 0.1,
                    "STP8": 0.1,
                    "STP9": 0.1,
                    "STP10": 0.1,
                }
            },
        },
    ),
    imd=patients.address_as_of(
        "2020-02-29",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "100": 0.1,
                    "200": 0.2,
                    "300": 0.7
                }
            },
        },
    ),
    msoa=patients.registered_practice_as_of(
        "2020-02-01",
        returning="msoa_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "MSOA1": 0.5,
                    "MSOA2": 0.5
                }
            },
        },
    ),
    rural_urban=patients.address_as_of(
        "2020-02-01",
        returning="rural_urban_classification",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "rural": 0.1,
                    "urban": 0.9
                }
            },
        },
    ),
    ethnicity=patients.with_these_clinical_events(
        ethnicity_codes,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=True,
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.8,
                    "5": 0.1,
                    "3": 0.1
                }
            },
            "incidence": 0.75,
        },
    ),
    # CLINICAL COVARIATES
    # BMI
    bmi=patients.most_recent_bmi(
        on_or_after="2010-02-01",
        minimum_age_at_measurement=16,
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "incidence": 0.6,
            "float": {
                "distribution": "normal",
                "mean": 35,
                "stddev": 10
            },
        },
    ),
    # SMOKING
    smoking_status=patients.categorised_as(
        {
            "S": "most_recent_smoking_code = 'S'",
            "E": """
                     most_recent_smoking_code = 'E' OR (    
                       most_recent_smoking_code = 'N' AND ever_smoked   
                     )  
                """,
            "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
            "M": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "S": 0.6,
                    "E": 0.1,
                    "N": 0.2,
                    "M": 0.1
                }
            }
        },
        most_recent_smoking_code=patients.with_these_clinical_events(
            clear_smoking_codes,
            find_last_match_in_period=True,
            on_or_before="2020-02-29",
            returning="category",
        ),
        ever_smoked=patients.with_these_clinical_events(
            filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
            on_or_before="2020-02-29",
        ),
    ),
    smoking_status_date=patients.with_these_clinical_events(
        clear_smoking_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # HYPERTENSION - CLINICAL CODES ONLY
    hypertension=patients.with_these_clinical_events(
        hypertension_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # HEART FAILURE
    heart_failure=patients.with_these_clinical_events(
        heart_failure_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # OTHER HEART DISEASES
    other_heart_disease=patients.with_these_clinical_events(
        other_heart_disease_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # DIABETES
    diabetes=patients.with_these_clinical_events(
        diabetes_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    hba1c_mmol_per_mol=patients.with_these_clinical_events(
        hba1c_new_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-29",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-02-29"
            },
            "float": {
                "distribution": "normal",
                "mean": 40.0,
                "stddev": 20
            },
            "incidence": 0.95,
        },
    ),
    hba1c_percentage=patients.with_these_clinical_events(
        hba1c_old_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-29",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-02-29"
            },
            "float": {
                "distribution": "normal",
                "mean": 5,
                "stddev": 2
            },
            "incidence": 0.95,
        },
    ),
    # COPD
    copd=patients.with_these_clinical_events(
        copd_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # OTHER RESPIRATORY DISEASES
    other_respiratory=patients.with_these_clinical_events(
        other_respiratory_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # CURRENT ASTHMA
    asthma=patients.with_these_clinical_events(
        current_asthma_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # CANCER - 3 TYPES
    cancer=patients.with_these_clinical_events(
        combine_codelists(lung_cancer_codes, haem_cancer_codes,
                          other_cancer_codes),
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # IMMUNOSUPPRESSION
    #### PERMANENT
    permanent_immunodeficiency=patients.with_these_clinical_events(
        combine_codelists(
            hiv_codes,
            permanent_immune_codes,
            sickle_cell_codes,
            organ_transplant_codes,
            spleen_codes,
        ),
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    aplastic_anaemia=patients.with_these_clinical_events(
        aplastic_codes,
        between=["2019-03-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-03-01",
                "latest": "2020-02-29"
            }
        },
    ),
    #### TEMPORARY
    temporary_immunodeficiency=patients.with_these_clinical_events(
        temp_immune_codes,
        between=["2019-03-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-03-01",
                "latest": "2020-02-29"
            }
        },
    ),
    # CKD
    creatinine=patients.with_these_clinical_events(
        creatinine_codes,
        find_last_match_in_period=True,
        between=["2019-02-28", "2020-02-29"],
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 60.0,
                "stddev": 15
            },
            "date": {
                "earliest": "2019-02-28",
                "latest": "2020-02-29"
            },
            "incidence": 0.95,
        },
    ),
    #### end stage renal disease codes incl. dialysis / transplant
    esrf=patients.with_these_clinical_events(
        ckd_codes,  # CHECK IS THIS DEF RIGHT HERE
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    #### stroke
    stroke=patients.with_these_clinical_events(
        stroke_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    #### Myocardial infarction
    mi=patients.with_these_clinical_events(
        mi_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    #### GI BLEED
    gi_bleed_ulcer=patients.with_these_clinical_events(
        gi_bleed_ulcer_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # OSTEOARTHRITIS
    osteoarthritis=patients.with_these_clinical_events(
        osteoarthritis_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # RHEUMATOID ARTHRITIS
    rheumatoid=patients.with_these_clinical_events(
        rheumatoid_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # RHEUMATOID ARTHRITIS/OSTEOARTHRITIS MIXED
    mixed_arthritis=patients.categorised_as(
        {
            "R": "rheumatoid = 'R'",
            "O": "osteoarthritis = 'O'",
            "RO": """
                    rheumatoid OR osteoarthritis
                  """,
            "M": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "R": 0.3,
                    "O": 0.1,
                    "RO": 0.2,
                    "M": 0.4
                }
            }
        },
    ),
    # FLU VACCINATION STATUS
    flu_vaccine_tpp_table=patients.with_tpp_vaccination_record(
        target_disease_matches="INFLUENZA",
        between=["2019-09-01", "2020-02-29"],  # current flu season
        returning="date",
        find_first_match_in_period=True,
        return_expectations={
            "date": {
                "earliest": "2019-09-01",
                "latest": "2020-02-29"
            }
        },
    ),
    flu_vaccine_med=patients.with_these_medications(
        flu_med_codes,
        between=["2019-09-01", "2020-02-29"],  # current flu season
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=True,
        return_expectations={
            "date": {
                "earliest": "2019-09-01",
                "latest": "2020-02-29"
            }
        },
    ),
    flu_vaccine_clinical=patients.with_these_clinical_events(
        flu_clinical_given_codes,
        ignore_days_where_these_codes_occur=flu_clinical_not_given_codes,
        between=["2019-09-01", "2020-02-29"],  # current flu season
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-09-01",
                "latest": "2020-02-29"
            }
        },
    ),
    flu_vaccine=patients.satisfying(
        """
        flu_vaccine_tpp_table OR
        flu_vaccine_med OR
        flu_vaccine_clinical
        """, ),
    # PNEUMOCOCCAL VACCINATION STATUS
    pneumococcal_vaccine_tpp_table=patients.with_tpp_vaccination_record(
        target_disease_matches="PNEUMOCOCCAL",
        between=["2015-03-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        return_expectations={
            "date": {
                "earliest": "2015-03-01",
                "latest": "2020-02-29"
            }
        },
    ),
    pneumococcal_vaccine_med=patients.with_these_medications(
        pneumococcal_med_codes,
        between=["2015-03-01", "2020-02-29"],  # past five years
        returning="date",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2015-03-01",
                "latest": "2020-02-29"
            }
        },
    ),
    pneumococcal_vaccine_clinical=patients.with_these_clinical_events(
        pneumococcal_clinical_given_codes,
        ignore_days_where_these_codes_occur=
        pneumococcal_clinical_not_given_codes,
        between=["2015-03-01", "2020-02-29"],  # past five years
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2015-03-01",
                "latest": "2020-02-29"
            }
        },
    ),
    pneumococcal_vaccine=patients.satisfying(
        """
        pneumococcal_vaccine_tpp_table OR
        pneumococcal_vaccine_med OR
        pneumococcal_vaccine_clinical
        """, ),
    # STATIN USAGE
    statin=patients.with_these_medications(
        statin_med_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    # PROTON PUMP INHIBITOR USAGE
    ppi=patients.with_these_medications(
        ppi_med_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    #### SABA SINGLE CONSTITUENT - asthma treatment
    saba_single=patients.with_these_medications(
        saba_med_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    ## A&E ATTENDANCE IN PREVIOUS YEAR
    annde_attendance_last_year=patients.attended_emergency_care(
        between=["2019-03-01", "2020-02-29"],
        returning="number_of_matches_in_period",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 2,
                "stddev": 2
            },
            "date": {
                "earliest": "2019-03-01",
                "latest": "2020-02-29"
            },
            "incidence": 0.3,
        },
    ),
    ### GP CONSULTATION RATE
    gp_consult_count=patients.with_gp_consultations(
        between=["2019-03-01", "2020-02-29"],
        returning="number_of_matches_in_period",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 4,
                "stddev": 2
            },
            "date": {
                "earliest": "2019-03-01",
                "latest": "2020-02-29"
            },
            "incidence": 0.7,
        },
    ),
    has_consultation_history=patients.
    with_complete_gp_consultation_history_between(
        "2019-03-01",
        "2020-02-29",
        return_expectations={"incidence": 0.9},
    ),
)
# Study definition whose population is `patients.all()`; each criterion
# (COPD, age band, smoking, follow-up, asthma, other respiratory disease,
# nebules, LTRA) is extracted as its own variable rather than being folded
# into the population expression.
# NOTE(review): none of the variables pass `returning=`, so they presumably
# come back as binary flags -- confirm against the cohortextractor defaults.
study = StudyDefinition(
    # Default settings for the expectations framework (optional)
    default_expectations={
        "date": {
            "earliest": "1970-01-01",
            "latest": "today",
        },
        "rate": "uniform",
        "incidence": 0.05,
    },

    ## STUDY POPULATION (required) -- no restriction applied here
    population=patients.all(),

    #### COPD code recorded on or before 2020-02-29
    copd=patients.with_these_clinical_events(
        copd_codes,
        on_or_before="2020-02-29",
        return_expectations={
            "incidence": 0.4,
        },
    ),

    #### AGE BAND: age on 2020-02-29 is between 35 and 110 inclusive
    age_cat=patients.satisfying(
        "age >=35 AND age <= 110",
        return_expectations={
            "incidence": 0.9,
        },
        # `age` is supplied inline for use by the expression above
        age=patients.age_as_of(
            "2020-02-29",
            return_expectations={
                "rate": "universal",
                "int": {
                    "distribution": "population_ages",
                },
            },
        ),
    ),

    #### EVER SMOKED: smoking codes restricted to categories "S" and "E"
    ever_smoked=patients.with_these_clinical_events(
        filter_codes_by_category(
            clear_smoking_codes,
            include=["S", "E"],
        ),
        on_or_before="2020-02-29",
        return_expectations={
            "incidence": 0.9,
        },
    ),

    #### FOLLOW-UP: registered with one practice from 2019-02-28 to 2020-02-29
    has_follow_up=patients.registered_with_one_practice_between(
        "2019-02-28",
        "2020-02-29",
        return_expectations={
            "incidence": 0.9,
        },
    ),

    #### RECENT ASTHMA: asthma code between 2017-02-28 and 2020-02-29
    recent_asthma=patients.with_these_clinical_events(
        asthma_codes,
        between=["2017-02-28", "2020-02-29"],
        return_expectations={
            "incidence": 0.05,
        },
    ),

    #### OTHER RESPIRATORY code recorded on or before 2020-02-29
    other_respiratory=patients.with_these_clinical_events(
        other_respiratory_codes,
        on_or_before="2020-02-29",
        return_expectations={
            "incidence": 0.05,
        },
    ),

    #### NEBULES: nebulised medication between 2019-02-28 and 2020-02-29
    nebules=patients.with_these_medications(
        nebulised_med_codes,
        between=["2019-02-28", "2020-02-29"],
        return_expectations={
            "incidence": 0.05,
        },
    ),

    #### LTRA: leukotriene medication between 2019-11-01 and 2020-02-29
    ltra_single=patients.with_these_medications(
        leukotriene_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_expectations={
            "incidence": 0.05,
        },
    ),
)
Beispiel #6
0
study = StudyDefinition(
    # Configure the expectations framework (optional)
    default_expectations={
        "date": {
            "earliest": "1970-01-01",
            "latest": "today"
        },
        "rate": "uniform",
        "incidence": 0.2,
    },
    ## STUDY POPULATION (required)
    population=patients.satisfying(
        """
        (
          has_asthma OR
          (asthma_ever AND any_asthma_med)
        ) AND
        (age >=18 AND age <= 110) AND
        has_follow_up AND NOT
        copd AND NOT
        other_respiratory AND NOT
        nebules AND NOT
        (
          (lama_single OR laba_lama) AND NOT (
            high_dose_ics OR
            high_dose_ics_single_ing OR
            high_dose_ics_multiple_ingredient OR
            low_med_dose_ics_single_ingredient OR
            low_med_dose_ics_multiple_ingredient OR
            low_med_dose_ics OR
            ics_single OR
            laba_ics OR
            laba_lama_ics
          )
        )
        """,
        has_asthma=patients.with_these_clinical_events(
            asthma_codes,
            between=["2017-02-28", "2020-02-29"],
        ),
        has_follow_up=patients.registered_with_one_practice_between(
            "2019-02-28", "2020-02-29"),
        nebules=patients.with_these_medications(
            nebulised_med_codes,
            between=["2019-02-28", "2020-02-29"],
        ),
        any_asthma_med=patients.satisfying("""
            ltra_single OR
            laba_lama_ics OR
            laba_lama OR
            laba_ics OR
            lama_single OR
            laba_single OR
            sama_single OR
            saba_single OR
            ics_single OR
            low_med_dose_ics OR
            low_med_dose_ics_multiple_ingredient OR
            low_med_dose_ics_single_ingredient OR
            high_dose_ics_multiple_ingredient OR
            high_dose_ics_single_ing OR
            high_dose_ics

            """),
    ),
    ## OUTCOMES (at least one outcome or covariate is required)
    icu_date_admitted=patients.admitted_to_icu(
        on_or_after="2020-03-01",
        include_day=True,
        returning="date_admitted",
        return_expectations={"date": {
            "earliest": "2020-03-01"
        }},
    ),
    died_date_cpns=patients.with_death_recorded_in_cpns(
        on_or_after="2020-03-01",
        returning="date_of_death",
        include_month=True,
        include_day=True,
        return_expectations={"date": {
            "earliest": "2020-03-01"
        }},
    ),
    died_ons_covid_flag_any=patients.with_these_codes_on_death_certificate(
        covid_codelist,
        on_or_after="2020-03-01",
        match_only_underlying_cause=False,
        return_expectations={"date": {
            "earliest": "2020-03-01"
        }},
    ),
    died_ons_covid_flag_underlying=patients.
    with_these_codes_on_death_certificate(
        covid_codelist,
        on_or_after="2020-03-01",
        match_only_underlying_cause=True,
        return_expectations={"date": {
            "earliest": "2020-03-01"
        }},
    ),
    died_date_ons=patients.died_from_any_cause(
        on_or_after="2020-03-01",
        returning="date_of_death",
        include_month=True,
        include_day=True,
        return_expectations={"date": {
            "earliest": "2020-03-01"
        }},
    ),
    ## DEMOGRAPHIC INFORMATION
    age=patients.age_as_of(
        "2020-02-29",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages"
            },
        },
    ),
    sex=patients.sex(return_expectations={
        "rate": "universal",
        "category": {
            "ratios": {
                "M": 0.49,
                "F": 0.51
            }
        },
    }),
    stp=patients.registered_practice_as_of(
        "2020-02-29",
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "STP1": 0.1,
                    "STP2": 0.1,
                    "STP3": 0.1,
                    "STP4": 0.1,
                    "STP5": 0.1,
                    "STP6": 0.1,
                    "STP7": 0.1,
                    "STP8": 0.1,
                    "STP9": 0.1,
                    "STP10": 0.1,
                }
            },
        },
    ),
    imd=patients.address_as_of(
        "2020-02-29",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "100": 0.1,
                    "200": 0.2,
                    "300": 0.7
                }
            },
        },
    ),
    ethnicity=patients.with_these_clinical_events(
        ethnicity_codes,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=True,
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.8,
                    "5": 0.1,
                    "3": 0.1
                }
            },
            "incidence": 0.75,
        },
    ),
    ## COVARIATES
    bmi=patients.most_recent_bmi(
        between=["2010-02-28", "2020-02-29"],
        minimum_age_at_measurement=16,
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "date": {},
            "float": {
                "distribution": "normal",
                "mean": 35,
                "stddev": 10
            },
            "incidence": 0.95,
        },
    ),
    smoking_status=patients.categorised_as(
        {
            "S": "most_recent_smoking_code = 'S'",
            "E": """
                     most_recent_smoking_code = 'E' OR (    
                       most_recent_smoking_code = 'N' AND ever_smoked   
                     )  
                """,
            "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
            "M": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "S": 0.6,
                    "E": 0.1,
                    "N": 0.2,
                    "M": 0.1
                }
            }
        },
        most_recent_smoking_code=patients.with_these_clinical_events(
            clear_smoking_codes,
            find_last_match_in_period=True,
            on_or_before="2020-02-29",
            returning="category",
        ),
        ever_smoked=patients.with_these_clinical_events(
            filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
            on_or_before="2020-02-29",
        ),
    ),
    smoking_status_date=patients.with_these_clinical_events(
        clear_smoking_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    #### HIGH DOSE ICS - all preparation
    high_dose_ics=patients.with_these_medications(
        high_dose_ics_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    #### HIGH DOSE ICS - single ingredient preparations
    high_dose_ics_single_ing=patients.with_these_medications(
        high_dose_ics_single_ingredient_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    #### HIGH DOSE ICS - multiple ingredient preparation
    high_dose_ics_multiple_ingredient=patients.with_these_medications(
        high_dose_ics_multiple_ingredient_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    ### LOW-MED DOSE ICS - single ingredient preparations
    low_med_dose_ics_single_ingredient=patients.with_these_medications(
        low_medium_ics_single_ingredient_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    ### LOW-MED DOSE ICS - multiple ingredient preparations
    low_med_dose_ics_multiple_ingredient=patients.with_these_medications(
        low_medium_ics_multiple_ingredient_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
    ),
    ### LOW-MED DOSE ICS - all preparation
    low_med_dose_ics=patients.with_these_medications(
        low_medium_ics_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    #### ICS SINGLE CONSTITUENT
    ics_single=patients.with_these_medications(
        ics_single_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    #### ORAL STEROIDS SINGLE CONSTITUENT
    oral_steroids=patients.with_these_medications(
        oral_steroid_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    #### SABA SINGLE CONSTITUENT
    saba_single=patients.with_these_medications(
        saba_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    #### SAMA SINGLE CONSTITUENT
    sama_single=patients.with_these_medications(
        sama_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    #### LABA SINGLE CONSTITUENT
    laba_single=patients.with_these_medications(
        single_laba_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    #### LAMA SINGLE CONSTITUENT
    lama_single=patients.with_these_medications(
        single_lama_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    #### LABA + ICS
    laba_ics=patients.with_these_medications(
        laba_ics_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    #### LABA + LAMA
    laba_lama=patients.with_these_medications(
        laba_lama_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    #### LABA + LAMA + ICS
    laba_lama_ics=patients.with_these_medications(
        laba_lama__ics_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    #### LTRA SINGLE CONSTITUENT
    ltra_single=patients.with_these_medications(
        leukotriene_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            },
        },
    ),
    ### OXYGEN THERAPY LEFT OUT AT PRESENT DUE TO POOR RECORDS
    ### COPD
    copd=patients.with_these_clinical_events(
        copd_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    ### OTHER RESPIRATORY
    other_respiratory=patients.with_these_clinical_events(
        other_respiratory_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    ### ASTHMA EVER
    asthma_ever=patients.with_these_clinical_events(
        asthma_ever_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    ### OTHER HEART DISEASE
    other_heart_disease=patients.with_these_clinical_events(
        other_heart_disease_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    ### ILI
    ili=patients.with_these_clinical_events(
        ili_codes,
        return_first_date_in_period=True,
        include_month=True,
        between=["2016-09-01", "2020-02-29"],
        return_expectations={
            "date": {
                "earliest": "2019-09-01",
                "latest": "2020-02-29"
            }
        },
    ),
    ### HYPERTENSION
    hypertension=patients.with_these_clinical_events(
        hypertension_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    ### HEART FAILURE
    heart_failure=patients.with_these_clinical_events(
        heart_failure_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    #### SYSTOLIC BLOOD PRESSURE
    bp_sys=patients.mean_recorded_value(
        systolic_blood_pressure_codes,
        on_most_recent_day_of_measurement=True,
        on_or_before="2020-02-29",
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 80,
                "stddev": 10
            },
            "date": {
                "latest": "2020-02-29"
            },
            "incidence": 0.95,
        },
    ),
    ### DIASTOLIC BLOOD PRESSURE
    bp_dias=patients.mean_recorded_value(
        diastolic_blood_pressure_codes,
        on_most_recent_day_of_measurement=True,
        on_or_before="2020-02-29",
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 120,
                "stddev": 10
            },
            "date": {
                "latest": "2020-02-29"
            },
            "incidence": 0.95,
        },
    ),
    ### DIABETES
    diabetes=patients.with_these_clinical_events(
        diabetes_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    hba1c_mmol_per_mol=patients.with_these_clinical_events(
        hba1c_new_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-29",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-02-29"
            },
            "float": {
                "distribution": "normal",
                "mean": 40.0,
                "stddev": 20
            },
            "incidence": 0.95,
        },
    ),
    hba1c_percentage=patients.with_these_clinical_events(
        hba1c_old_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-29",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-02-29"
            },
            "float": {
                "distribution": "normal",
                "mean": 5,
                "stddev": 2
            },
            "incidence": 0.95,
        },
    ),
    ### CANCER - 3 TYPES
    lung_cancer=patients.with_these_clinical_events(
        lung_cancer_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    haem_cancer=patients.with_these_clinical_events(
        haem_cancer_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    other_cancer=patients.with_these_clinical_events(
        other_cancer_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    # IMMUNOSUPPRESSION - 4 TYPES
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/36
    aplastic_anaemia=patients.with_these_clinical_events(
        aplastic_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    hiv=patients.with_these_clinical_events(
        hiv_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    permanent_immunodeficiency=patients.with_these_clinical_events(
        permanent_immune_codes,
        on_or_before="2020-02-29",
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    temporary_immunodeficiency=patients.with_these_clinical_events(
        temp_immune_codes,
        between=["2019-03-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-03-01",
                "latest": "2020-02-29"
            }
        },
    ),
    ### CHRONIC KIDNEY DISEASE
    creatinine=patients.with_these_clinical_events(
        creatinine_codes,
        find_last_match_in_period=True,
        between=["2019-02-28", "2020-02-29"],
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 60.0,
                "stddev": 15
            },
            "date": {
                "earliest": "2019-02-28",
                "latest": "2020-02-29"
            },
            "incidence": 0.95,
        },
    ),
    #### end stage renal disease codes incl. dialysis / transplant
    esrf=patients.with_these_clinical_events(
        ckd_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-02-29"
        }},
    ),
    ### VACCINATION HISTORY
    flu_vaccine_tpp_table=patients.with_tpp_vaccination_record(
        target_disease_matches="INFLUENZA",
        between=["2019-09-01", "2020-02-29"],  # current flu season
        find_first_match_in_period=True,
        returning="date",
        return_expectations={
            "date": {
                "earliest": "2019-09-01",
                "latest": "2020-02-29"
            }
        },
    ),
    flu_vaccine_med=patients.with_these_medications(
        flu_med_codes,
        between=["2019-09-01", "2020-02-29"],  # current flu season
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-09-01",
                "latest": "2020-02-29"
            }
        },
    ),
    flu_vaccine_clinical=patients.with_these_clinical_events(
        flu_clinical_given_codes,
        ignore_days_where_these_codes_occur=flu_clinical_not_given_codes,
        between=["2019-09-01", "2020-02-29"],  # current flu season
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-09-01",
                "latest": "2020-02-29"
            }
        },
    ),
    flu_vaccine=patients.satisfying(
        """
        flu_vaccine_tpp_table OR
        flu_vaccine_med OR
        flu_vaccine_clinical
        """, ),
    # PNEUMOCOCCAL VACCINE
    pneumococcal_vaccine_tpp_table=patients.with_tpp_vaccination_record(
        target_disease_matches="PNEUMOCOCCAL",
        between=["2015-03-01", "2020-02-29"],
        find_first_match_in_period=True,
        returning="date",
        return_expectations={
            "date": {
                "earliest": "2015-03-01",
                "latest": "2020-02-29"
            }
        },
    ),
    pneumococcal_vaccine_med=patients.with_these_medications(
        pneumococcal_med_codes,
        between=["2015-03-01", "2020-02-29"],  # past five years
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2015-03-01",
                "latest": "2020-02-29"
            }
        },
    ),
    pneumococcal_vaccine_clinical=patients.with_these_clinical_events(
        pneumococcal_clinical_given_codes,
        ignore_days_where_these_codes_occur=
        pneumococcal_clinical_not_given_codes,
        between=["2015-03-01", "2020-02-29"],  # past five years
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2015-03-01",
                "latest": "2020-02-29"
            }
        },
    ),
    pneumococcal_vaccine=patients.satisfying(
        """
        pneumococcal_vaccine_tpp_table OR
        pneumococcal_vaccine_med OR
        pneumococcal_vaccine_clinical
        """, ),
    ### EXACERBATION
    # count
    exacerbation_count=patients.with_these_medications(
        oral_steroid_med_codes,
        between=["2019-03-01", "2020-02-29"],
        ignore_days_where_these_clinical_codes_occur=combine_codelists(
            sle_codes,
            interstital_lung_codes,
            ra_codes,
            ms_codes,
            temporal_arteritis_codes,
        ),
        returning="number_of_episodes",
        episode_defined_as="series of events each <= 14 days apart",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 2,
                "stddev": 1
            },
            "incidence": 0.2,
        },
    ),
    # # binary flag
    exacerbations=patients.satisfying(
        """
        exacerbation_count > 0
        """, ),
    ### INSULIN USE
    insulin=patients.with_these_medications(
        insulin_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    ### STATIN USE
    statin=patients.with_these_medications(
        statin_med_codes,
        between=["2019-11-01", "2020-02-29"],
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-11-01",
                "latest": "2020-02-29"
            }
        },
    ),
    ### GP CONSULTATION RATE
    gp_consult_count=patients.with_gp_consultations(
        between=["2019-03-01", "2020-02-29"],
        returning="number_of_matches_in_period",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 4,
                "stddev": 2
            },
            "date": {
                "earliest": "2019-03-01",
                "latest": "2020-02-29"
            },
            "incidence": 0.7,
        },
    ),
    has_consultation_history=patients.
    with_complete_gp_consultation_history_between(
        "2019-03-01",
        "2020-02-29",
        return_expectations={"incidence": 0.9},
    ),
)
Beispiel #7
0
# Cohort registered with one practice between 2019-02-01 and 2020-02-01,
# described by demographic, geographic and clinical covariates.
study = StudyDefinition(
    # Study-wide default expectations.
    default_expectations={
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "rate": "uniform",
        "incidence": 0.5,
    },
    population=patients.registered_with_one_practice_between(
        "2019-02-01",
        "2020-02-01",
    ),
    # Later variables point at this date via the string "index_date".
    index_date="2020-02-01",
    # ---- Demographics ----
    # NOTE(review): age is measured at 2020-03-31 rather than at the index
    # date -- confirm this is intended.
    age=patients.age_as_of(
        "2020-03-31",
        return_expectations={
            "rate": "universal",
            "int": {"distribution": "population_ages"},
            "incidence": 0.001,
        },
    ),
    sex=patients.sex(
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"M": 0.49, "F": 0.51}},
        },
    ),
    # Category of the latest ethnicity code on or before the index date.
    # Expected split: category "1" at 0.25, categories "2".."16" at 0.05 each.
    ethnicity=patients.with_these_clinical_events(
        ethnicity_codes,
        returning="category",
        find_last_match_in_period=True,
        on_or_before="index_date",
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.25,
                    **{str(category): 0.05 for category in range(2, 17)},
                },
            },
            "incidence": 0.75,
        },
    ),
    # ---- Clinical covariates ----
    # Latest non-missing BMI value on or before the index date.
    bmi=patients.with_these_clinical_events(
        bmi_codes,
        returning="numeric_value",
        ignore_missing_values=True,
        find_last_match_in_period=True,
        on_or_before="index_date",
        return_expectations={
            "float": {"distribution": "normal", "mean": 28, "stddev": 5},
        },
    ),
    # Flag: any diabetes code on or before the index date.
    diabetes=patients.with_these_clinical_events(
        diabetes_codes,
        returning="binary_flag",
        find_last_match_in_period=True,
        on_or_before="index_date",
        return_expectations={"incidence": 0.10},
    ),
    # Flag: any chronic liver disease code on or before the index date.
    # NOTE(review): the codelist name is spelled "chronis_..." -- presumably
    # it matches the name defined with the codelists; confirm.
    chronic_liver_disease=patients.with_these_clinical_events(
        chronis_liver_disease_codes,
        on_or_before="index_date",
        returning="binary_flag",
        return_expectations={"incidence": 0.01},
    ),
    # ---- Deprivation and geography ----
    # Index of multiple deprivation split into five bands of width 32844/5;
    # category "0" is the default when no band matches.
    imd=patients.categorised_as(
        {
            "0": "DEFAULT",
            "1": (
                "index_of_multiple_deprivation >=1 "
                "AND index_of_multiple_deprivation < 32844*1/5"
            ),
            "2": (
                "index_of_multiple_deprivation >= 32844*1/5 "
                "AND index_of_multiple_deprivation < 32844*2/5"
            ),
            "3": (
                "index_of_multiple_deprivation >= 32844*2/5 "
                "AND index_of_multiple_deprivation < 32844*3/5"
            ),
            "4": (
                "index_of_multiple_deprivation >= 32844*3/5 "
                "AND index_of_multiple_deprivation < 32844*4/5"
            ),
            "5": "index_of_multiple_deprivation >= 32844*4/5 ",
        },
        # Helper variable referenced by the band expressions above.
        index_of_multiple_deprivation=patients.address_as_of(
            "index_date",
            returning="index_of_multiple_deprivation",
            round_to_nearest=100,
        ),
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "0": 0.01,
                    **{str(band): 0.20 for band in range(1, 5)},
                    "5": 0.19,
                },
            },
        },
    ),
    # Region name of the registered practice at the index date.
    region=patients.registered_practice_as_of(
        "index_date",
        returning="nuts1_region_name",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "North East": 0.1,
                    "North West": 0.1,
                    "Yorkshire and The Humber": 0.1,
                    "East Midlands": 0.1,
                    "West Midlands": 0.1,
                    "East": 0.1,
                    "London": 0.2,
                    "South West": 0.1,
                    "South East": 0.1,
                },
            },
        },
    ),
    # STP code of the registered practice; dummy data spreads patients
    # evenly over STP1..STP10.
    stp=patients.registered_practice_as_of(
        "index_date",
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {f"STP{number}": 0.1 for number in range(1, 11)},
            },
        },
    ),
    # Rural/urban classification of the address; dummy data spreads patients
    # evenly over the integer classes 1..8.
    rural_urban=patients.address_as_of(
        "index_date",
        returning="rural_urban_classification",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {classification: 0.125 for classification in range(1, 9)},
            },
        },
    ),
    # ---- Prior covid ----
    # Date of the first matching primary-care covid code (diagnosis code,
    # positive test or sequelae) on or before the index date.
    prior_covid_date=patients.with_these_clinical_events(
        combine_codelists(
            covid_primary_care_code,
            covid_primary_care_positive_test,
            covid_primary_care_sequalae,
        ),
        returning="date",
        date_format="YYYY-MM-DD",
        on_or_before="index_date",
        find_first_match_in_period=True,
        return_expectations={"rate": "exponential_increase"},
    ),
)
# Cohort of patients registered and not recorded as dead at the index date,
# with age band, STP, vaccine-dose flags and one flag per ethnicity codelist.
study = StudyDefinition(
    # Study-wide default expectations.
    default_expectations={
        "date": {"earliest": "2020-01-01", "latest": "today"},
        "rate": "universal",
    },
    # The index date comes from the module-level ``index_date`` name.
    index_date=index_date,
    # Population: registered as of the index date and with no death recorded
    # on or before it.
    population=patients.satisfying(
        "(NOT died) AND (registered)",
        died=patients.died_from_any_cause(
            on_or_before=index_date,
            returning="binary_flag",
        ),
        registered=patients.registered_as_of(index_date),
    ),
    age=patients.age_as_of(
        index_date,
        return_expectations={
            "rate": "universal",
            "int": {"distribution": "population_ages"},
        },
    ),
    # Age bands derived from ``age``; "0" is the default when no band matches.
    age_group=patients.categorised_as(
        {
            "0": "DEFAULT",
            "16 - under 40": " age >= 16 AND age < 40",
            "40 - under 50": " age >= 40 AND age < 50",
            "50 - under 65": " age >= 50 AND age < 65",
            "65 plus": " age >=  65",
        },
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": dict.fromkeys(
                    (
                        "16 - under 40",
                        "40 - under 50",
                        "50 - under 65",
                        "65 plus",
                    ),
                    0.25,
                ),
            },
        },
    ),
    stp=patients.registered_practice_as_of(
        "index_date",
        returning="stp_code",
        return_expectations={
            "category": {"ratios": {"STP1": 0.5, "STP2": 0.5}},
        },
    ),
    # Binary flags that all share one shape: "was any code from this codelist
    # recorded between the index date and one month after it?".  Each pair is
    # (variable name, codelist); the variables are created in the order listed.
    **{
        variable: patients.with_these_clinical_events(
            codes,
            returning="binary_flag",
            between=["index_date", "index_date + 1 month"],
            return_expectations={"incidence": 0.4},
        )
        for variable, codes in (
            ("first_dose", first_dose_code),
            ("second_dose", second_dose_code),
            ("ethnicity_white_british", ethnicity_white_british_code),
            ("ethnicity_white_irish", ethnicity_white_irish_code),
            ("ethnicity_white_other", ethnicity_white_other_code),
            (
                "ethnicity_mixed_white_caribbean",
                ethnicity_mixed_white_caribbean_code,
            ),
            (
                "ethnicity_mixed_white_african",
                ethnicity_mixed_white_african_code,
            ),
            ("ethnicity_mixed_white_asian", ethnicity_mixed_white_asian_code),
            ("ethnicity_mixed_other", ethnicity_mixed_other_code),
            ("ethnicity_asian_indian", ethnicity_asian_indian_code),
            ("ethnicity_asian_pak", ethnicity_asian_pak_code),
            ("ethnicity_mixed_asian_bang", ethnicity_mixed_asian_bang_code),
            ("ethnicity_asian_other", ethnicity_asian_other_code),
            ("ethnicity_black_caribbean", ethnicity_black_caribbean_code),
            ("ethnicity_black_african", ethnicity_black_african_code),
            ("ethnicity_black_other", ethnicity_black_other_code),
            ("ethnicity_chinese", ethnicity_chinese_code),
            ("ethnicity_other", ethnicity_other_code),
            ("ethnicity_non_other", ethnicity_non_other_code),
            ("ethnicity_not_stated", ethnicity_not_stated_code),
            ("ethnicity_not_recorded", ethnicity_not_recorded_code),
            ("ethnicity_not_given_refused", ethnicity_not_given_refused_code),
        )
    },
    # Single ethnicity category built from the flags above; "0" is the
    # default when none of the flags is set.
    ethnicity=patients.categorised_as(
        {
            "0": "DEFAULT",
            "white_british": " (ethnicity_white_british) ",
            "white_irish": " (ethnicity_white_irish) ",
            "white_other": " (ethnicity_white_other) ",
            "mixed_white_caribbean": " (ethnicity_mixed_white_caribbean) ",
            "mixed_white_african": " (ethnicity_mixed_white_african) ",
            "mixed_white_asian": " (ethnicity_mixed_white_asian) ",
            "mixed_other": " (ethnicity_mixed_other) ",
            "asian_indian": " (ethnicity_asian_indian) ",
            "asian_pak": " (ethnicity_asian_pak) ",
            "mixed_asian_bang": " (ethnicity_mixed_asian_bang) ",
            "asian_other": " (ethnicity_asian_other) ",
            "black_caribbean": " (ethnicity_black_caribbean) ",
            "black_african": " (ethnicity_black_african) ",
            "black_other": " (ethnicity_black_other) ",
            "chinese": " (ethnicity_chinese) ",
            "other": " (ethnicity_other) ",
            "non_other": " (ethnicity_non_other) ",
            "not_stated": " (ethnicity_not_stated) ",
            "not_recorded": " (ethnicity_not_recorded) ",
            "not_given_refused": " (ethnicity_not_given_refused) ",
        },
        return_expectations={
            "rate": "universal",
            "category": {
                # Twenty categories at 0.05 each.
                "ratios": dict.fromkeys(
                    (
                        "white_british",
                        "white_irish",
                        "white_other",
                        "mixed_white_caribbean",
                        "mixed_white_african",
                        "mixed_white_asian",
                        "mixed_other",
                        "asian_indian",
                        "asian_pak",
                        "mixed_asian_bang",
                        "asian_other",
                        "black_caribbean",
                        "black_african",
                        "black_other",
                        "chinese",
                        "other",
                        "non_other",
                        "not_stated",
                        "not_recorded",
                        "not_given_refused",
                    ),
                    0.05,
                ),
            },
        },
    ),
)
Beispiel #9
0
# Small study: age, sex and a cardiac disease flag as inputs, covid on the
# death certificate as the outcome.
study = StudyDefinition(
    # Study-wide default expectations.
    default_expectations={
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "rate": "exponential_increase",
    },
    # Population: registered with one practice between the two dates.
    population=patients.registered_with_one_practice_between(
        "2019-02-01",
        "2020-02-01",
    ),
    # ---- Input variables ----
    age=patients.age_as_of(
        "2020-02-01",
        return_expectations={
            "rate": "universal",
            "int": {"distribution": "population_ages"},
        },
    ),
    sex=patients.sex(
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"M": 0.49, "F": 0.51}},
        },
    ),
    # No date restriction is passed here, unlike the other variables.
    cardiac_disease=patients.with_these_clinical_events(
        cardiac_disease_codes,
        returning="binary_flag",
        return_expectations={"incidence": 0.2},
    ),
    # ---- Output variable ----
    # Matches covid codes in any position on the death certificate, not only
    # as the underlying cause.
    covid_on_death_certificate=patients.with_these_codes_on_death_certificate(
        covid_codes,
        match_only_underlying_cause=False,
        return_expectations={"incidence": 0.001},
    ),
)
def test_validate_category_expectations():
    """Check which category names ``validate_category_expectations`` accepts.

    Three situations are exercised, each with one category that must be
    rejected with ``ValueError`` and one that must pass silently:

    * only a categorised codelist is supplied,
    * only explicit category definitions are supplied,
    * both are supplied, in which case the explicit definitions are
      expected to take precedence over the codelist's categories.
    """

    def single_category(name):
        # Expectations dict assigning the whole ratio to one category.
        return {"category": {"ratios": {name: 1}}}

    categorised_codelist = codelist([("X", "Y")], system="ctv3")
    categorised_codelist.has_categories = True

    category_definitions = {"A": "sex = 'F'", "B": "sex = 'M'"}
    study = StudyDefinition(population=patients.all())

    # (validation source kwargs, category expected to fail, category
    # expected to pass)
    scenarios = [
        # validate against codelists
        ({"codelist": categorised_codelist}, "X", "Y"),
        # validate against definitions
        ({"category_definitions": category_definitions}, "X", "A"),
        # supplied category definitions override categories in codelists
        (
            {
                "codelist": categorised_codelist,
                "category_definitions": category_definitions,
            },
            "Y",
            "A",
        ),
    ]

    for source_kwargs, rejected, accepted in scenarios:
        with pytest.raises(ValueError):
            study.validate_category_expectations(
                **source_kwargs,
                return_expectations=single_category(rejected),
            )
        study.validate_category_expectations(
            **source_kwargs,
            return_expectations=single_category(accepted),
        )
Beispiel #11
0
# Study definition for patients with a positive SARS-CoV-2 test in SGSS between
# 2020-11-16 and 2021-01-11 who were registered with one practice for the
# preceding year. Many covariates below are dated relative to the
# "sgss_pos_inrange" variable (the date of that positive test).
study = StudyDefinition(
    # Baseline settings for the expectations (dummy data) framework
    default_expectations={
        "date": {
            "earliest": "1970-01-01",
            "latest": "today"
        },
        "rate": "uniform",
        "incidence": 0.2,
    },

    # STUDY POPULATION - registered 1 year prior to November 16th 2020
    # and with a positive SGSS test in the study period
    population=patients.satisfying(
        "one_practice AND sgss_pos_inrange",
        one_practice=patients.registered_with_one_practice_between(
            "2019-11-16", "2020-11-16"),
    ),
    # Deregistration date (year-month only), from the study start onwards
    dereg_date=patients.date_deregistered_from_all_supported_practices(
        on_or_after="2020-11-16",
        date_format="YYYY-MM",
    ),

    # OUTCOMES - ONS death dates
    died_ons_covid_flag_any=patients.with_these_codes_on_death_certificate(
        covid_codelist,
        on_or_after="2020-02-01",
        match_only_underlying_cause=False,
        return_expectations={
            "date": {
                "earliest": "2020-02-01"
            },
            "incidence": 0.5
        },
    ),
    died_date_ons=patients.died_from_any_cause(
        on_or_after="2020-02-01",
        returning="date_of_death",
        include_month=True,
        include_day=True,
        return_expectations={
            "date": {
                "earliest": "2020-08-01"
            },
            "incidence": 0.5
        },
    ),

    ### Primary care COVID cases
    # First date matching any of the clinical, test or sequelae codelists
    covid_tpp_probable=patients.with_these_clinical_events(
        combine_codelists(
            covid_identification_in_primary_care_case_codes_clinical,
            covid_identification_in_primary_care_case_codes_test,
            covid_identification_in_primary_care_case_codes_seq),
        return_first_date_in_period=True,
        include_day=True,
        return_expectations={
            "date": {
                "earliest": "2020-10-20"
            },
            "incidence": 0.2
        },
    ),

    # Any COVID vaccination (first dose)
    covid_vacc_date=patients.with_tpp_vaccination_record(
        target_disease_matches="SARS-2 CORONAVIRUS",
        on_or_after="2020-12-01",  # check all december to date
        find_first_match_in_period=True,
        returning="date",
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest":
                "2020-12-08",  # first vaccine administered on the 8/12
                "latest": "2021-01-31",
            },
            "incidence": 0.1
        },
    ),

    ### COVID test positive (SGSS)
    # First positive test at any time (no date restriction)
    first_pos_test_sgss=patients.with_test_result_in_sgss(
        pathogen="SARS-CoV-2",
        test_result="positive",
        find_first_match_in_period=True,
        returning="date",
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest": "2020-06-01"
            },
            "incidence": 0.4
        },
    ),

    ### SGSS positive in study period
    # Used both in the population expression and as the reference date for
    # many of the variables below
    sgss_pos_inrange=patients.with_test_result_in_sgss(
        pathogen="SARS-CoV-2",
        test_result="positive",
        find_first_match_in_period=True,
        between=["2020-11-16", "2021-01-11"],
        returning="date",
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest": "2020-11-16",
                "latest": "2021-01-11"
            },
            "incidence": 0.9
        },
    ),
    # S-gene target failure status for the same first positive test
    sgtf=patients.with_test_result_in_sgss(
        pathogen="SARS-CoV-2",
        test_result="positive",
        find_first_match_in_period=True,
        between=["2020-11-16", "2021-01-11"],
        returning="s_gene_target_failure",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "0": 0.4,
                    "1": 0.4,
                    "9": 0.1,
                    "": 0.1
                }
            },
        },
    ),

    # SUS HOSPITAL ADMISSION
    # First COVID-coded admission on or after the positive test
    covid_admission_date=patients.admitted_to_hospital(
        returning="date_admitted",
        with_these_diagnoses=covid_codelist,
        on_or_after="sgss_pos_inrange",
        find_first_match_in_period=True,
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest": "2020-11-16"
            },
            "incidence": 0.3
        },
    ),
    covid_discharge_date=patients.admitted_to_hospital(
        returning="date_discharged",
        with_these_diagnoses=covid_codelist,
        on_or_after="sgss_pos_inrange",
        find_first_match_in_period=True,
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest": "2020-11-16"
            },
            "incidence": 0.2
        },
    ),

    # DAYS SPENT IN ICU
    covid_icu_days=patients.admitted_to_hospital(
        returning="days_in_critical_care",
        with_these_diagnoses=covid_codelist,
        on_or_after="sgss_pos_inrange",
        find_first_match_in_period=True,
        return_expectations={
            "category": {
                "ratios": {
                    "10": 0.5,
                    "20": 0.5
                }
            },
            "incidence": 0.4,
        },
    ),

    # ICU ADMISSION
    icu_admission_date=patients.admitted_to_icu(
        on_or_after="sgss_pos_inrange",
        find_first_match_in_period=True,
        returning="date_admitted",
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest": "2020-11-16"
            },
            "incidence": 0.2
        },
    ),

    ### DEMOGRAPHIC COVARIATES
    # AGE (as of the positive test date)
    age=patients.age_as_of(
        "sgss_pos_inrange",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages"
            },
        },
    ),

    # SEX
    sex=patients.sex(return_expectations={
        "rate": "universal",
        "category": {
            "ratios": {
                "M": 0.49,
                "F": 0.51
            }
        },
    }),

    # DEPRIVATION
    imd=patients.address_as_of(
        "sgss_pos_inrange",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "100": 0.1,
                    "200": 0.2,
                    "300": 0.2,
                    "400": 0.2,
                    "500": 0.2,
                    "600": 0.1
                }
            },
        },
    ),

    # GEOGRAPHIC REGION CALLED STP
    stp=patients.registered_practice_as_of(
        "sgss_pos_inrange",
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "STP1": 0.1,
                    "STP2": 0.1,
                    "STP3": 0.1,
                    "STP4": 0.1,
                    "STP5": 0.1,
                    "STP6": 0.1,
                    "STP7": 0.1,
                    "STP8": 0.1,
                    "STP9": 0.1,
                    "STP10": 0.1,
                }
            },
        },
    ),

    # GEOGRAPHIC REGION MSOA
    # Dummy-data ratios come from dict_msoa, which is defined outside this
    # statement
    msoa=patients.registered_practice_as_of(
        "sgss_pos_inrange",
        returning="msoa_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": dict_msoa
            },
        },
    ),

    # REGION - one of NHS England 9 regions
    region=patients.registered_practice_as_of(
        "sgss_pos_inrange",
        returning="nuts1_region_name",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "North East": 0.1,
                    "North West": 0.1,
                    "Yorkshire and The Humber": 0.1,
                    "East Midlands": 0.1,
                    "West Midlands": 0.1,
                    "East of England": 0.1,
                    "London": 0.2,
                    "South East": 0.1,
                    "South West": 0.1,
                },
            },
        },
    ),

    # RURAL OR URBAN LOCATION
    rural_urban=patients.address_as_of(
        "sgss_pos_inrange",
        returning="rural_urban_classification",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "1": 0.1,
                    "2": 0.1,
                    "3": 0.1,
                    "4": 0.1,
                    "5": 0.1,
                    "6": 0.1,
                    "7": 0.2,
                    "8": 0.2,
                }
            },
        },
    ),

    # HOUSEHOLD INFORMATION
    # Both household variables use the fixed date 2020-02-01 rather than the
    # positive test date
    household_id=patients.household_as_of(
        "2020-02-01",
        returning="pseudo_id",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 1000,
                "stddev": 200
            },
            "incidence": 1,
        },
    ),
    household_size=patients.household_as_of(
        "2020-02-01",
        returning="household_size",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 3,
                "stddev": 1
            },
            "incidence": 1,
        },
    ),
    # Care home category as of the positive test date; "U" is the default
    # when none of the other expressions match
    care_home_type=patients.care_home_status_as_of(
        "sgss_pos_inrange",
        categorised_as={
            "PC": """
              IsPotentialCareHome
              AND LocationDoesNotRequireNursing='Y'
              AND LocationRequiresNursing='N'
            """,
            "PN": """
              IsPotentialCareHome
              AND LocationDoesNotRequireNursing='N'
              AND LocationRequiresNursing='Y'
            """,
            "PS": "IsPotentialCareHome",
            "U": "DEFAULT",
        },
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "PC": 0.05,
                    "PN": 0.05,
                    "PS": 0.05,
                    "U": 0.85,
                },
            },
        },
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/10
    bmi=patients.most_recent_bmi(
        between=["2010-02-01", "sgss_pos_inrange"],
        minimum_age_at_measurement=16,
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "date": {},
            "float": {
                "distribution": "normal",
                "mean": 35,
                "stddev": 10
            },
            "incidence": 0.95,
        },
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/6
    # Smoking status derived from the most recent smoking code plus whether
    # any "S" or "E" code was ever recorded; "M" is the default category
    smoking_status=patients.categorised_as(
        {
            "S": "most_recent_smoking_code = 'S'",
            "E": """
                 most_recent_smoking_code = 'E' OR (
                   most_recent_smoking_code = 'N' AND ever_smoked
                 )
            """,
            "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
            "M": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "S": 0.6,
                    "E": 0.1,
                    "N": 0.2,
                    "M": 0.1
                }
            }
        },
        most_recent_smoking_code=patients.with_these_clinical_events(
            clear_smoking_codes,
            find_last_match_in_period=True,
            on_or_before="sgss_pos_inrange",
            returning="category",
        ),
        ever_smoked=patients.with_these_clinical_events(
            filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
            on_or_before="sgss_pos_inrange",
        ),
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/27
    ethnicity=patients.with_these_clinical_events(
        ethnicity_codes,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=True,
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.75,
                    "2": 0.05,
                    "3": 0.05,
                    "4": 0.05,
                    "5": 0.1
                }
            },
            "incidence": 0.75,
        },
    ),
    ethnicity_16=patients.with_these_clinical_events(
        ethnicity_codes_16,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=True,
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.8,
                    "5": 0.1,
                    "3": 0.1
                }
            },
            "incidence": 0.75,
        },
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/21
    chronic_respiratory_disease=patients.with_these_clinical_events(
        chronic_respiratory_disease_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/55
    # Asthma category: "1" and "2" both require a recent asthma code (or an
    # asthma code ever without a COPD code ever) and differ only in the number
    # of prednisolone matches in the last year (0 or >4 vs. 1-4); "0" is the
    # default
    asthma=patients.categorised_as(
        {
            "0":
            "DEFAULT",
            "1":
            """
                (
                  recent_asthma_code OR (
                    asthma_code_ever AND NOT
                    copd_code_ever
                  )
                ) AND (
                  prednisolone_last_year = 0 OR 
                  prednisolone_last_year > 4
                )
            """,
            "2":
            """
                (
                  recent_asthma_code OR (
                    asthma_code_ever AND NOT
                    copd_code_ever
                  )
                ) AND
                prednisolone_last_year > 0 AND
                prednisolone_last_year < 5
                
            """,
        },
        return_expectations={
            "category": {
                "ratios": {
                    "0": 0.8,
                    "1": 0.1,
                    "2": 0.1
                }
            },
        },
        recent_asthma_code=patients.with_these_clinical_events(
            asthma_codes,
            between=["2017-02-01", "2020-11-16"],
        ),
        asthma_code_ever=patients.with_these_clinical_events(asthma_codes),
        copd_code_ever=patients.with_these_clinical_events(
            chronic_respiratory_disease_codes),
        prednisolone_last_year=patients.with_these_medications(
            pred_codes,
            between=["2019-11-16", "2020-11-16"],
            returning="number_of_matches_in_period",
        ),
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/7
    chronic_cardiac_disease=patients.with_these_clinical_events(
        chronic_cardiac_disease_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/30
    diabetes=patients.with_these_clinical_events(
        diabetes_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/32
    lung_cancer=patients.with_these_clinical_events(
        lung_cancer_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),
    haem_cancer=patients.with_these_clinical_events(
        haem_cancer_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),
    other_cancer=patients.with_these_clinical_events(
        other_cancer_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/12
    chronic_liver_disease=patients.with_these_clinical_events(
        chronic_liver_disease_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/14
    other_neuro=patients.with_these_clinical_events(
        other_neuro,
        return_first_date_in_period=True,
        include_month=True,
    ),
    stroke=patients.with_these_clinical_events(
        stroke,
        return_first_date_in_period=True,
        include_month=True,
    ),
    dementia=patients.with_these_clinical_events(
        dementia,
        return_first_date_in_period=True,
        include_month=True,
    ),

    # Chronic kidney disease
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/17
    # Most recent creatinine value (and its year-month) up to the study start
    creatinine=patients.with_these_clinical_events(
        creatinine_codes,
        find_last_match_in_period=True,
        on_or_before="2020-11-16",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 60.0,
                "stddev": 15
            },
            "date": {
                "earliest": "2019-02-28",
                "latest": "2020-11-16"
            },
            "incidence": 0.95,
        },
    ),
    dialysis=patients.with_these_clinical_events(
        dialysis_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/31
    organ_transplant=patients.with_these_clinical_events(
        organ_transplant_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/13
    dysplenia=patients.with_these_clinical_events(
        spleen_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),
    sickle_cell=patients.with_these_clinical_events(
        sickle_cell_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/36
    # Note: aplastic anaemia and temporary immunodeficiency return the LAST
    # matching date, unlike most other conditions here which return the first
    aplastic_anaemia=patients.with_these_clinical_events(
        aplastic_codes,
        return_last_date_in_period=True,
        include_month=True,
    ),
    hiv=patients.with_these_clinical_events(
        hiv_codes,
        returning="category",
        find_first_match_in_period=True,
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "category": {
                "ratios": {
                    "43C3.": 0.8,
                    "XaFuL": 0.2
                }
            },
        },
    ),
    permanent_immunodeficiency=patients.with_these_clinical_events(
        permanent_immune_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),
    temporary_immunodeficiency=patients.with_these_clinical_events(
        temp_immune_codes,
        return_last_date_in_period=True,
        include_month=True,
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/23
    # immunosuppressant_med=
    # hypertension
    hypertension=patients.with_these_clinical_events(
        hypertension_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),

    # Blood pressure
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/35
    # Dummy-data means: systolic (120) must be higher than diastolic (80)
    bp_sys=patients.mean_recorded_value(
        systolic_blood_pressure_codes,
        on_most_recent_day_of_measurement=True,
        on_or_before="2020-11-16",
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 120,
                "stddev": 10
            },
            "date": {
                "latest": "2020-11-16"
            },
            "incidence": 0.95,
        },
    ),
    bp_dias=patients.mean_recorded_value(
        diastolic_blood_pressure_codes,
        on_most_recent_day_of_measurement=True,
        on_or_before="2020-11-16",
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 80,
                "stddev": 10
            },
            "date": {
                "latest": "2020-11-16"
            },
            "incidence": 0.95,
        },
    ),
    # HbA1c is captured twice: once from the newer (mmol/mol) codelist and
    # once from the older (percentage) codelist
    hba1c_mmol_per_mol=patients.with_these_clinical_events(
        hba1c_new_codes,
        find_last_match_in_period=True,
        on_or_before="2020-11-16",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-11-16"
            },
            "float": {
                "distribution": "normal",
                "mean": 40.0,
                "stddev": 20
            },
            "incidence": 0.95,
        },
    ),
    hba1c_percentage=patients.with_these_clinical_events(
        hba1c_old_codes,
        find_last_match_in_period=True,
        on_or_before="2020-11-16",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-11-16"
            },
            "float": {
                "distribution": "normal",
                "mean": 5,
                "stddev": 2
            },
            "incidence": 0.95,
        },
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/49
    ra_sle_psoriasis=patients.with_these_clinical_events(
        ra_sle_psoriasis_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),
)
def test_make_df_from_expectations_with_aggregate_of():
    # aggregate of variables defined in their own right
    study = StudyDefinition(
        default_expectations={
            "date": {
                "earliest": "1900-01-01",
                "latest": "today"
            },
            "rate": "exponential_increase",
            "incidence": 0.2,
        },
        population=patients.all(),
        date_1=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            date_format="YYYY-MM-DD",
        ),
        date_2=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            date_format="YYYY-MM-DD",
        ),
        date_min=patients.minimum_of(
            "date_1",
            "date_2",
        ),
        date_max=patients.maximum_of(
            "date_1",
            "date_2",
        ),
        date_min_fixed=patients.minimum_of(
            "date_1",
            "1980-10-20",
        ),
        int_1=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="number_of_matches_in_period",
            return_expectations={
                "int": {
                    "distribution": "normal",
                    "mean": 25,
                    "stddev": 5
                },
                "incidence": 0.5,
            },
        ),
        int_2=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="number_of_matches_in_period",
            return_expectations={
                "int": {
                    "distribution": "normal",
                    "mean": 25,
                    "stddev": 5
                },
                "incidence": 0.5,
            },
        ),
        int_min=patients.minimum_of("int_1", "int_2"),
        int_max=patients.maximum_of("int_1", "int_2"),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    for _, row in result.iterrows():
        print(row)
        dates = [
            d for d in [row["date_1"], row["date_2"]] if isinstance(d, str)
        ]
        if dates:
            date_min = min(dates)
            date_max = max(dates)
        else:
            date_min = float("nan")
            date_max = float("nan")
        assert_nan_equal(row["date_min"], date_min)
        assert_nan_equal(row["date_max"], date_max)
        ints = [i for i in [row["int_1"], row["int_2"]] if isinstance(i, int)]
        if ints:
            int_min = min(ints)
            int_max = max(ints)
        else:
            int_min = float("nan")
            int_max = float("nan")
        assert_nan_equal(row["int_min"], int_min)
        assert_nan_equal(row["int_max"], int_max)

    # aggregate of variables defined only within aggregate function
    study = StudyDefinition(
        default_expectations={
            "date": {
                "earliest": "1900-01-01",
                "latest": "today"
            },
            "rate": "exponential_increase",
            "incidence": 1,
        },
        # We use an expression here (never mind that it's a trivial and
        # pointless one) as that triggers a bug which we want to ensure we've
        # fixed
        population=patients.satisfying("foo OR bar",
                                       foo=patients.all(),
                                       bar=patients.all()),
        date_min=patients.maximum_of(
            date_1=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="date",
                date_format="YYYY-MM-DD",
            ),
            date_2=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="date",
                date_format="YYYY-MM-DD",
            ),
        ),
        date_max=patients.maximum_of(
            date_3=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="date",
                date_format="YYYY-MM-DD",
            ),
            date_4=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="date",
                date_format="YYYY-MM-DD",
            ),
        ),
        int_min=patients.minimum_of(
            int_1=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="number_of_matches_in_period",
                return_expectations={
                    "int": {
                        "distribution": "normal",
                        "mean": 25,
                        "stddev": 5
                    },
                    "incidence": 0.5,
                },
            ),
            int_2=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="number_of_matches_in_period",
                return_expectations={
                    "int": {
                        "distribution": "normal",
                        "mean": 25,
                        "stddev": 5
                    },
                    "incidence": 0.5,
                },
            ),
        ),
        int_max=patients.maximum_of(
            int_3=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="number_of_matches_in_period",
                return_expectations={
                    "int": {
                        "distribution": "normal",
                        "mean": 25,
                        "stddev": 5
                    },
                    "incidence": 0.5,
                },
            ),
            int_4=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="number_of_matches_in_period",
                return_expectations={
                    "int": {
                        "distribution": "normal",
                        "mean": 25,
                        "stddev": 5
                    },
                    "incidence": 0.5,
                },
            ),
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    for _, row in result.iterrows():
        print(row)
        assert pd.notna(row["date_min"])
        assert pd.notna(row["date_max"])
        assert pd.notna(row["int_min"])
        assert pd.notna(row["int_max"])

    # aggregate of variables defined both inside and outside aggregation
    study = StudyDefinition(
        default_expectations={
            "date": {
                "earliest": "1900-01-01",
                "latest": "today"
            },
            "rate": "exponential_increase",
            "incidence": 0.2,
        },
        population=patients.all(),
        date_1=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            date_format="YYYY-MM-DD",
        ),
        date_2=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            date_format="YYYY-MM-DD",
        ),
        date_min=patients.minimum_of(
            "date_1",
            "date_2",
            date_3=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="date",
                date_format="YYYY-MM-DD",
            ),
        ),
        date_max=patients.maximum_of(
            "date_1",
            "date_2",
            date_4=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="date",
                date_format="YYYY-MM-DD",
            ),
        ),
        int_1=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="number_of_matches_in_period",
            return_expectations={
                "int": {
                    "distribution": "normal",
                    "mean": 25,
                    "stddev": 5
                },
                "incidence": 0.5,
            },
        ),
        int_2=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="number_of_matches_in_period",
            return_expectations={
                "int": {
                    "distribution": "normal",
                    "mean": 25,
                    "stddev": 5
                },
                "incidence": 0.5,
            },
        ),
        int_min=patients.minimum_of(
            "int_1",
            "int_2",
            int_3=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="number_of_matches_in_period",
                return_expectations={
                    "int": {
                        "distribution": "normal",
                        "mean": 25,
                        "stddev": 5
                    },
                    "incidence": 0.5,
                },
            ),
        ),
        int_max=patients.maximum_of(
            "int_1",
            "int_2",
            int_4=patients.with_these_clinical_events(
                codelist(["X"], system="ctv3"),
                returning="number_of_matches_in_period",
                return_expectations={
                    "int": {
                        "distribution": "normal",
                        "mean": 25,
                        "stddev": 5
                    },
                    "incidence": 0.5,
                },
            ),
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    for _, row in result.iterrows():
        print(row)
        dates = [
            d for d in [row["date_1"], row["date_2"]] if isinstance(d, str)
        ]
        if dates:
            date_min = min(dates)
            date_max = max(dates)
        else:
            date_min = float("nan")
            date_max = float("nan")
        assert_nan_equal(row["date_min"], date_min)
        assert_nan_equal(row["date_max"], date_max)
        ints = [i for i in [row["int_1"], row["int_2"]] if isinstance(i, int)]
        if ints:
            int_min = min(ints)
            int_max = max(ints)
        else:
            int_min = float("nan")
            int_max = float("nan")
        assert_nan_equal(row["int_min"], int_min)
        assert_nan_equal(row["int_max"], int_max)
Beispiel #13
0
# Study definition extracting deregistration, emergency-care, ICU and CPNS
# death dates plus basic demographics. Every date-valued variable requests its
# precision through ``date_format`` so the study uses a single convention.
study = StudyDefinition(
    # Expectations applied to any variable that does not override them.
    default_expectations={
        "date": {"earliest": "1970-01-01", "latest": "today"},
        "rate": "uniform",
        "incidence": 0.2,
    },
    # Population: registered with one practice between 2019-02-01 and
    # 2020-02-01.
    population=patients.registered_with_one_practice_between(
        "2019-02-01", "2020-02-01"
    ),
    # Deregistration date on or before 2020-12-01, at month precision.
    dereg_date=patients.date_deregistered_from_all_supported_practices(
        on_or_before="2020-12-01",
        date_format="YYYY-MM",
        return_expectations={"date": {"earliest": "2020-02-01"}},
    ),
    # First emergency-care arrival date on or after 2020-02-01.
    a_e_consult_date=patients.attended_emergency_care(
        on_or_after="2020-02-01",
        returning="date_arrived",
        date_format="YYYY-MM-DD",
        find_first_match_in_period=True,
        return_expectations={
            "date": {"earliest": "2020-02-01"},
            "rate": "exponential_increase",
        },
    ),
    # First ICU admission date on or after 2020-02-01. Day precision is
    # requested with ``date_format`` instead of the older ``include_day``
    # flag, matching the other date variables in this study.
    icu_date_admitted=patients.admitted_to_icu(
        on_or_after="2020-02-01",
        date_format="YYYY-MM-DD",
        returning="date_admitted",
        find_first_match_in_period=True,
        return_expectations={
            "date": {"earliest": "2020-02-01"},
            "rate": "exponential_increase",
        },
    ),
    # CPNS-recorded date of death on or after 2020-02-01. ``date_format``
    # replaces the older ``include_month`` + ``include_day`` flag pair.
    died_date_cpns=patients.with_death_recorded_in_cpns(
        on_or_after="2020-02-01",
        returning="date_of_death",
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {"earliest": "2020-02-01"},
            "rate": "exponential_increase",
        },
    ),
    # Age as of 2020-02-01.
    age=patients.age_as_of(
        "2020-02-01",
        return_expectations={
            "rate": "universal",
            "int": {"distribution": "population_ages"},
        },
    ),
    sex=patients.sex(
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"M": 0.49, "F": 0.51}},
        }
    ),
    # Index of multiple deprivation for the address as of 2020-02-01,
    # rounded to the nearest 100.
    imd=patients.address_as_of(
        "2020-02-01",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"100": 0.1, "200": 0.2, "300": 0.7}},
        },
    ),
)
# Study definition over all patients, carrying two follow-up flags and basic
# demographics.
study = StudyDefinition(
    # Expectations applied to any variable that does not override them.
    default_expectations={
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "rate": "uniform",
        "incidence": 0.1,
    },
    # --- Study population -------------------------------------------------
    # Every patient is included.
    population=patients.all(),
    # Disabled alternative population, kept for reference:
    # population=patients.satisfying(
    #        """
    #        has_follow_up AND
    #        (age >=18 AND age <= 110) AND
    #        (sex = "M" OR sex = "F") AND
    #        imd > 0 AND
    #        (rheumatoid OR sle) AND NOT
    #        chloroquine_not_hcq
    #        """,
    #        has_follow_up=patients.registered_with_one_practice_between(
    #        "2019-02-28", "2020-02-29"
    #    ),
    # ),
    # Registered with one practice from 2019-02-01 to 2020-02-01.
    has_12mfollow_up=patients.registered_with_one_practice_between(
        "2019-02-01", "2020-02-01", return_expectations={"incidence": 0.9}
    ),
    # Registered with one practice on 2020-02-01 (start and end coincide).
    under_fup_1feb=patients.registered_with_one_practice_between(
        "2020-02-01", "2020-02-01", return_expectations={"incidence": 0.9}
    ),
    # --- Covariates (each links to its GitHub issue) ----------------------
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/33
    age=patients.age_as_of(
        "2020-02-01",
        return_expectations={
            "rate": "universal",
            "int": {"distribution": "population_ages"},
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/46
    sex=patients.sex(
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"M": 0.49, "F": 0.51}},
        }
    ),
    # Index of multiple deprivation for the address as of 2020-02-29,
    # rounded to the nearest 100.
    imd=patients.address_as_of(
        "2020-02-29",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"100": 0.1, "200": 0.2, "300": 0.7}},
        },
    ),
)
# Study definition anchored on ``index_date``. Each outcome is a flag built
# by OR-ing sources (clinical events, hospital admissions, death certificate
# codes) looked up between index_date and the last day of its month.
study = StudyDefinition(
    # Expectations applied to any variable that does not override them.
    default_expectations={
        "date": {"earliest": "1980-01-01", "latest": "today"},
        "rate": "uniform",
        "incidence": 0.05,
    },
    index_date="2019-02-01",
    # Population: follow-up over the year before index_date, aged 18-110.
    population=patients.satisfying(
        """
            has_follow_up
        AND (age >=18 AND age <= 110)
        """,
        has_follow_up=patients.registered_with_one_practice_between(
            "index_date - 1 year", "index_date"
        ),
        age=patients.age_as_of(
            "index_date",
            return_expectations={
                "rate": "universal",
                "int": {"distribution": "population_ages"},
            },
        ),
    ),
    # Three-way COVID-19 status; anyone matching neither rule falls into
    # "General population".
    covid_hospitalisation=patients.categorised_as(
        {
            "COVID-19 positive": "covid_positive AND NOT covid_hospitalised",
            "COVID-19 hospitalised": "covid_hospitalised",
            "General population": "DEFAULT",
        },
        return_expectations={
            "incidence": 1,
            "category": {
                "ratios": {
                    "COVID-19 positive": 0.1,
                    "COVID-19 hospitalised": 0.1,
                    "General population": 0.8,
                }
            },
        },
        # Positive SARS-CoV-2 result in SGSS.
        covid_positive=patients.with_test_result_in_sgss(
            pathogen="SARS-CoV-2",
            test_result="positive",
            between=["2020-01-01", "last_day_of_month(index_date)"],
            date_format="YYYY-MM-DD",
            return_expectations={"date": {"earliest": "index_date"}},
        ),
        # Hospital admission with a diagnosis from covid_codelist.
        covid_hospitalised=patients.admitted_to_hospital(
            with_these_diagnoses=covid_codelist,
            between=["2020-01-01", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.20},
        ),
    ),
    # Stroke from any of the three sources.
    stroke=patients.satisfying(
        "stroke_gp OR stroke_hospital OR stroke_ons",
        stroke_gp=patients.with_these_clinical_events(
            stroke,
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        stroke_hospital=patients.admitted_to_hospital(
            with_these_diagnoses=stroke_hospital,
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        stroke_ons=patients.with_these_codes_on_death_certificate(
            stroke_hospital,
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
    ),
    # Deep vein thrombosis: the "dvt" category of the VTE codelists.
    DVT=patients.satisfying(
        "dvt_gp OR dvt_hospital OR dvt_ons",
        dvt_gp=patients.with_these_clinical_events(
            filter_codes_by_category(vte_codes_gp, include=["dvt"]),
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        dvt_hospital=patients.admitted_to_hospital(
            with_these_diagnoses=filter_codes_by_category(
                vte_codes_hospital, include=["dvt"]
            ),
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        dvt_ons=patients.with_these_codes_on_death_certificate(
            filter_codes_by_category(vte_codes_hospital, include=["dvt"]),
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
    ),
    # Pulmonary embolism: the "pe" category of the VTE codelists.
    PE=patients.satisfying(
        "pe_gp OR pe_hospital OR pe_ons",
        pe_gp=patients.with_these_clinical_events(
            filter_codes_by_category(vte_codes_gp, include=["pe"]),
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        pe_hospital=patients.admitted_to_hospital(
            with_these_diagnoses=filter_codes_by_category(
                vte_codes_hospital, include=["pe"]
            ),
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        pe_ons=patients.with_these_codes_on_death_certificate(
            filter_codes_by_category(vte_codes_hospital, include=["pe"]),
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
    ),
    # AKI: hospital admissions and death certificates only (no GP source).
    AKI=patients.satisfying(
        "aki_hospital OR aki_ons",
        aki_hospital=patients.admitted_to_hospital(
            with_these_diagnoses=aki_codes,
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        aki_ons=patients.with_these_codes_on_death_certificate(
            aki_codes,
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
    ),
    # MI: hospital/ONS sources use category "1" of mi_codes_hospital.
    MI=patients.satisfying(
        "mi_gp OR mi_hospital OR mi_ons",
        mi_gp=patients.with_these_clinical_events(
            mi_codes,
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        mi_hospital=patients.admitted_to_hospital(
            with_these_diagnoses=filter_codes_by_category(
                mi_codes_hospital, include=["1"]
            ),
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        mi_ons=patients.with_these_codes_on_death_certificate(
            filter_codes_by_category(mi_codes_hospital, include=["1"]),
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
    ),
    # Heart failure: hospital/ONS sources use category "1" of
    # heart_failure_codes_hospital.
    heart_failure=patients.satisfying(
        "heart_failure_gp OR heart_failure_hospital OR heart_failure_ons",
        heart_failure_gp=patients.with_these_clinical_events(
            heart_failure_codes,
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        heart_failure_hospital=patients.admitted_to_hospital(
            with_these_diagnoses=filter_codes_by_category(
                heart_failure_codes_hospital, include=["1"]
            ),
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        heart_failure_ons=patients.with_these_codes_on_death_certificate(
            filter_codes_by_category(
                heart_failure_codes_hospital, include=["1"]
            ),
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
    ),
    # Ketoacidosis: hospital admissions and death certificates only.
    ketoacidosis=patients.satisfying(
        "ketoacidosis_hospital OR ketoacidosis_ons",
        ketoacidosis_hospital=patients.admitted_to_hospital(
            with_these_diagnoses=ketoacidosis_codes,
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
        ketoacidosis_ons=patients.with_these_codes_on_death_certificate(
            ketoacidosis_codes,
            between=["index_date", "last_day_of_month(index_date)"],
            return_expectations={"incidence": 0.05},
        ),
    ),
    # Death from any cause within the same window.
    died=patients.died_from_any_cause(
        between=["index_date", "last_day_of_month(index_date)"],
        return_expectations={"incidence": 0.1},
    ),
)
Beispiel #16
0
    ),
    event_date_month=patients.with_these_clinical_events(
        cl,
        returning="date",
        date_format="YYYY-MM",
        return_expectations={"rate": "uniform", "incidence": 0.5},
    ),
    event_date_year=patients.with_these_clinical_events(
        cl,
        returning="date",
        date_format="YYYY",
        return_expectations={"rate": "uniform", "incidence": 0.5},
    ),
)

# Build the primary test study from the shared column definitions and keep a
# handle on its covariate definitions.
study = StudyDefinition(**column_definitions)
covariate_definitions = study.covariate_definitions

# Path "fixtures/dummy-data" relative to the directory containing this file.
fixtures_path = Path(__file__).parent.joinpath("fixtures", "dummy-data")


# Create a second test study to which we can add columns without needing to rebuild all
# the test fixtures
study_2 = StudyDefinition(
    **column_definitions,
    category_date=patients.categorised_as(
        {
            "2020-10-15": "age > 50",
            "2021-11-16": "DEFAULT",
        },
        return_expectations={
Beispiel #17
0
## STUDY POPULATION

# Study definition extracting the STP code of each patient's registered
# practice as of ``index_date``.
study = StudyDefinition(
    # Expectations applied to any variable that does not override them; the
    # date range for simulated dates runs from index_date to today.
    default_expectations={
        "date": {"earliest": index_date, "latest": "today"},
        "rate": "uniform",
        "incidence": 1,
    },
    # Population: patients registered as of index_date.
    population=patients.registered_as_of(index_date),
    # STP code of the registered practice as of index_date.
    stp=patients.registered_practice_as_of(
        index_date,
        returning="stp_code",
        return_expectations={
            "category": {"ratios": {"STP1": 0.3, "STP2": 0.2, "STP3": 0.5}},
        },
    ),
)
# Import codelists

from codelists import *
from datetime import date


# Study window; also used as the date range for simulated dates.
start_date = "2020-12-07"
end_date = "2021-02-01"

# Specify study definition
study = StudyDefinition(
    default_expectations={
        "date": {
            "earliest": start_date,
            "latest": end_date,
        },
        "rate": "exponential_increase",
        "incidence": 0.1,
    },
    # Population: patients registered as of start_date.
    population=patients.registered_as_of(start_date),
    # Pseudonymised id of the registered practice as of start_date.
    practice=patients.registered_practice_as_of(
        start_date,
        returning="pseudo_id",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 25,
                "stddev": 5,
            },
            "incidence": 0.5,
        },
    ),
)
Beispiel #19
0
# Study definition with a chronic-liver-disease flag and the STP code, both
# relative to ``index_date``.
study = StudyDefinition(
    index_date="2020-02-01",
    # Expectations applied to any variable that does not override them.
    default_expectations={
        "date": {"earliest": "1900-01-01", "latest": "index_date"},
        "rate": "exponential_increase",
    },
    # Population: registered with one practice for the year up to
    # index_date.
    population=patients.registered_with_one_practice_between(
        "index_date - 1 year", "index_date"
    ),
    # Binary flag for any chronic liver disease code; expectations set the
    # incidence to 1.0.
    has_chronic_liver_disease=patients.with_these_clinical_events(
        chronic_liver_disease_codes,
        returning="binary_flag",
        return_expectations={
            "incidence": 1.0,
            "date": {"earliest": "1950-01-01", "latest": "index_date"},
        },
    ),
    # STP code of the registered practice; expectations use the single
    # category "STP1".
    stp=patients.registered_practice_as_of(
        "index_date",
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"STP1": 1.0}},
        },
    ),
)
Beispiel #20
0
# Study definition over all patients with a follow-up flag, two ethnicity
# groupings, age and sex.
study = StudyDefinition(
    # Expectations applied to any variable that does not override them.
    default_expectations={
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "rate": "uniform",
        "incidence": 0.1,
    },
    # --- Study population -------------------------------------------------
    # Every patient is included.
    population=patients.all(),
    # Registered with one practice from 2019-02-28 to 2020-02-01.
    has_follow_up=patients.registered_with_one_practice_between(
        "2019-02-28", "2020-02-01", return_expectations={"incidence": 0.9}
    ),
    # --- Ethnicity --------------------------------------------------------
    # 16-category grouping: category of the last match from
    # ethnicity_codes_16, with the date of that match.
    ethnicity_16=patients.with_these_clinical_events(
        ethnicity_codes_16,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=True,
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.0625, "2": 0.0625, "3": 0.0625, "4": 0.0625,
                    "5": 0.0625, "6": 0.0625, "7": 0.0625, "8": 0.0625,
                    "9": 0.0625, "10": 0.0625, "11": 0.0625, "12": 0.0625,
                    "13": 0.0625, "14": 0.0625, "15": 0.0625, "16": 0.0625,
                }
            },
            "incidence": 0.75,
        },
    ),
    # Coarser grouping from ethnicity_codes: category of the last match,
    # with the date of that match.
    ethnicity=patients.with_these_clinical_events(
        ethnicity_codes,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=True,
        return_expectations={
            "category": {
                "ratios": {"1": 0.2, "2": 0.2, "3": 0.2, "4": 0.2, "5": 0.2}
            },
            "incidence": 0.75,
        },
    ),
    # --- Covariates (each links to its GitHub issue) ----------------------
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/33
    age=patients.age_as_of(
        "2020-03-01",
        return_expectations={
            "rate": "universal",
            "int": {"distribution": "population_ages"},
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/46
    sex=patients.sex(
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"M": 0.49, "F": 0.51}},
        }
    ),
)
# Study definition with demographic, clinical, geographic and medication
# covariates, all expressed relative to ``index_date``.
study = StudyDefinition(
    index_date="2020-02-01",
    # Expectations applied to any variable that does not override them.
    default_expectations={
        "date": {
            "earliest": "1900-01-01",
            "latest": "index_date",
        },
        "rate": "exponential_increase",
    },
    # Population: registered with one practice for the year up to
    # index_date.
    population=patients.registered_with_one_practice_between(
        "index_date - 1 year",
        "index_date",
    ),
    # Covariates follow; each links to its GitHub issue where one exists.
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/33
    age=patients.age_as_of(
        "index_date",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages",
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/46
    sex=patients.sex(
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "M": 0.49,
                    "F": 0.51,
                },
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/7
    # Date of the first chronic cardiac disease event.
    chronic_cardiac_disease=patients.with_these_clinical_events(
        chronic_cardiac_disease_codes,
        returning="date",
        find_first_match_in_period=True,
        include_month=True,
        return_expectations={
            "incidence": 0.2,
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/12
    # Date of the first chronic liver disease event.
    chronic_liver_disease=patients.with_these_clinical_events(
        chronic_liver_disease_codes,
        returning="date",
        find_first_match_in_period=True,
        include_month=True,
        return_expectations={
            "incidence": 0.2,
            "date": {
                "earliest": "1950-01-01",
                "latest": "index_date",
            },
        },
    ),
    # Binary flag drawn from the same chronic liver disease codelist.
    has_chronic_liver_disease=patients.with_these_clinical_events(
        chronic_liver_disease_codes,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.2,
            "date": {
                "earliest": "1950-01-01",
                "latest": "index_date",
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/10
    # Most recent BMI on or after index_date, with its measurement date.
    bmi=patients.most_recent_bmi(
        on_or_after="index_date",
        minimum_age_at_measurement=16,
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "incidence": 0.6,
            "float": {
                "distribution": "normal",
                "mean": 35,
                "stddev": 10,
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/35
    # Mean blood pressure on the most recent measurement day on or before
    # index_date.
    # NOTE(review): the expected means look swapped (systolic 80,
    # diastolic 120) -- confirm whether this is intentional.
    bp_sys=patients.mean_recorded_value(
        systolic_blood_pressure_codes,
        on_most_recent_day_of_measurement=True,
        on_or_before="index_date",
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "incidence": 0.6,
            "float": {
                "distribution": "normal",
                "mean": 80,
                "stddev": 10,
            },
        },
    ),
    bp_dias=patients.mean_recorded_value(
        diastolic_blood_pressure_codes,
        on_most_recent_day_of_measurement=True,
        on_or_before="index_date",
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "incidence": 0.6,
            "float": {
                "distribution": "normal",
                "mean": 120,
                "stddev": 10,
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/54
    # STP and MSOA codes of the registered practice as of index_date.
    stp=patients.registered_practice_as_of(
        "index_date",
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "STP1": 0.5,
                    "STP2": 0.5,
                },
            },
        },
    ),
    msoa=patients.registered_practice_as_of(
        "index_date",
        returning="msoa_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "MSOA1": 0.5,
                    "MSOA2": 0.5,
                },
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/52
    # Index of multiple deprivation, rounded to the nearest 100.
    imd=patients.address_as_of(
        "index_date",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "100": 0.1,
                    "200": 0.2,
                    "300": 0.7,
                },
            },
        },
    ),
    # Rural/urban classification of the address as of index_date.
    rural_urban=patients.address_as_of(
        "index_date",
        returning="rural_urban_classification",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "rural": 0.1,
                    "urban": 0.9,
                },
            },
        },
    ),
    # Number of salbutamol matches in the two years up to index_date.
    recent_salbutamol_count=patients.with_these_medications(
        salbutamol_codes,
        between=["index_date - 2 years", "index_date"],
        returning="number_of_matches_in_period",
        return_expectations={
            "incidence": 0.6,
            "int": {
                "distribution": "normal",
                "mean": 8,
                "stddev": 2,
            },
        },
    ),
)
Beispiel #22
0
# Study definition with age bands, STP and one binary flag per dose codelist
# for events in the month starting at ``index_date``.
study = StudyDefinition(
    # Expectations applied to any variable that does not override them.
    default_expectations={
        "date": {"earliest": "2020-01-01", "latest": "today"},
        "rate": "universal",
    },
    # Study index date.
    index_date=index_date,
    # Population: registered as of index_date and with no death recorded on
    # or before it.
    population=patients.satisfying(
        "(NOT died) AND (registered)",
        died=patients.died_from_any_cause(
            on_or_before=index_date, returning="binary_flag"
        ),
        registered=patients.registered_as_of(index_date),
    ),
    age=patients.age_as_of(
        index_date,
        return_expectations={
            "rate": "universal",
            "int": {"distribution": "population_ages"},
        },
    ),
    # Age bands derived from ``age``; "0" is the DEFAULT category.
    age_group=patients.categorised_as(
        {
            "0": "DEFAULT",
            "0 - under 16": " age < 16",
            "16 - under 30": " age >= 16 AND age < 30",
            "30 - under 40": " age >= 30 AND age < 40",
            "40 - under 50": " age >= 40 AND age < 50",
            "50 - under 55": " age >= 50 AND age < 55",
            "55 - under 60": " age >= 55 AND age < 60",
            "60 - under 65": " age >= 60 AND age < 65",
            "65 - under 70": " age >= 65 AND age < 70",
            "70 - under 75": " age >= 70 AND age < 75",
            "75 - under 80": " age >= 75 AND age < 80",
            "80 - under 85": " age >= 80 AND age < 85",
            "85 plus": " age >=  85",
        },
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "0 - under 16": 0.05,
                    "16 - under 30": 0.1,
                    "30 - under 40": 0.1,
                    "40 - under 50": 0.1,
                    "50 - under 55": 0.1,
                    "55 - under 60": 0.05,
                    "60 - under 65": 0.1,
                    "65 - under 70": 0.1,
                    "70 - under 75": 0.05,
                    "75 - under 80": 0.1,
                    "80 - under 85": 0.1,
                    "85 plus": 0.05,
                }
            },
        },
    ),
    stp=patients.registered_practice_as_of(
        "index_date",
        returning="stp_code",
        return_expectations={
            "category": {"ratios": {"STP1": 0.5, "STP2": 0.5}},
        },
    ),
    # One binary flag per dose codelist, all built with identical arguments.
    # The pairs are listed in the order the columns should be defined; the
    # dict keeps that order when unpacked into keyword arguments.
    **{
        column_name: patients.with_these_clinical_events(
            dose_codes,
            returning="binary_flag",
            between=["index_date", "index_date + 1 month"],
            return_expectations={"incidence": 0.4},
        )
        for column_name, dose_codes in (
            ("first_dose", first_dose_code),
            ("second_dose", second_dose_code),
            ("az_first_dose", az_first_dose_code),
            ("az_second_dose", az_second_dose_code),
            ("pf_first_dose", pf_first_dose_code),
            ("pf_second_dose", pf_second_dose_code),
            ("mo_first_dose", mo_first_dose_code),
            ("mo_second_dose", mo_second_dose_code),
            ("nx_first_dose", nx_first_dose_code),
            ("nx_second_dose", nx_second_dose_code),
            ("jn_first_dose", jn_first_dose_code),
            ("jn_second_dose", jn_second_dose_code),
            ("gs_first_dose", gs_first_dose_code),
            ("gs_second_dose", gs_second_dose_code),
            ("vl_first_dose", vl_first_dose_code),
            ("vl_second_dose", vl_second_dose_code),
        )
    },
)
Beispiel #23
0
# Study definition for adults registered at ``index_date``, with the date of
# death and a three-way death category over [index_date, end_date].
study = StudyDefinition(
    # Expectations applied to any variable that does not override them.
    default_expectations={
        "date": {"earliest": index_date, "latest": end_date},
        "rate": "uniform",
    },
    index_date=index_date,
    # Population: sex F or M, aged 18 to under 120, registered as of
    # index_date, and with no death recorded on or before it.
    population=patients.satisfying(
        """
        (sex = 'F' OR sex = 'M') AND
        (age >= 18 AND age < 120) AND
        (NOT died) AND
        (registered)
        """,
        registered=patients.registered_as_of(index_date),
        died=patients.died_from_any_cause(
            on_or_before=index_date,
            returning="binary_flag",
        ),
    ),
    age=patients.age_as_of(
        index_date,
        return_expectations={
            "int": {"distribution": "population_ages"},
            "incidence": 1,
        },
    ),
    sex=patients.sex(
        return_expectations={
            "category": {"ratios": {"M": 0.49, "F": 0.51}},
            "incidence": 1,
        }
    ),
    # Date of death from any cause within the study window.
    date_death=patients.died_from_any_cause(
        between=[index_date, end_date],
        returning="date_of_death",
        date_format="YYYY-MM-DD",
        return_expectations={
            "incidence": 0.2,
        },
    ),
    # Death category; patients matching neither rule are labelled "alive".
    death_category=patients.categorised_as(
        {
            "covid-death": "died_covid",
            "non-covid-death": "(NOT died_covid) AND died_any",
            "alive": "DEFAULT",
        },
        # A code from codes_ICD10_covid on the death certificate, not
        # limited to the underlying cause.
        died_covid=patients.with_these_codes_on_death_certificate(
            codes_ICD10_covid,
            returning="binary_flag",
            match_only_underlying_cause=False,
            between=[index_date, end_date],
        ),
        # Death from any cause within the study window.
        died_any=patients.died_from_any_cause(
            between=[index_date, end_date],
            returning="binary_flag",
        ),
        return_expectations={
            "category": {
                "ratios": {
                    "alive": 0.8,
                    "covid-death": 0.1,
                    "non-covid-death": 0.1,
                }
            },
            "incidence": 1,
        },
    ),
)
Beispiel #24
0
# Study definition for an immune-mediated inflammatory disease (IMID) cohort:
# outcomes from 2020-03-01 onwards, covariates measured up to 2020-02-29,
# and per-drug medication variables.
study = StudyDefinition(
    # Expectations shared by every variable unless overridden below
    default_expectations={
        "date": {
            "earliest": "1900-01-01",
            "latest": "today",
        },
        "rate": "uniform",
        "incidence": 0.1,
    },
    # ---- Study population ----
    # Requires follow-up (has_follow_up), age 18-110 and sex recorded as M or F.
    population=patients.satisfying(
            """
            has_follow_up AND
            (age >=18 AND age <= 110) AND
            (sex = "M" OR sex = "F")
            """,
        has_follow_up=patients.registered_with_one_practice_between(
            "2019-02-28",
            "2020-02-29",
        ),
    ),
    # ---- Outcomes ----
    icu_date_admitted=patients.admitted_to_icu(
        on_or_after="2020-03-01",
        include_day=True,
        returning="date_admitted",
        find_first_match_in_period=True,
        return_expectations={
            "date": {"earliest": "2020-03-01"},
            "incidence": 0.1,
        },
    ),
    # COVID code in any position on the death certificate
    died_ons_covid_flag_any=patients.with_these_codes_on_death_certificate(
        covid_identification,
        on_or_after="2020-03-01",
        match_only_underlying_cause=False,
        return_expectations={
            "date": {"earliest": "2020-03-01"},
            "incidence": 0.1,
        },
    ),
    # COVID code as the underlying cause only
    died_ons_covid_flag_underlying=patients.with_these_codes_on_death_certificate(
        covid_identification,
        on_or_after="2020-03-01",
        match_only_underlying_cause=True,
        return_expectations={
            "date": {"earliest": "2020-03-01"},
            "incidence": 0.1,
        },
    ),
    died_date_ons=patients.died_from_any_cause(
        on_or_after="2020-03-01",
        returning="date_of_death",
        include_month=True,
        include_day=True,
        return_expectations={
            "date": {"earliest": "2020-03-01"},
            "incidence": 0.1,
        },
    ),
    # ---- COVID-19 outcomes ----
    # Date of the first positive SARS-CoV-2 test result in SGSS
    first_pos_test_sgss=patients.with_test_result_in_sgss(
        pathogen="SARS-CoV-2",
        test_result="positive",
        find_first_match_in_period=True,
        returning="date",
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {"earliest": "2020-01-01"},
        },
    ),
    # ---- Covariates (links point to the associated GitHub issues) ----
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/33
    age=patients.age_as_of(
        "2020-03-01",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages",
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/46
    sex=patients.sex(
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "M": 0.49,
                    "F": 0.51,
                },
            },
        },
    ),
    # Most recent ethnicity category, together with the date it was recorded
    ethnicity=patients.with_these_clinical_events(
        ethnicity_codes,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=True,
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.8,
                    "5": 0.1,
                    "3": 0.1,
                },
            },
            "incidence": 0.75,
        },
    ),
    # ---- IMID disease codes ----
    atopic_dermatitis=first_diagnosis_in_period(
        atopic_dermatitis_codes
    ),
    crohns_disease=first_diagnosis_in_period(
        crohns_disease_codes
    ),
    ulcerative_colitis=first_diagnosis_in_period(
        ulcerative_colitis_codes
    ),
    inflammatory_bowel_disease_unclassified=first_diagnosis_in_period(
        inflammatory_bowel_disease_unclassified_codes
    ),
    psoriasis=first_diagnosis_in_period(
        psoriasis_codes
    ),
    hidradenitis_suppurativa=first_diagnosis_in_period(
        hidradenitis_suppurativa_codes
    ),
    psoriatic_arthritis=first_diagnosis_in_period(
        psoriatic_arthritis_codes
    ),
    rheumatoid_arthritis=first_diagnosis_in_period(
        rheumatoid_arthritis_codes
    ),
    ankylosing_spondylitis=first_diagnosis_in_period(
        ankylosing_spondylitis_codes
    ),
    # ---- Comorbidities ----
    chronic_cardiac_disease=first_diagnosis_in_period(
        chronic_cardiac_disease_codes
    ),
    diabetes=first_diagnosis_in_period(
        diabetes_codes
    ),
    hba1c_new=first_diagnosis_in_period(
        hba1c_new_codes
    ),
    hba1c_old=first_diagnosis_in_period(
        hba1c_old_codes
    ),
    # Latest HbA1c value recorded against hba1c_new_codes up to 2020-02-29
    hba1c_mmol_per_mol=patients.with_these_clinical_events(
        hba1c_new_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-29",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {"latest": "2020-02-29"},
            "float": {
                "distribution": "normal",
                "mean": 40.0,
                "stddev": 20,
            },
            "incidence": 0.95,
        },
    ),
    # Latest HbA1c value recorded against hba1c_old_codes up to 2020-02-29
    hba1c_percentage=patients.with_these_clinical_events(
        hba1c_old_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-29",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {"latest": "2020-02-29"},
            "float": {
                "distribution": "normal",
                "mean": 5,
                "stddev": 2,
            },
            "incidence": 0.95,
        },
    ),
    hypertension=first_diagnosis_in_period(
        hypertension_codes
    ),
    chronic_respiratory_disease=first_diagnosis_in_period(
        chronic_respiratory_disease_codes
    ),
    copd=first_diagnosis_in_period(
        copd_codes
    ),
    chronic_liver_disease=first_diagnosis_in_period(
        chronic_liver_disease_codes
    ),
    stroke=first_diagnosis_in_period(
        stroke_codes
    ),
    lung_cancer=first_diagnosis_in_period(
        lung_cancer_codes
    ),
    haem_cancer=first_diagnosis_in_period(
        haem_cancer_codes
    ),
    other_cancer=first_diagnosis_in_period(
        other_cancer_codes
    ),
    # ---- CKD ----
    # Latest creatinine value between 2018-12-01 and 2020-02-29
    creatinine=patients.with_these_clinical_events(
        creatinine_codes,
        find_last_match_in_period=True,
        between=["2018-12-01", "2020-02-29"],
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 150.0,
                "stddev": 200.0,
            },
            "date": {
                "earliest": "2018-12-01",
                "latest": "2020-02-29",
            },
            "incidence": 0.95,
        },
    ),
    # End-stage renal disease codes incl. dialysis / transplant
    # NOTE(review): this uses ckd_codes, the same codelist as `ckd` below;
    # confirm that is the intended codelist for end-stage renal disease.
    esrf=patients.with_these_clinical_events(
        ckd_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {"latest": "2020-02-29"},
        },
    ),
    ckd=first_diagnosis_in_period(
        ckd_codes
    ),
    organ_transplant=first_diagnosis_in_period(
        organ_transplant_codes
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/10
    bmi=patients.most_recent_bmi(
        on_or_after="2010-02-01",
        minimum_age_at_measurement=16,
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "incidence": 0.6,
            "float": {
                "distribution": "normal",
                "mean": 35,
                "stddev": 10,
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/54
    stp=patients.registered_practice_as_of(
        "2020-03-01",
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "STP1": 0.5,
                    "STP2": 0.5,
                },
            },
        },
    ),
    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/52
    imd=patients.address_as_of(
        "2020-03-01",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "100": 0.1,
                    "200": 0.2,
                    "300": 0.7,
                },
            },
        },
    ),
    # ---- Smoking ----
    # S / E / N are derived from the most recent smoking code; a most recent
    # 'N' code becomes "E" when an S or E code was ever recorded. "M" is the
    # fallback category.
    smoking_status=patients.categorised_as(
        {
            "S": "most_recent_smoking_code = 'S'",
            "E": """
                     most_recent_smoking_code = 'E' OR (    
                       most_recent_smoking_code = 'N' AND ever_smoked   
                     )  
                """,
            "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
            "M": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "S": 0.6,
                    "E": 0.1,
                    "N": 0.2,
                    "M": 0.1,
                },
            },
        },
        most_recent_smoking_code=patients.with_these_clinical_events(
            clear_smoking_codes,
            find_last_match_in_period=True,
            on_or_before="2020-02-29",
            returning="category",
        ),
        ever_smoked=patients.with_these_clinical_events(
            filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
            on_or_before="2020-02-29",
        ),
    ),
    # Disabled variable, kept for reference:
#    smoking_status_date=patients.with_these_clinical_events(
#        clear_smoking_codes,
#        on_or_before="2020-02-29",
#        return_last_date_in_period=True,
#        include_month=True,
#        return_expectations={"date": {"latest": "2020-02-29"}},
#    ),
    # ---- GP consultation rate ----
    # Number of GP consultations between 2019-03-01 and 2020-02-29
    gp_consult_count=patients.with_gp_consultations(
        between=["2019-03-01", "2020-02-29"],
        returning="number_of_matches_in_period",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 4,
                "stddev": 2,
            },
            "date": {
                "earliest": "2019-03-01",
                "latest": "2020-02-29",
            },
            "incidence": 0.7,
        },
    ),
    # Disabled variable, kept for reference:
#    has_consultation_history=patients.with_complete_gp_consultation_history_between(
#        "2019-03-01", "2020-02-29", return_expectations={"incidence": 0.9},
#    ),
    # ---- Medications ----
    # Each call expands to the variables returned by
    # medication_counts_and_dates (defined outside this block). The third
    # positional argument is False for the medication codelists in the first
    # group and True for the `opensafely-high-cost-drugs-*` codelists;
    # rituximab additionally passes a fourth positional True.
    **medication_counts_and_dates("oral_prednisolone", "opensafely-asthma-oral-prednisolone-medication", False),
    **medication_counts_and_dates("azathioprine", "crossimid-azathioprine-medication", False),
    **medication_counts_and_dates("ciclosporin", "crossimid-ciclosporin-medication", False),
    **medication_counts_and_dates("gold", "crossimid-gold-medication", False),
    **medication_counts_and_dates("leflunomide", "crossimid-leflunomide-medication", False),
    **medication_counts_and_dates("mercaptopurine", "crossimid-mercaptopurine-medication", False),
    **medication_counts_and_dates("methotrexate", "crossimid-methotrexate-medication", False),
    **medication_counts_and_dates("mycophenolate", "crossimid-mycophenolate-medication", False),
    **medication_counts_and_dates("penicillamine", "crossimid-penicillamine-medication", False),
    **medication_counts_and_dates("sulfasalazine", "crossimid-sulfasalazine-medication", False),
    **medication_counts_and_dates("mesalazine", "crossimid-mesalazine-medication", False),
    **medication_counts_and_dates("atopic_dermatitis_meds", "crossimid-atopic-dermatitis-medication", False),
    # High-cost drugs
    **medication_counts_and_dates("abatacept", "opensafely-high-cost-drugs-abatacept", True),
    **medication_counts_and_dates("adalimumab", "opensafely-high-cost-drugs-adalimumab", True),
    **medication_counts_and_dates("baricitinib", "opensafely-high-cost-drugs-baricitinib", True),
    **medication_counts_and_dates("brodalumab", "opensafely-high-cost-drugs-brodalumab", True),
    **medication_counts_and_dates("certolizumab", "opensafely-high-cost-drugs-certolizumab", True),
    #**medication_counts_and_dates("dupilumab", "opensafely-high-cost-drugs-dupilumab", True),
    **medication_counts_and_dates("etanercept", "opensafely-high-cost-drugs-etanercept", True),
    **medication_counts_and_dates("golimumab", "opensafely-high-cost-drugs-golimumab", True),
    **medication_counts_and_dates("guselkumab", "opensafely-high-cost-drugs-guselkumab", True),
    **medication_counts_and_dates("infliximab", "opensafely-high-cost-drugs-infliximab", True),
    **medication_counts_and_dates("ixekizumab", "opensafely-high-cost-drugs-ixekizumab", True),
    **medication_counts_and_dates("mepolizumab", "opensafely-high-cost-drugs-mepolizumab", True),
    **medication_counts_and_dates("methotrexate_hcd", "opensafely-high-cost-drugs-methotrexate", True),
    **medication_counts_and_dates("risankizumab", "opensafely-high-cost-drugs-risankizumab", True),
    **medication_counts_and_dates("rituximab", "opensafely-high-cost-drugs-rituximab", True, True),
    **medication_counts_and_dates("sarilumab", "opensafely-high-cost-drugs-sarilumab", True),
    **medication_counts_and_dates("secukinumab", "opensafely-high-cost-drugs-secukinumab", True),
    **medication_counts_and_dates("tildrakizumab", "opensafely-high-cost-drugs-tildrakizumab", True),
    **medication_counts_and_dates("tocilizumab", "opensafely-high-cost-drugs-tocilizumab", True),
    **medication_counts_and_dates("tofacitinib", "opensafely-high-cost-drugs-tofacitinib", True),
    **medication_counts_and_dates("upadacitinib", "opensafely-high-cost-drugs-upadacitinib", True),
    **medication_counts_and_dates("ustekinumab", "opensafely-high-cost-drugs-ustekinumab", True),
    **medication_counts_and_dates("vedolizumab", "opensafely-high-cost-drugs-vedolizumab", True),
)
Beispiel #25
0
study = StudyDefinition(
    default_expectations={
        "date": {
            "earliest": "1970-01-01",
            "latest": "today"
        },
        "rate": "uniform",
        "incidence": 0.2,
    },

    # STUDY POPULATION
    population=patients.registered_with_one_practice_between(
        "2019-11-01", "2020-02-01"),
    dereg_date=patients.date_deregistered_from_all_supported_practices(
        on_or_after="2020-02-01",
        date_format="YYYY-MM",
    ),

    # FOLLOW UP
    has_12_m_follow_up=patients.registered_with_one_practice_between(
        "2019-02-01",
        "2020-01-31",  ### 12 months prior to 1st Feb 2020
        return_expectations={
            "incidence": 0.95,
        }),

    # OUTCOMES
    died_ons_covid_flag_any=patients.with_these_codes_on_death_certificate(
        covid_codelist,
        on_or_after="2020-02-01",
        match_only_underlying_cause=False,
        return_expectations={
            "date": {
                "earliest": "2020-02-01"
            },
            "incidence": 0.6
        },
    ),
    died_ons_covid_flag_underlying=patients.
    with_these_codes_on_death_certificate(
        covid_codelist,
        on_or_after="2020-02-01",
        match_only_underlying_cause=True,
        return_expectations={
            "date": {
                "earliest": "2020-02-01"
            },
            "incidence": 0.6
        },
    ),
    died_date_ons=patients.died_from_any_cause(
        on_or_after="2020-02-01",
        returning="date_of_death",
        include_month=True,
        include_day=True,
        return_expectations={
            "date": {
                "earliest": "2020-02-01"
            },
            "incidence": 0.8
        },
    ),
    covid_icu_date=patients.admitted_to_icu(
        on_or_after="2020-02-01",
        include_day=True,
        returning="date_admitted",
        find_first_match_in_period=True,
        return_expectations={
            "date": {
                "earliest": "2020-02-01"
            },
            "incidence": 0.8
        },
    ),
    covid_tpp_probable=patients.with_these_clinical_events(
        combine_codelists(
            covid_identification_in_primary_care_case_codes_clinical,
            covid_identification_in_primary_care_case_codes_test,
            covid_identification_in_primary_care_case_codes_seq),
        return_first_date_in_period=True,
        include_day=True,
        return_expectations={
            "date": {
                "earliest": "2020-02-01"
            },
            "incidence": 0.6
        },
    ),
    covid_admission_date=patients.admitted_to_hospital(
        returning="date_admitted",  # defaults to "binary_flag"
        with_these_diagnoses=covid_codelist,  # optional
        on_or_after="2020-02-01",
        find_first_match_in_period=True,
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest": "2020-03-01"
            },
            "incidence": 0.95
        },
    ),
    covid_admission_primary_diagnosis=patients.admitted_to_hospital(
        returning="primary_diagnosis",
        with_these_diagnoses=covid_codelist,  # optional
        on_or_after="2020-02-01",
        find_first_match_in_period=True,
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest": "2020-03-01"
            },
            "incidence": 0.95,
            "category": {
                "ratios": {
                    "U071": 0.5,
                    "U072": 0.5
                }
            },
        },
    ),
    positive_covid_test_ever=patients.with_test_result_in_sgss(
        pathogen="SARS-CoV-2",
        test_result="positive",
        return_expectations={"incidence": 0.1},
    ),

    ## DEMOGRAPHIC COVARIATES
    # AGE
    age=patients.age_as_of(
        "2020-02-01",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages"
            },
        },
    ),

    # SEX
    sex=patients.sex(return_expectations={
        "rate": "universal",
        "category": {
            "ratios": {
                "M": 0.49,
                "F": 0.51
            }
        },
    }),

    # DEPRIVATION
    imd=patients.address_as_of(
        "2020-02-01",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "100": 0.1,
                    "200": 0.2,
                    "300": 0.7
                }
            },
        },
    ),

    # GEOGRAPHIC REGION CALLED STP
    stp=patients.registered_practice_as_of(
        "2020-02-01",
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "STP1": 0.1,
                    "STP2": 0.1,
                    "STP3": 0.1,
                    "STP4": 0.1,
                    "STP5": 0.1,
                    "STP6": 0.1,
                    "STP7": 0.1,
                    "STP8": 0.1,
                    "STP9": 0.1,
                    "STP10": 0.1,
                }
            },
        },
    ),

    # ETHNICITY IN 6 CATEGORIES
    ethnicity=patients.with_these_clinical_events(
        ethnicity_codes,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=True,
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.8,
                    "5": 0.1,
                    "3": 0.1
                }
            },
            "incidence": 0.9,
        },
    ),

    # HOUSEHOLD INFORMATION
    household_id=patients.household_as_of(
        "2020-02-01",
        returning="pseudo_id",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 1000,
                "stddev": 200
            },
            "incidence": 1,
        },
    ),
    household_size=patients.household_as_of(
        "2020-02-01",
        returning="household_size",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 3,
                "stddev": 1
            },
            "incidence": 1,
        },
    ),
    care_home_type=patients.care_home_status_as_of(
        "2020-02-01",
        categorised_as={
            "PC": """
              IsPotentialCareHome
              AND LocationDoesNotRequireNursing='Y'
              AND LocationRequiresNursing='N'
            """,
            "PN": """
              IsPotentialCareHome
              AND LocationDoesNotRequireNursing='N'
              AND LocationRequiresNursing='Y'
            """,
            "PS": "IsPotentialCareHome",
            "U": "DEFAULT",
        },
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "PC": 0.01,
                    "PN": 0.01,
                    "PS": 0.01,
                    "U": 0.97,
                },
            },
        },
    ),

    # CONTINUOUS MEASURED COVARIATES
    bmi=patients.most_recent_bmi(
        on_or_after="2010-02-01",
        minimum_age_at_measurement=16,
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2015-01-31"
            },
            "float": {
                "distribution": "normal",
                "mean": 25,
                "stddev": 10
            },
            "incidence": 0.95,
        },
    ),

    # Blood pressure
    bp_sys=patients.mean_recorded_value(
        systolic_blood_pressure_codes,
        on_most_recent_day_of_measurement=True,
        on_or_before="2020-02-01",
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 80,
                "stddev": 10
            },
            "date": {
                "latest": "2020-01-31"
            },
            "incidence": 0.95,
        },
    ),
    bp_dias=patients.mean_recorded_value(
        diastolic_blood_pressure_codes,
        on_most_recent_day_of_measurement=True,
        on_or_before="2020-02-01",
        include_measurement_date=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 120,
                "stddev": 10
            },
            "date": {
                "latest": "2020-01-31"
            },
            "incidence": 0.95,
        },
    ),

    # # Creatinine
    creatinine=patients.with_these_clinical_events(
        creatinine_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-01",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "float": {
                "distribution": "normal",
                "mean": 60.0,
                "stddev": 15
            },
            "date": {
                "earliest": "2019-02-28",
                "latest": "2020-01-31"
            },
            "incidence": 0.95,
        },
    ),

    # COVARIATES
    smoking_status=patients.categorised_as(
        {
            "S": "most_recent_smoking_code = 'S'",
            "E": """
                 most_recent_smoking_code = 'E' OR (
                   most_recent_smoking_code = 'N' AND ever_smoked
                 )
            """,
            "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
            "M": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "S": 0.6,
                    "E": 0.1,
                    "N": 0.2,
                    "M": 0.1
                }
            }
        },
        most_recent_smoking_code=patients.with_these_clinical_events(
            clear_smoking_codes,
            find_last_match_in_period=True,
            on_or_before="2020-02-01",
            returning="category",
        ),
        ever_smoked=patients.with_these_clinical_events(
            filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
            on_or_before="2020-02-01",
        ),
    ),
    smoking_status_date=patients.with_these_clinical_events(
        clear_smoking_codes,
        on_or_before="2020-02-01",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-01-31"
            },
            "incidence": 0.95,
        },
    ),
    chronic_respiratory_disease=patients.with_these_clinical_events(
        chronic_respiratory_disease_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),
    chronic_cardiac_disease=patients.with_these_clinical_events(
        chronic_cardiac_disease_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),

    #DIABETES TYPE
    type1_diabetes=patients.with_these_clinical_events(
        diabetes_t1_codes,
        on_or_before="2020-02-01",
        return_first_date_in_period=True,
        include_month=True,
    ),
    type2_diabetes=patients.with_these_clinical_events(
        diabetes_t2_codes,
        on_or_before="2020-02-01",
        return_first_date_in_period=True,
        include_month=True,
    ),
    unknown_diabetes=patients.with_these_clinical_events(
        diabetes_unknown_codes,
        on_or_before="2020-02-01",
        return_first_date_in_period=True,
        include_month=True,
    ),
    diabetes_type=patients.categorised_as(
        {
            "T1DM": """
                        (type1_diabetes AND NOT
                        type2_diabetes) 
                    OR
                        (((type1_diabetes AND type2_diabetes) OR 
                        (type1_diabetes AND unknown_diabetes AND NOT type2_diabetes) OR
                        (unknown_diabetes AND NOT type1_diabetes AND NOT type2_diabetes))
                        AND 
                        (insulin_lastyear_meds > 0 AND NOT
                        oad_lastyear_meds > 0))
                """,
            "T2DM": """
                        (type2_diabetes AND NOT
                        type1_diabetes)
                    OR
                        (((type1_diabetes AND type2_diabetes) OR 
                        (type2_diabetes AND unknown_diabetes AND NOT type1_diabetes) OR
                        (unknown_diabetes AND NOT type1_diabetes AND NOT type2_diabetes))
                        AND 
                        (oad_lastyear_meds > 0))
                """,
            "UNKNOWN_DM": """
                        ((unknown_diabetes AND NOT type1_diabetes AND NOT type2_diabetes) AND NOT
                        oad_lastyear_meds AND NOT
                        insulin_lastyear_meds) 
                """,
            "NO_DM": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "T1DM": 0.03,
                    "T2DM": 0.2,
                    "UNKNOWN_DM": 0.02,
                    "NO_DM": 0.75
                }
            },
            "rate": "universal"
        },
        oad_lastyear_meds=patients.with_these_medications(
            oad_med_codes,
            between=["2019-02-01", "2020-02-01"],
            returning="number_of_matches_in_period",
        ),
        insulin_lastyear_meds=patients.with_these_medications(
            insulin_med_codes,
            between=["2019-02-01", "2020-02-01"],
            returning="number_of_matches_in_period",
        ),
    ),

    #EXETER ALGORITHM USING OPENSAFELY CODELISTS
    diabetes_exeter_os=patients.categorised_as(
        {
            "T1DM_EX_OS": """
        insulin_last6mo >= 2 AND t1dm_count >= t2dm_count * 2
        """,
            "T2DM_EX_OS": """
        (insulin_last6mo < 2 AND t2dm_count > 0)
        OR
        (insulin_last6mo >= 2 AND t1dm_count < t2dm_count * 2 AND t2dm_count > 0)
        """,
            "NO_DM": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "T1DM_EX_OS": 0.03,
                    "T2DM_EX_OS": 0.2,
                    "NO_DM": 0.77
                }
            },
            "rate": "universal"
        },
        t1dm_count=patients.with_these_clinical_events(
            diabetes_t1_codes,
            on_or_before="2020-02-01",
            returning="number_of_matches_in_period",
        ),
        t2dm_count=patients.with_these_clinical_events(
            diabetes_t2_codes,
            on_or_before="2020-02-01",
            returning="number_of_matches_in_period",
        ),
        insulin_last6mo=patients.with_these_medications(
            insulin_med_codes,
            between=["2019-08-01", "2020-02-01"],
            returning="number_of_matches_in_period",
        ),
    ),

    #EXETER ALGORITHM USING EXETER CODELISTS
    diabetes_exeter=patients.categorised_as(
        {
            "T1DM_EX": """
        insulin_last6mo >= 2 AND t1dm_count_ex >= t2dm_count_ex * 2
        """,
            "T2DM_EX": """
        (insulin_last6mo < 2 AND t2dm_count_ex > 0)
        OR
        (insulin_last6mo >= 2 AND t1dm_count_ex < t2dm_count_ex * 2 AND t2dm_count_ex > 0)
        """,
            "NO_DM": "DEFAULT",
        },
        return_expectations={
            "category": {
                "ratios": {
                    "T1DM_EX": 0.1,
                    "T2DM_EX": 0.2,
                    "NO_DM": 0.7
                }
            },
            "rate": "universal"
        },
        t1dm_count_ex=patients.with_these_clinical_events(
            filter_codes_by_category(diabetes_t1t2_codes_exeter,
                                     include=["1"]),
            on_or_before="2020-02-01",
            returning="number_of_matches_in_period",
        ),
        t2dm_count_ex=patients.with_these_clinical_events(
            filter_codes_by_category(diabetes_t1t2_codes_exeter,
                                     include=["2"]),
            on_or_before="2020-02-01",
            returning="number_of_matches_in_period",
        ),
        insulin_last6mo_ex=patients.with_these_medications(
            insulin_med_codes,
            between=["2019-08-01", "2020-02-01"],
            returning="number_of_matches_in_period",
        ),
    ),

    ## HBA1C
    hba1c_mmol_per_mol=patients.with_these_clinical_events(
        hba1c_new_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-01",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-02-29"
            },
            "float": {
                "distribution": "normal",
                "mean": 40.0,
                "stddev": 20
            },
            "incidence": 0.95,
        },
    ),
    hba1c_percentage=patients.with_these_clinical_events(
        hba1c_old_codes,
        find_last_match_in_period=True,
        on_or_before="2020-02-01",
        returning="numeric_value",
        include_date_of_match=True,
        include_month=True,
        return_expectations={
            "date": {
                "latest": "2020-02-29"
            },
            "float": {
                "distribution": "normal",
                "mean": 5,
                "stddev": 2
            },
            "incidence": 0.95,
        },
    ),

    # CANCER - 3 TYPES
    cancer_haem=patients.with_these_clinical_events(
        haem_cancer_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),
    cancer_nonhaem=patients.with_these_clinical_events(
        combine_codelists(lung_cancer_codes, other_cancer_codes),
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),

    #### PERMANENT
    permanent_immunodeficiency=patients.with_these_clinical_events(
        combine_codelists(hiv_codes, permanent_immune_codes,
                          sickle_cell_codes),
        on_or_before="2020-01-31",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),
    asplenia=patients.with_these_clinical_events(
        spleen_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),

    ### TEMPORARY IMMUNE
    temporary_immunodeficiency=patients.with_these_clinical_events(
        combine_codelists(temp_immune_codes, aplastic_codes),
        between=["2019-02-01",
                 "2020-01-31"],  ## THIS IS RESTRICTED TO LAST YEAR
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={
            "date": {
                "earliest": "2019-03-01",
                "latest": "2020-01-31"
            }
        },
    ),
    chronic_liver_disease=patients.with_these_clinical_events(
        chronic_liver_disease_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),
    stroke_dementia=patients.with_these_clinical_events(
        combine_codelists(stroke, dementia),
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),
    other_neuro=patients.with_these_clinical_events(
        other_neuro,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),

    # END STAGE RENAL DISEASE - DIALYSIS, TRANSPLANT OR END STAGE RENAL DISEASE
    esrf=patients.with_these_clinical_events(
        esrf_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),

    #Dialysis
    dialysis=patients.with_these_clinical_events(
        dialysis_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),

    #Kidney transplant
    kidney_transplant=patients.with_these_clinical_events(
        kidney_transplant_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),

    #Other organ transplant
    other_transplant=patients.with_these_clinical_events(
        other_transplant_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),

    # hypertension
    hypertension=patients.with_these_clinical_events(
        hypertension_codes,
        return_first_date_in_period=True,
        include_month=True,
    ),
    ra_sle_psoriasis=patients.with_these_clinical_events(
        ra_sle_psoriasis_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "latest": "2020-01-31"
        }},
    ),
    # asthma
    asthma=patients.categorised_as(
        {
            "0":
            "DEFAULT",
            "1":
            """
                (
                  recent_asthma_code OR (
                    asthma_code_ever AND NOT
                    copd_code_ever
                  )
                ) AND (
                  prednisolone_last_year = 0 OR 
                  prednisolone_last_year > 4
                )
            """,
            "2":
            """
                (
                  recent_asthma_code OR (
                    asthma_code_ever AND NOT
                    copd_code_ever
                  )
                ) AND
                prednisolone_last_year > 0 AND
                prednisolone_last_year < 5
                
            """,
        },
        return_expectations={
            "category": {
                "ratios": {
                    "0": 0.6,
                    "1": 0.1,
                    "2": 0.3
                }
            }
        },
        recent_asthma_code=patients.with_these_clinical_events(
            asthma_codes,
            between=["2017-02-01", "2020-02-01"],
        ),
        asthma_code_ever=patients.with_these_clinical_events(asthma_codes),
        copd_code_ever=patients.with_these_clinical_events(
            chronic_respiratory_disease_codes),
        prednisolone_last_year=patients.with_these_medications(
            pred_codes,
            between=["2019-02-01", "2020-02-01"],
            returning="number_of_matches_in_period",
        ),
    ),
)
Beispiel #26
0
study = StudyDefinition(
    # Configure the expectations framework
    default_expectations={
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "rate": "uniform",
        "incidence": 0.5,
    },
    # STUDY POPULATION
    population=patients.all(),
    # has_follow_up AND
    # (age >=18 AND age <= 110) AND
    # (rheumatoid OR osteoarthritis) AND
    # imd >0 AND NOT (
    # (has_asthma AND saba_single) OR
    # aspirin_ten_years OR
    # stroke OR
    # mi OR
    # gi_bleed_ulcer
    # )
    has_follow_up=patients.registered_with_one_practice_between(
        "2019-02-28", "2020-02-29", return_expectations={"incidence": 0.9},
    ),
    has_asthma=patients.with_these_clinical_events(
        current_asthma_codes,
        between=["2017-02-28", "2020-02-29"],
        return_expectations={"incidence": 0.9},
    ),
    aspirin_ten_years=patients.with_these_medications(
        aspirin_med_codes,
        between=["2010-02-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {"earliest": "2010-11-01", "latest": "2020-02-29"}
        },
    ),
    age=patients.age_as_of(
        "2020-03-01",
        return_expectations={
            "rate": "universal",
            "int": {"distribution": "population_ages"},
        },
    ),
    imd=patients.address_as_of(
        "2020-02-29",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"100": 0.1, "200": 0.2, "300": 0.7}},
        },
    ),
    stroke=patients.with_these_clinical_events(
        stroke_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {"latest": "2020-02-29"}},
    ),
    #### Myocardial infarction
    mi=patients.with_these_clinical_events(
        mi_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {"latest": "2020-02-29"}},
    ),
    #### GI BLEED
    gi_bleed_ulcer=patients.with_these_clinical_events(
        gi_bleed_ulcer_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {"latest": "2020-02-29"}},
    ),
    # OSTEOARTHRITIS
    osteoarthritis=patients.with_these_clinical_events(
        osteoarthritis_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {"latest": "2020-02-29"}},
    ),
    # RHEUMATOID ARTHRITIS
    rheumatoid=patients.with_these_clinical_events(
        rheumatoid_codes,
        on_or_before="2020-02-29",
        return_last_date_in_period=True,
        include_month=True,
        return_expectations={"date": {"latest": "2020-02-29"}},
    ),
    #### SABA SINGLE CONSTITUENT - asthma treatment
    saba_single=patients.with_these_medications(
        saba_med_codes,
        between=["2019-11-01", "2020-02-29"],
        returning="date",
        find_last_match_in_period=True,
        include_month=True,
        include_day=False,
        return_expectations={
            "date": {"earliest": "2019-11-01", "latest": "2020-02-29"},
        },
    ),
)
Beispiel #27
0
study = StudyDefinition(
    # Configure the expectations framework
    default_expectations={
        "date": {"earliest": "1970-01-01", "latest": "today"},
        "rate": "uniform",
        "incidence": 0.2,
    },

    # set an index date (as starting point)
    index_date="2020-02-01",

    # This line defines the study population that the below varaibles will be defined for 
    # currently registered patients restricts to those alive 
    # the age restriction is applied as current TPP linkage only includes linkages to old age care 
    population=patients.satisfying(
        """
        (age >= 65 AND age < 120) AND 
        is_registered_with_tpp  
        """,
        is_registered_with_tpp=patients.registered_as_of(
          "index_date"
        ),
    ),

    # TPP ADDRESS LINKAGE 
    # tpp defined care home as of date 
    tpp_care_home_type=patients.care_home_status_as_of(
        "index_date",
        categorised_as={
            "PC": """
              IsPotentialCareHome
              AND LocationDoesNotRequireNursing='Y'
              AND LocationRequiresNursing='N'
            """,
            "PN": """
              IsPotentialCareHome
              AND LocationDoesNotRequireNursing='N'
              AND LocationRequiresNursing='Y'
            """,
            "PS": "IsPotentialCareHome",
            "U": "DEFAULT",
        },
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"PC": 0.05, "PN": 0.05, "PS": 0.05, "U": 0.85,},},
        },
    ),

    # CODED EVENTS (extracted over different time periods to sense check)
    # incentivised codes ever
    snomed_carehome_ever=patients.with_these_clinical_events(
        nhse_care_home_des_codes,
        on_or_before="index_date",
        returning="binary_flag",
        return_expectations={"incidence": 0.1},
    ),
    # incentivised codes within past year 
    snomed_carehome_pastyear=patients.with_these_clinical_events(
        nhse_care_home_des_codes,
        between=["index_date - 1 year", "index_date"], 
        returning="binary_flag",
        return_expectations={"incidence": 0.1},
    ),

    #primis codes ever 
    primis_carehome_ever=patients.with_these_clinical_events(
        primis_codes,
        on_or_before="index_date",
        returning="binary_flag",
        return_expectations={"incidence": 0.1},
    ),
    
    #primis codes within past year 
    primis_carehome_pastyear=patients.with_these_clinical_events(
        primis_codes,
        between=["index_date - 1 year", "index_date"], 
        returning="binary_flag",
        return_expectations={"incidence": 0.1},
    ),

    # HOUSEHOLD RELATED VARIABLES 
    ## household ID  
    household_id=patients.household_as_of(
        "index_date",
        returning="pseudo_id",
        return_expectations={
            "int": {"distribution": "normal", "mean": 1000, "stddev": 200},
            "incidence": 1,
        },
    ),
    ## household size   
    household_size=patients.household_as_of(
        "index_date",
        returning="household_size",
        return_expectations={
            "int": {"distribution": "normal", "mean": 3, "stddev": 1},
            "incidence": 1,
        },
    ),
    # mixed household flag 
    nontpp_household=patients.household_as_of(
        "index_date",
        returning="has_members_in_other_ehr_systems",
        return_expectations={ "incidence": 0.75
        },
    ),
    # mixed household percentage 
    tpp_coverage=patients.household_as_of(
        "index_date", 
        returning="percentage_of_members_with_data_in_this_backend", 
        return_expectations={
            "int": {"distribution": "normal", "mean": 75, "stddev": 10},
            "incidence": 1,
        },
    ),

    # AGE and AGE CATEGORIES (latter needed for comparison w. census only)
    # age 
    age=patients.age_as_of(
        "index_date",
        return_expectations={
            "rate": "universal",
            "int": {"distribution": "population_ages"},
        },
    ),
    # age band 
    ageband_narrow = patients.categorised_as(
        {   
            "0": "DEFAULT",
            "65-74": """ age >=  65 AND age < 75""",
            "75-79": """ age >=  75 AND age < 80""",
            "80-84": """ age >=  80 AND age < 85""",
            "85-89": """ age >=  85 AND age < 120""",
        },
        return_expectations={
            "rate":"universal",
            "category": {"ratios": {"65-74": 0.4, "75-79": 0.2, "80-84":0.2, "85-89":0.2 }}
        },
    ),

    # SELECTED DEMOGRAPHIC CHARACTERISTICS TO DESCRIBE 
    # sex 
    sex=patients.sex(
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"M": 0.49, "F": 0.51}},
        }
    ),
    # self-reported ethnicity 
    ethnicity=patients.with_these_clinical_events(
        ethnicity_codes,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=False,
        return_expectations={
            "category": {"ratios": {"1": 0.8, "5": 0.1, "3": 0.1}},
            "incidence": 0.75,
        },
    ),
    # grouped region of the practice
    region=patients.registered_practice_as_of(
        "index_date",
        returning="nuts1_region_name",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "North East": 0.1,
                    "North West": 0.1,
                    "Yorkshire and the Humber": 0.1,
                    "East Midlands": 0.1,
                    "West Midlands": 0.1,
                    "East of England": 0.1,
                    "London": 0.2,
                    "South East": 0.2,
                },
            },
        },
    ),
    # imd 
    imd=patients.address_as_of(
        "index_date",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"100": 0.1, "200": 0.2, "300": 0.7}},
        },
    ), 

    # SELECTED CLINICAL CHARACTERISTICS TO DESCRIBE 
    # diabetes
    diabetes=patients.with_these_clinical_events(
        diabetes_codes,
        on_or_before="index_date",
        returning="binary_flag",
        return_expectations={"incidence": 0.10},
    ),
    # chronic respiratory disease (excl asthma)
    chronic_respiratory_disease=patients.with_these_clinical_events(
        chronic_respiratory_disease_codes,
        on_or_before="index_date",
        returning="binary_flag",
        return_expectations={"incidence": 0.20},
    ),
    # stroke
    stroke=patients.with_these_clinical_events(
        stroke_codes,
        on_or_before="index_date",
        returning="binary_flag",
        return_expectations={"incidence": 0.05},
    ),
    # chronic heart disease
    chronic_cardiac_disease=patients.with_these_clinical_events(
        chronic_cardiac_disease_codes,
        on_or_before="index_date",
        returning="binary_flag",
        return_expectations={"incidence": 0.35},
    ),
    # dementia
    dementia=patients.with_these_clinical_events(
        dementia_codes,
        on_or_before="index_date",
        returning="binary_flag",
        return_expectations={"incidence": 0.45},
    ),
    # cancer
    cancer=patients.satisfying(
        "lung_cancer OR haem_cancer OR other_cancer",
        lung_cancer=patients.with_these_clinical_events(
        lung_cancer_codes,
        on_or_before="index_date",
        returning="binary_flag",
        return_expectations={"incidence": 0.10},
        ),
        haem_cancer=patients.with_these_clinical_events(
        haem_cancer_codes,
        on_or_before="index_date",
        return_expectations={"incidence": 0.05},
        ),
        other_cancer=patients.with_these_clinical_events(
        other_cancer_codes,
        on_or_before="index_date",
        return_expectations={"incidence": 0.10},
        ),

    ),


)
Beispiel #28
0
study = StudyDefinition(
    # Configure the expectations framework
    default_expectations={
        "date": {
            "earliest": "1970-01-01",
            "latest": latest_date
        },
        "rate": "uniform",
        "incidence": 0.2,
    },
    # This line defines the study population
    population=patients.satisfying("""
        registered = 1
        AND
        (covid_vacc_date
        OR
        (age >=70 AND age <= 110) 
        OR
        (care_home_type))
        AND
        NOT has_died

        """),
    has_follow_up=patients.registered_with_one_practice_between(
        start_date="2019-12-01",
        end_date=campaign_start,
        return_expectations={"incidence": 0.90},
    ),
    registered=patients.registered_as_of(
        campaign_start,  # day before vaccination campaign starts - discuss with team if this should be "today"
        return_expectations={"incidence": 0.98},
    ),
    has_died=patients.died_from_any_cause(
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={"incidence": 0.05},
    ),

    # Demographic information

    # CAREHOME STATUS
    care_home_type=patients.care_home_status_as_of(
        campaign_start,
        categorised_as={
            "PC": """
              IsPotentialCareHome
              AND LocationDoesNotRequireNursing='Y'
              AND LocationRequiresNursing='N'
            """,
            "PN": """
              IsPotentialCareHome
              AND LocationDoesNotRequireNursing='N'
              AND LocationRequiresNursing='Y'
            """,
            "PS": "IsPotentialCareHome",
            "": "DEFAULT",  # use empty string 
        },
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "PC": 0.05,
                    "PN": 0.05,
                    "PS": 0.05,
                    "": 0.85,
                },
            },
        },
    ),

    # simple care home flag
    care_home=patients.categorised_as(
        {
            1: """care_home_type""",
            0: "DEFAULT",
        },
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    1: 0.15,
                    0: 0.85,
                },
            },
        },
    ),
    age=patients.age_as_of(
        "2021-03-31",  # PHE defined date for calulating eligibilty across all vaccination campaigns
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages"
            },
        },
    ),
    ageband=patients.categorised_as(
        {
            "0": "DEFAULT",
            # consider doing an under 16 age band as well to differentiate between workers and children eligble for another reason
            "0-19": """ age >= 0 AND age < 20""",
            "20-29": """ age >= 20 AND age < 30""",
            "30-39": """ age >= 30 AND age < 40""",
            "40-49": """ age >= 40 AND age < 50""",
            "50-59": """ age >= 50 AND age < 60""",
            "60-69": """ age >= 60 AND age < 70""",
            "70-79": """ age >= 70 AND age < 80""",
            "80+":
            """ age >=  80 AND age < 120""",  # age eligibility currently set at 80
        },
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "0-19": 0.125,
                    "20-29": 0.125,
                    "30-39": 0.125,
                    "40-49": 0.125,
                    "50-59": 0.125,
                    "60-69": 0.125,
                    "70-79": 0.125,
                    "80+": 0.125,
                }
            },
        },
    ),

    # age bands for patients not in care homes (ie living in the community)
    ageband_community=patients.categorised_as(
        {
            "care home": "DEFAULT",
            "0-19": """ age >= 0 AND age < 20 AND NOT care_home_type""",
            "20-29": """ age >= 20 AND age < 30 AND NOT care_home_type""",
            "30-39": """ age >= 30 AND age < 40 AND NOT care_home_type""",
            "40-49": """ age >= 40 AND age < 50 AND NOT care_home_type""",
            "50-59": """ age >= 50 AND age < 60 AND NOT care_home_type""",
            "60-69": """ age >= 60 AND age < 70 AND NOT care_home_type""",
            "70-79": """ age >= 70 AND age < 80 AND NOT care_home_type""",
            "80+": """ age >=  80 AND age < 120 AND NOT care_home_type""",
        },
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "care home": 0.025,
                    "0-19": 0.1,
                    "20-29": 0.125,
                    "30-39": 0.125,
                    "40-49": 0.125,
                    "50-59": 0.125,
                    "60-69": 0.125,
                    "70-79": 0.125,
                    "80+": 0.125,
                }
            },
        },
    ),
    sex=patients.sex(return_expectations={
        "rate": "universal",
        "category": {
            "ratios": {
                "M": 0.49,
                "F": 0.51
            }
        },
    }),
    # ETHNICITY IN 16 CATEGORIES
    ethnicity_16=patients.with_these_clinical_events(
        ethnicity_codes_16,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=False,
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.0625,
                    "2": 0.0625,
                    "3": 0.0625,
                    "4": 0.0625,
                    "5": 0.0625,
                    "6": 0.0625,
                    "7": 0.0625,
                    "8": 0.0625,
                    "9": 0.0625,
                    "10": 0.0625,
                    "11": 0.0625,
                    "12": 0.0625,
                    "13": 0.0625,
                    "14": 0.0625,
                    "15": 0.0625,
                    "16": 0.0625,
                }
            },
            "incidence": 0.75,
        },
    ),
    # ETHNICITY IN 6 CATEGORIES
    ethnicity=patients.with_these_clinical_events(
        ethnicity_codes,
        returning="category",
        find_last_match_in_period=True,
        include_date_of_match=False,
        return_expectations={
            "category": {
                "ratios": {
                    "1": 0.2,
                    "2": 0.2,
                    "3": 0.2,
                    "4": 0.2,
                    "5": 0.2
                }
            },
            "incidence": 0.75,
        },
    ),
    # practice pseudo id
    practice_id=patients.registered_practice_as_of(
        campaign_start,  # day before vaccine campaign start
        returning="pseudo_id",
        return_expectations={
            "int": {
                "distribution": "normal",
                "mean": 1000,
                "stddev": 100
            },
            "incidence": 1,
        },
    ),
    # stp is an NHS administration region based on geography
    stp=patients.registered_practice_as_of(
        campaign_start,
        returning="stp_code",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "STP1": 0.1,
                    "STP2": 0.1,
                    "STP3": 0.1,
                    "STP4": 0.1,
                    "STP5": 0.1,
                    "STP6": 0.1,
                    "STP7": 0.1,
                    "STP8": 0.1,
                    "STP9": 0.1,
                    "STP10": 0.1,
                }
            },
        },
    ),
    # NHS administrative region
    region=patients.registered_practice_as_of(
        campaign_start,
        returning="nuts1_region_name",
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "North East": 0.1,
                    "North West": 0.1,
                    "Yorkshire and the Humber": 0.2,
                    "East Midlands": 0.1,
                    "West Midlands": 0.1,
                    "East of England": 0.1,
                    "London": 0.1,
                    "South East": 0.2,
                },
            },
        },
    ),

    # IMD - quintile
    imd=patients.categorised_as(
        {
            "0":
            "DEFAULT",
            "1":
            """index_of_multiple_deprivation >=1 AND index_of_multiple_deprivation < 32844*1/5""",
            "2":
            """index_of_multiple_deprivation >= 32844*1/5 AND index_of_multiple_deprivation < 32844*2/5""",
            "3":
            """index_of_multiple_deprivation >= 32844*2/5 AND index_of_multiple_deprivation < 32844*3/5""",
            "4":
            """index_of_multiple_deprivation >= 32844*3/5 AND index_of_multiple_deprivation < 32844*4/5""",
            "5":
            """index_of_multiple_deprivation >= 32844*4/5 AND index_of_multiple_deprivation < 32844""",
        },
        index_of_multiple_deprivation=patients.address_as_of(
            campaign_start,
            returning="index_of_multiple_deprivation",
            round_to_nearest=100,
        ),
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "0": 0.05,
                    "1": 0.19,
                    "2": 0.19,
                    "3": 0.19,
                    "4": 0.19,
                    "5": 0.19,
                }
            },
        },
    ),

    # BMI
    bmi=patients.categorised_as(
        {
            "Not obese": "DEFAULT",
            "Obese I (30-34.9)": """ bmi_value >= 30 AND bmi_value < 35""",
            "Obese II (35-39.9)": """ bmi_value >= 35 AND bmi_value < 40""",
            "Obese III (40+)": """ bmi_value >= 40 AND bmi_value < 100""",
            # set maximum to avoid any impossibly extreme values being classified as obese
        },
        bmi_value=patients.most_recent_bmi(on_or_after="2015-12-01",
                                           minimum_age_at_measurement=16),
        return_expectations={
            "rate": "universal",
            "category": {
                "ratios": {
                    "Not obese": 0.7,
                    "Obese I (30-34.9)": 0.1,
                    "Obese II (35-39.9)": 0.1,
                    "Obese III (40+)": 0.1,
                }
            },
        },
    ),

    # CLINICAL CO-MORBIDITIES WORK IN PROGRESS IN COLLABORATION WITH NHSX
    # https://github.com/opensafely/vaccine-eligibility/blob/master/analysis/study_definition.py
    chronic_cardiac_disease=patients.with_these_clinical_events(
        chronic_cardiac_disease_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    current_copd=patients.with_these_clinical_events(
        current_copd_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    # on a dmard - indicative of immunosuppression
    dmards=patients.with_these_medications(
        dmards_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    # dementia
    dementia=patients.with_these_clinical_events(
        dementia_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    dialysis=patients.with_these_clinical_events(
        dialysis_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    solid_organ_transplantation=patients.with_these_clinical_events(
        solid_organ_transplantation_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    chemo_or_radio=patients.with_these_clinical_events(
        chemotherapy_or_radiotherapy_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    intel_dis_incl_downs_syndrome=patients.with_these_clinical_events(
        intellectual_disability_including_downs_syndrome_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    lung_cancer=patients.with_these_clinical_events(
        lung_cancer_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    cancer_excl_lung_and_haem=patients.with_these_clinical_events(
        cancer_excluding_lung_and_haematological_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    haematological_cancer=patients.with_these_clinical_events(
        haematological_cancer_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    bone_marrow_transplant=patients.with_these_clinical_events(
        bone_marrow_transplant_codes,
        between=["2020-07-01", campaign_start],
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    cystic_fibrosis=patients.with_these_clinical_events(
        cystic_fibrosis_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    sickle_cell_disease=patients.with_these_clinical_events(
        sickle_cell_disease_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    permanant_immunosuppression=patients.with_these_clinical_events(
        permanent_immunosuppression_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    temporary_immunosuppression=patients.with_these_clinical_events(
        temporary_immunosuppression_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    #
    psychosis_schiz_bipolar=patients.with_these_clinical_events(
        psychosis_schizophrenia_bipolar_affective_disease_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),
    ssri=patients.with_these_medications(
        ssri_codes,
        between=["2019-12-01", campaign_start],
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),

    # https://github.com/opensafely/codelist-development/issues/4
    asplenia=patients.with_these_clinical_events(
        asplenia_codes,
        on_or_before=campaign_start,
        returning="binary_flag",
        return_expectations={
            "incidence": 0.01,
        },
    ),

    ###############################################################################
    # COVID VACCINATION
    ###############################################################################
    # any COVID vaccination (first dose)
    covid_vacc_date=patients.with_tpp_vaccination_record(
        target_disease_matches="SARS-2 CORONAVIRUS",
        on_or_after="2020-12-01",  # check all december to date
        find_first_match_in_period=True,
        returning="date",
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest":
                "2020-12-08",  # first vaccine administered on the 8/12
                "latest": "2021-01-31",
            },
            "incidence": 0.4
        },
    ),
    # SECOND DOSE COVID VACCINATION
    covid_vacc_second_dose_date=patients.with_tpp_vaccination_record(
        target_disease_matches="SARS-2 CORONAVIRUS",
        on_or_after="covid_vacc_date + 19 days",
        find_first_match_in_period=True,
        returning="date",
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest":
                "2020-12-29",  # first reported second dose administered on the 29/12
                "latest": latest_date,
            },
            "incidence": 0.1
        },
    ),
    # COVID VACCINATION - Pfizer BioNTech
    covid_vacc_pfizer_date=patients.with_tpp_vaccination_record(
        product_name_matches=
        "COVID-19 mRNA Vac BNT162b2 30mcg/0.3ml conc for susp for inj multidose vials (Pfizer-BioNTech)",
        on_or_after="2020-12-01",  # check all december to date
        find_first_match_in_period=True,
        returning="date",
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest":
                "2020-12-08",  # first vaccine administered on the 8/12
                "latest": latest_date,
            },
            "incidence": 0.3
        },
    ),
    # COVID VACCINATION - Oxford AZ
    covid_vacc_oxford_date=patients.with_tpp_vaccination_record(
        product_name_matches=
        "COVID-19 Vac AstraZeneca (ChAdOx1 S recomb) 5x10000000000 viral particles/0.5ml dose sol for inj MDV",
        on_or_after="2020-12-01",  # check all december to date
        find_first_match_in_period=True,
        returning="date",
        date_format="YYYY-MM-DD",
        return_expectations={
            "date": {
                "earliest":
                "2020-01-04",  # first vaccine administered on the 4/1
                "latest": latest_date,
            },
            "incidence": 0.1
        },
    ),

    # EPI-PEN
    adrenaline_pen=patients.with_these_medications(
        adrenaline_pen,
        on_or_after="2018-12-01",  # look for last two years
        returning="binary_flag",
        return_last_date_in_period=False,
        include_month=False,
        return_expectations={
            "date": {
                "earliest": "2018-12-01",
                "latest": latest_date
            },
            "incidence": 0.001,
        },
    ),
)
from cohortextractor import StudyDefinition, patients, codelist, codelist_from_csv


# Minimal study definition: one population rule and a single `age` variable.
study = StudyDefinition(
    # Configure the expectations framework
    default_expectations={
        "date": {
            "earliest": "1900-01-01",
            "latest": "today",
        },
        "rate": "uniform",
        "incidence": 0.5,
    },
    # Population: patients registered with one practice between the two dates
    population=patients.registered_with_one_practice_between("2019-02-01", "2020-02-01"),
    # Age as of 2019-09-01; expectations draw integers from "population_ages"
    age=patients.age_as_of(
        "2019-09-01",
        return_expectations={
            "rate": "universal",
            "int": {
                "distribution": "population_ages",
            },
        },
    ),
)
# Study definition anchored on `index_date`: adult, registered, living
# patients, with practice/STP identifiers and monthly counts of cholesterol
# and INR events.
study = StudyDefinition(
    # Expectations framework defaults
    default_expectations={
        "date": {"earliest": index_date, "latest": "today"},
        "rate": "uniform",
        "incidence": 1,
    },
    index_date=index_date,

    # Study population: aged 18 to 119, registered as of index_date, and
    # with no death recorded on or before index_date.
    population=patients.satisfying(
        """
        (age >= 18 AND age < 120) AND
        (NOT died) AND
        (registered)
        """,
        died=patients.died_from_any_cause(
            on_or_before=index_date,
            returning="binary_flag",
        ),
        registered=patients.registered_as_of(index_date),
        age=patients.age_as_of(index_date),
    ),

    ### geographic/administrative groups
    # Pseudonymised id of the practice the patient is registered with
    practice=patients.registered_practice_as_of(
        index_date,
        returning="pseudo_id",
        return_expectations={
            "int": {"distribution": "normal", "mean": 100, "stddev": 20},
        },
    ),
    # STP code of the registered practice; expected ratios come from dict_stp
    stp=patients.registered_practice_as_of(
        index_date,
        returning="stp_code",
        return_expectations={
            "category": {"ratios": dict_stp},
        },
    ),

    ### event counts in the window from index_date to index_date + 1 month
    cholesterol=patients.with_these_clinical_events(
        codes_cholesterol,
        returning="number_of_episodes",
        between=["index_date", "index_date + 1 month"],
        return_expectations={
            "int": {"distribution": "normal", "mean": 2, "stddev": 0.5},
        },
    ),
    inr=patients.with_these_clinical_events(
        codes_inr,
        returning="number_of_episodes",
        between=["index_date", "index_date + 1 month"],
        return_expectations={
            "int": {"distribution": "normal", "mean": 3, "stddev": 0.5},
        },
    ),
)