def test_address_dtype_generation():
    """Check the pandas CSV arguments produced for an address-derived column.

    A study with a single ``rural_urban_classification`` column is expected
    to yield a ``category`` dtype for that column, with no date parsing and
    no converters.
    """
    # This line defines the study population
    population = patients.all()
    rural_urban = patients.address_as_of(
        "2020-02-01", returning="rural_urban_classification"
    )
    study = StudyDefinition(population=population, rural_urban=rural_urban)

    actual = _converters_to_names(study.pandas_csv_args)

    expected = {
        "dtype": {"rural_urban": "category"},
        "parse_dates": [],
        "date_col_for": {},
        "converters": {},
    }
    assert actual == expected
"STP7": 0.1, "STP8": 0.1, "STP9": 0.1, "STP10": 0.1, } }, }, ), imd=patients.address_as_of( "2020-02-29", returning="index_of_multiple_deprivation", round_to_nearest=100, return_expectations={ "rate": "universal", "category": { "ratios": { "100": 0.1, "200": 0.2, "300": 0.7 } }, }, ), ethnicity=patients.with_these_clinical_events( ethnicity_codes, returning="category", find_last_match_in_period=True, include_date_of_match=True, return_expectations={ "category": { "ratios": {
on_or_before="2020-06-01", returning="date_of_death", include_month=True, include_day=True, ), # The rest of the lines define the covariates with associated GitHub issues # https://github.com/ebmdatalab/tpp-sql-notebook/issues/33 age=patients.age_as_of("2020-02-01"), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/46 sex=patients.sex(), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/52 imd=patients.address_as_of( "2020-02-01", returning="index_of_multiple_deprivation", round_to_nearest=100 ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/37 rural_urban=patients.address_as_of( "2020-02-01", returning="rural_urban_classification" ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/54 stp=patients.registered_practice_as_of("2020-02-01", returning="stp_code"), # region - one of NHS England 9 regions region=patients.registered_practice_as_of("2020-02-01", returning="nhse_region_name"), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/10 bmi=patients.most_recent_bmi(
"ratios": { "MSOA1": 0.5, "MSOA2": 0.5 } }, }, ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/52 imd=patients.address_as_of( "2020-02-01", returning="index_of_multiple_deprivation", round_to_nearest=100, return_expectations={ "rate": "universal", "category": { "ratios": { "100": 0.1, "200": 0.2, "300": 0.7 } }, }, ), rural_urban=patients.address_as_of( "2020-02-01", returning="rural_urban_classification", return_expectations={ "rate": "universal", "category": { "ratios": { "rural": 0.1,
def test_patients_address_as_of():
    """Check ``patients.address_as_of`` against a small set of patients.

    Three patients are stored: one with several addresses (two of which
    overlap on the query date), one with no address at all, and one whose
    only address ended before the query date. The study queries the IMD and
    rural/urban classification as of 2020-01-01; the first patient is
    expected to report the values of the address starting 2019-01-01, and
    the other two are expected to report "0".
    """
    session = make_session()

    # (StartDate, EndDate, ImdRankRounded, RuralUrbanClassificationCode)
    address_history = [
        ("1990-01-01", "2018-01-01", 100, 1),
        # We deliberately create overlapping address periods here to check
        # that we handle these correctly
        ("2018-01-01", "2020-02-01", 200, 1),
        ("2019-01-01", "2022-01-01", 300, 2),
        ("2022-01-01", "9999-12-31", 500, 3),
    ]
    patient = Patient()
    for start, end, imd_rank, rural_urban_code in address_history:
        patient.Addresses.append(
            PatientAddress(
                StartDate=start,
                EndDate=end,
                ImdRankRounded=imd_rank,
                RuralUrbanClassificationCode=rural_urban_code,
            )
        )

    patient_no_address = Patient()

    patient_only_old_address = Patient()
    old_address = PatientAddress(
        StartDate="2010-01-01",
        EndDate="2015-01-01",
        ImdRankRounded=100,
        RuralUrbanClassificationCode=1,
    )
    patient_only_old_address.Addresses.append(old_address)

    session.add_all([patient, patient_no_address, patient_only_old_address])
    session.commit()

    imd_column = patients.address_as_of(
        "2020-01-01",
        returning="index_of_multiple_deprivation",
        round_to_nearest=100,
    )
    rural_urban_column = patients.address_as_of(
        "2020-01-01", returning="rural_urban_classification"
    )
    study = StudyDefinition(
        population=patients.all(),
        imd=imd_column,
        rural_urban=rural_urban_column,
    )

    results = study.to_dicts()
    assert [row["imd"] for row in results] == ["300", "0", "0"]
    assert [row["rural_urban"] for row in results] == ["2", "0", "0"]