def test_make_df_from_expectations_with_distribution_and_date():
    """Dummy data for a BMI column and its date column has aligned nulls.

    Defines a study with a ``bmi`` column (carrying return expectations)
    and a ``bmi_date_measured`` column derived from it, generates a
    10000-row dataframe from those expectations, and checks both the
    column names and that the null rows of the two columns line up.
    """
    bmi_expectations = {
        "rate": "exponential_increase",
        "incidence": 0.6,
        "float": {"distribution": "normal", "mean": 35, "stddev": 10},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
            return_expectations=bmi_expectations,
        ),
        bmi_date_measured=patients.date_of("bmi", date_format="YYYY-MM"),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    assert sorted(result.columns) == ["bmi", "bmi_date_measured"]

    # Check that the null-valued rows are aligned with each other
    bmi_column = result["bmi"]
    date_column = result["bmi_date_measured"]
    null_bmi_rows = bmi_column[pd.isnull(bmi_column)].fillna(0)
    null_date_rows = date_column[pd.isnull(date_column)].fillna(0)
    assert (null_bmi_rows == null_date_rows).all()
def test_bmi_dtype_generation():
    """Check the pandas CSV arguments generated for a BMI column and its date.

    The study declares ``bmi`` via ``patients.most_recent_bmi`` and a
    ``bmi_date_measured`` column with ``YYYY-MM`` format. The test asserts
    that, after passing ``study.pandas_csv_args`` through
    ``_converters_to_names``, ``bmi`` is typed as ``float`` and the date
    column is parsed and mapped to the ``add_day_to_date`` converter.
    """
    # NOTE: an unused categorised codelist was previously constructed here;
    # this test does not reference any codelist, so it has been dropped.
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
        ),
        bmi_date_measured=patients.date_of("bmi", date_format="YYYY-MM"),
    )
    result = _converters_to_names(study.pandas_csv_args)
    assert result == {
        "converters": {"bmi_date_measured": "add_day_to_date"},
        "dtype": {"bmi": "float"},
        "date_col_for": {"bmi": "bmi_date_measured"},
        "parse_dates": ["bmi_date_measured"],
    }
def test_clinical_events_numeric_value_dtype_generation():
    """Check pandas CSV arguments for a numeric clinical-event column.

    ``creatinine`` returns the ``numeric_value`` of the last matching
    event and ``creatinine_date`` is its ``YYYY-MM`` date; the test
    asserts the resulting dtype, converter and date-column mapping.
    """
    test_codelist = codelist(["X"], system="ctv3")
    creatinine_column = patients.with_these_clinical_events(
        test_codelist,
        find_last_match_in_period=True,
        on_or_before="2020-02-01",
        returning="numeric_value",
    )
    study = StudyDefinition(
        population=patients.all(),
        creatinine=creatinine_column,
        creatinine_date=patients.date_of("creatinine", date_format="YYYY-MM"),
    )

    expected_csv_args = {
        "converters": {"creatinine_date": "add_day_to_date"},
        "dtype": {"creatinine": "float"},
        "date_col_for": {"creatinine": "creatinine_date"},
        "parse_dates": ["creatinine_date"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected_csv_args
def test_explicit_bmi_fallback():
    """An explicitly coded BMI is returned when only a weight is recorded.

    The patient has a weight event (no height event) and a later event
    under the BMI code with value 99. Within the 1995-2005 window the
    test expects BMI "99.0" dated 2001-10-01.
    """
    session = make_session()
    weight_code = "X76C7"
    bmi_code = "22K.."
    weight_event = CodedEvent(
        CTV3Code=weight_code, NumericValue=50, ConsultationDate="2001-06-01"
    )
    explicit_bmi_event = CodedEvent(
        CTV3Code=bmi_code, NumericValue=99, ConsultationDate="2001-10-01"
    )
    patient = Patient(
        DateOfBirth="1950-01-01",
        CodedEvents=[weight_event, explicit_bmi_event],
    )
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1995-01-01",
            on_or_before="2005-01-01",
        ),
        BMI_date_measured=patients.date_of("BMI", date_format="YYYY-MM-DD"),
    )
    rows = study.to_dicts()
    assert [row["BMI"] for row in rows] == ["99.0"]
    assert [row["BMI_date_measured"] for row in rows] == ["2001-10-01"]
def test_categorical_clinical_events_with_date_dtype_generation():
    """Check pandas CSV arguments for a categorical event column with a date.

    ``ethnicity`` returns the category of the last matching event from a
    categorised codelist, and ``ethnicity_date`` uses the default date
    format. The test asserts the ``category`` dtype and the
    ``add_month_and_day_to_date`` converter for the date column.
    """
    categorised_codelist = codelist([("X", "Y")], system="ctv3")
    categorised_codelist.has_categories = True
    ethnicity_column = patients.with_these_clinical_events(
        categorised_codelist,
        returning="category",
        find_last_match_in_period=True,
    )
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=ethnicity_column,
        ethnicity_date=patients.date_of("ethnicity"),
    )

    expected_csv_args = {
        "converters": {"ethnicity_date": "add_month_and_day_to_date"},
        "date_col_for": {"ethnicity": "ethnicity_date"},
        "dtype": {"ethnicity": "category"},
        "parse_dates": ["ethnicity_date"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected_csv_args
def test_bmi_when_only_some_measurements_of_child():
    """Only the 2010 weight/height pair contributes to the reported BMI.

    The patient (born 1990) has an explicit BMI event in 1995 with value
    99, plus a weight of 50 and a height of 10 recorded on 2010-01-01.
    For the 2005-2015 window the test expects BMI "0.5" dated 2010-01-01.
    """
    session = make_session()
    bmi_code = "22K.."
    weight_code = "X76C7"
    height_code = "XM01E"
    # (code, numeric value, consultation date), in the order they are attached
    event_specs = [
        (bmi_code, 99, "1995-01-01"),
        (weight_code, 50, "2010-01-01"),
        (height_code, 10, "2010-01-01"),
    ]
    patient = Patient(
        DateOfBirth="1990-01-01",
        CodedEvents=[
            CodedEvent(CTV3Code=code, NumericValue=value, ConsultationDate=when)
            for code, value, when in event_specs
        ],
    )
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="2005-01-01",
            on_or_before="2015-01-01",
        ),
        BMI_date_measured=patients.date_of("BMI", date_format="YYYY-MM-DD"),
    )
    rows = study.to_dicts()
    assert [row["BMI"] for row in rows] == ["0.5"]
    assert [row["BMI_date_measured"] for row in rows] == ["2010-01-01"]
def test_bmi_rounded():
    """The reported BMI is "0.1" for a weight of 10.12345 and a height of 10.

    Also checks that the measurement date reported is that of the weight
    event (2001-06-01).
    """
    session = make_session()
    weight_code = "X76C7"
    height_code = "XM01E"
    weight_event = CodedEvent(
        CTV3Code=weight_code, NumericValue=10.12345, ConsultationDate="2001-06-01"
    )
    height_event = CodedEvent(
        CTV3Code=height_code, NumericValue=10, ConsultationDate="2000-02-01"
    )
    patient = Patient(
        DateOfBirth="1950-01-01",
        CodedEvents=[weight_event, height_event],
    )
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        # The date is deliberately passed positionally, as in the original call
        BMI=patients.most_recent_bmi("2005-01-01"),
        BMI_date_measured=patients.date_of("BMI", date_format="YYYY-MM-DD"),
    )
    rows = study.to_dicts()
    assert [row["BMI"] for row in rows] == ["0.1"]
    assert [row["BMI_date_measured"] for row in rows] == ["2001-06-01"]
def test_clinical_event_with_category():
    """The category of the last matching event is returned with its year.

    Three patients are created: one with no events, one with "foo1"
    (2018) and "foo2" (2020), and one with "foo3" (2019). The expected
    categories are "", "B" and "C", with dates "", "2020" and "2019".
    """
    session = make_session()
    patient_without_events = Patient()
    patient_with_two_events = Patient(
        CodedEvents=[
            CodedEvent(CTV3Code="foo1", ConsultationDate="2018-01-01"),
            CodedEvent(CTV3Code="foo2", ConsultationDate="2020-01-01"),
        ]
    )
    patient_with_one_event = Patient(
        CodedEvents=[CodedEvent(CTV3Code="foo3", ConsultationDate="2019-01-01")]
    )
    session.add_all(
        [patient_without_events, patient_with_two_events, patient_with_one_event]
    )
    session.commit()

    codes = codelist([("foo1", "A"), ("foo2", "B"), ("foo3", "C")], "ctv3")
    study = StudyDefinition(
        population=patients.all(),
        code_category=patients.with_these_clinical_events(
            codes, returning="category", find_last_match_in_period=True
        ),
        code_category_date=patients.date_of("code_category"),
    )
    rows = study.to_dicts()
    categories = [row["code_category"] for row in rows]
    category_dates = [row["code_category_date"] for row in rows]
    assert categories == ["", "B", "C"]
    assert category_dates == ["", "2020", "2019"]
def test_clinical_event_with_numeric_value():
    """The numeric value of the first matching event in a period is returned.

    With a period of 2001-12-01 to 2002-06-01, the expected values are
    "0.0", "2.0" and "0.0", and only the second patient has a date
    ("2002-01").
    """
    condition_code = "ASTHMA"
    events_for_second_patient = [
        # Include date before period starts, which should be ignored
        ("2001-01-01", 1),
        ("2002-01-01", 2),
        ("2002-02-01", 3),
        ("2002-06-01", 4),
    ]
    events_for_third_patient = [("2001-06-01", 7)]
    _make_clinical_events_selection(
        condition_code,
        patient_dates=[None, events_for_second_patient, events_for_third_patient],
    )

    study = StudyDefinition(
        population=patients.all(),
        asthma_value=patients.with_these_clinical_events(
            codelist([condition_code], "ctv3"),
            between=["2001-12-01", "2002-06-01"],
            returning="numeric_value",
            find_first_match_in_period=True,
        ),
        asthma_value_date=patients.date_of("asthma_value", date_format="YYYY-MM"),
    )
    rows = study.to_dicts()
    assert [row["asthma_value"] for row in rows] == ["0.0", "2.0", "0.0"]
    assert [row["asthma_value_date"] for row in rows] == ["", "2002-01", ""]
def test_simple_bmi(include_dates):
    """Check a BMI built from weight and height, with an optional date column.

    A weight of 50 (2002-06-01) and a height of 10 (2001-06-01) are
    recorded, and the test expects a BMI of "0.5" for the 1995-2005 window.

    Args:
        include_dates: one of "none", "year", "month" or "day". It selects
            whether a ``BMI_date_measured`` column is declared and which
            date format it uses; the expected date is ``None``, "2002",
            "2002-06" or "2002-06-01" respectively.

    Raises:
        ValueError: if ``include_dates`` is not one of the supported values.
    """
    session = make_session()
    weight_code = "X76C7"
    height_code = "XM01E"
    patient = Patient(DateOfBirth="1950-01-01")
    patient.CodedEvents.append(
        CodedEvent(CTV3Code=weight_code, NumericValue=50, ConsultationDate="2002-06-01")
    )
    patient.CodedEvents.append(
        CodedEvent(CTV3Code=height_code, NumericValue=10, ConsultationDate="2001-06-01")
    )
    session.add(patient)
    session.commit()

    if include_dates == "none":
        bmi_date = None
        date_query = None
    elif include_dates == "year":
        bmi_date = "2002"
        date_query = patients.date_of("BMI")
    elif include_dates == "month":
        bmi_date = "2002-06"
        date_query = patients.date_of("BMI", date_format="YYYY-MM")
    elif include_dates == "day":
        bmi_date = "2002-06-01"
        date_query = patients.date_of("BMI", date_format="YYYY-MM-DD")
    else:
        # Fail loudly here rather than with an unbound-variable error below
        raise ValueError(
            "Unexpected include_dates value: {!r}".format(include_dates)
        )

    # Only declare the date column when a date query was requested
    date_columns = dict(BMI_date_measured=date_query) if date_query else {}
    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1995-01-01", on_or_before="2005-01-01"
        ),
        **date_columns
    )
    results = study.to_dicts()
    assert [x["BMI"] for x in results] == ["0.5"]
    # .get() yields None when the date column was not declared at all
    assert [x.get("BMI_date_measured") for x in results] == [bmi_date]
def test_mean_recorded_value_dtype_generation():
    """Check pandas CSV arguments for a mean-recorded-value column.

    ``bp_sys`` is declared via ``patients.mean_recorded_value`` and
    ``bp_sys_date_measured`` is its ``YYYY-MM`` date; the test asserts
    the ``float`` dtype and the ``add_day_to_date`` converter.
    """
    test_codelist = codelist(["X"], system="ctv3")
    bp_sys_column = patients.mean_recorded_value(
        test_codelist,
        on_most_recent_day_of_measurement=True,
        on_or_before="2020-02-01",
    )
    study = StudyDefinition(
        population=patients.all(),
        bp_sys=bp_sys_column,
        bp_sys_date_measured=patients.date_of("bp_sys", date_format="YYYY-MM"),
    )

    expected_csv_args = {
        "converters": {"bp_sys_date_measured": "add_day_to_date"},
        "dtype": {"bp_sys": "float"},
        "date_col_for": {"bp_sys": "bp_sys_date_measured"},
        "parse_dates": ["bp_sys_date_measured"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected_csv_args
def test_no_bmi_when_measurement_after_reference_date():
    """A BMI event dated after the study window is not reported.

    The only event is a BMI-coded value on 2001-01-01, while the window
    is 1990-01-01 to 2000-01-01. The test expects BMI "0.0" and an empty
    measurement date.
    """
    session = make_session()
    bmi_code = "22K.."
    late_bmi_event = CodedEvent(
        CTV3Code=bmi_code, NumericValue=99, ConsultationDate="2001-01-01"
    )
    patient = Patient(DateOfBirth="1900-01-01", CodedEvents=[late_bmi_event])
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1990-01-01",
            on_or_before="2000-01-01",
        ),
        BMI_date_measured=patients.date_of("BMI", date_format="YYYY-MM-DD"),
    )
    rows = study.to_dicts()
    assert [row["BMI"] for row in rows] == ["0.0"]
    assert [row["BMI_date_measured"] for row in rows] == [""]
def test_mean_recorded_value():
    """The mean of the readings on the most recent in-period day is returned.

    The first patient has three readings on 2020-02-10 (90, 100, 98) and
    one on 2020-04-01; the second has a single reading in 2010; the third
    has none. For a period of 2018-01-01 to 2020-03-01 the test expects
    ("96.0", "2020-02-10") for the first patient and ("0.0", "") for the
    other two.
    """
    code = "2469."
    session = make_session()
    readings = [
        ("2020-02-10", 90),
        ("2020-02-10", 100),
        ("2020-02-10", 98),
        # This day is outside period and should be ignored
        ("2020-04-01", 110),
    ]
    patient = Patient(
        CodedEvents=[
            CodedEvent(CTV3Code=code, NumericValue=reading, ConsultationDate=day)
            for day, reading in readings
        ]
    )
    patient_with_old_reading = Patient(
        CodedEvents=[
            CodedEvent(CTV3Code=code, NumericValue=100, ConsultationDate="2010-01-01")
        ]
    )
    patient_with_no_reading = Patient()
    session.add_all([patient, patient_with_old_reading, patient_with_no_reading])
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        bp_systolic=patients.mean_recorded_value(
            codelist([code], system="ctv3"),
            on_most_recent_day_of_measurement=True,
            between=["2018-01-01", "2020-03-01"],
        ),
        bp_systolic_date_measured=patients.date_of(
            "bp_systolic", date_format="YYYY-MM-DD"
        ),
    )
    value_and_date_pairs = [
        (row["bp_systolic"], row["bp_systolic_date_measured"])
        for row in study.to_dicts()
    ]
    assert value_and_date_pairs == [("96.0", "2020-02-10"), ("0.0", ""), ("0.0", "")]