def test_make_df_from_expectations_with_distribution_and_date():
    """Dummy BMI values and their measurement dates are missing on the same rows.

    Defines a ``bmi`` column whose ``return_expectations`` describe both a
    float distribution and a date range, generates a dummy dataframe of
    10000 rows, and checks the columns produced and the alignment of their
    null values.
    """
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
            include_measurement_date=True,
            include_month=True,
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.6,
                "float": {"distribution": "normal", "mean": 35, "stddev": 10},
                "date": {"earliest": "1900-01-01", "latest": "today"},
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    assert sorted(result.columns) == ["bmi", "bmi_date_measured"]

    # Check that the null-valued rows are aligned with each other.
    # Comparing the two boolean null masks row by row turns a misalignment
    # into an ordinary assertion failure; comparing separately filtered
    # subsets would instead raise ValueError on differently labelled Series.
    bmi_missing = pd.isnull(result["bmi"])
    date_missing = pd.isnull(result["bmi_date_measured"])
    assert (bmi_missing == date_missing).all()
def test_bmi_when_only_some_measurements_of_child():
    """A BMI coded in childhood is passed over in favour of later measurements.

    The patient is born in 1990, has a BMI code recorded in 1995 and a weight
    and height recorded in 2010. The expected BMI of "0.5" (consistent with
    50 / 10 ** 2) is dated to the 2010 measurements.
    """
    session = make_session()
    patient = Patient(DateOfBirth="1990-01-01")
    # (CTV3 code, numeric value, consultation date), in insertion order.
    recorded_events = [
        ("22K..", 99, "1995-01-01"),  # BMI code
        ("X76C7", 50, "2010-01-01"),  # weight code
        ("XM01E", 10, "2010-01-01"),  # height code
    ]
    for ctv3_code, value, consulted_on in recorded_events:
        patient.CodedEvents.append(
            CodedEvent(
                CTV3Code=ctv3_code,
                NumericValue=value,
                ConsultationDate=consulted_on,
            )
        )
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="2005-01-01",
            on_or_before="2015-01-01",
            include_measurement_date=True,
            include_month=True,
            include_day=True,
        ),
    )
    rows = study.to_dicts()
    bmi_values = [row["BMI"] for row in rows]
    assert bmi_values == ["0.5"]
    measured_on = [row["BMI_date_measured"] for row in rows]
    assert measured_on == ["2010-01-01"]
def test_bmi_dtype_generation():
    """A BMI column with a month-resolution date yields the expected CSV args.

    Checks the ``pandas_csv_args`` of a study (after passing them through
    ``_converters_to_names``) for the ``bmi`` / ``bmi_date_measured`` pair.
    """
    # The original built a categorised codelist here that was never passed
    # to the study; that dead setup has been removed.
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
            include_measurement_date=True,
            include_month=True,
        ),
    )
    result = _converters_to_names(study.pandas_csv_args)
    assert result == {
        "converters": {"bmi_date_measured": "add_day_to_date"},
        "dtype": {"bmi": "float"},
        "date_col_for": {"bmi": "bmi_date_measured"},
        "parse_dates": ["bmi_date_measured"],
    }
def test_explicit_bmi_fallback():
    """An explicitly coded BMI is reported when only a weight accompanies it.

    The patient has a weight event and a BMI-coded event but no height event;
    the expected result is the coded value ("99.0") on the coded event's date.
    """
    session = make_session()
    weight_event = CodedEvent(
        CTV3Code="X76C7", NumericValue=50, ConsultationDate="2001-06-01"
    )
    bmi_event = CodedEvent(
        CTV3Code="22K..", NumericValue=99, ConsultationDate="2001-10-01"
    )
    patient = Patient(DateOfBirth="1950-01-01")
    # Weight first, then the BMI code, matching the insertion order under test.
    for event in (weight_event, bmi_event):
        patient.CodedEvents.append(event)
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1995-01-01",
            on_or_before="2005-01-01",
            include_measurement_date=True,
            include_month=True,
            include_day=True,
        ),
    )
    rows = study.to_dicts()
    assert [row["BMI"] for row in rows] == ["99.0"]
    assert [row["BMI_date_measured"] for row in rows] == ["2001-10-01"]
def test_no_bmi_when_measurement_after_reference_date():
    """A BMI recorded after the query window produces no BMI and no date.

    The only event is dated 2001-01-01 while the window ends on 2000-01-01;
    the expected output is "0.0" with an empty measurement date.
    """
    session = make_session()
    patient = Patient(DateOfBirth="1900-01-01")
    late_bmi_event = CodedEvent(
        CTV3Code="22K..",  # BMI code
        NumericValue=99,
        ConsultationDate="2001-01-01",
    )
    patient.CodedEvents.append(late_bmi_event)
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1990-01-01",
            on_or_before="2000-01-01",
            include_measurement_date=True,
            include_month=True,
            include_day=True,
        ),
    )
    rows = study.to_dicts()
    bmi_values = [row["BMI"] for row in rows]
    assert bmi_values == ["0.0"]
    measured_on = [row["BMI_date_measured"] for row in rows]
    assert measured_on == [""]
def test_bmi_rounded():
    """The reported BMI is rounded to one decimal place.

    A weight of 10.12345 and a height of 10 are expected to give "0.1",
    dated to the weight measurement (2001-06-01).
    """
    session = make_session()
    patient = Patient(DateOfBirth="1950-01-01")
    # (CTV3 code, numeric value, consultation date), in insertion order.
    measurements = (
        ("X76C7", 10.12345, "2001-06-01"),  # weight code
        ("XM01E", 10, "2000-02-01"),  # height code
    )
    for ctv3_code, value, consulted_on in measurements:
        patient.CodedEvents.append(
            CodedEvent(
                CTV3Code=ctv3_code,
                NumericValue=value,
                ConsultationDate=consulted_on,
            )
        )
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        # The reference date is deliberately passed positionally, as in the
        # call being exercised.
        BMI=patients.most_recent_bmi(
            "2005-01-01",
            include_measurement_date=True,
            include_month=True,
            include_day=True,
        ),
    )
    rows = study.to_dicts()
    assert [row["BMI"] for row in rows] == ["0.1"]
    assert [row["BMI_date_measured"] for row in rows] == ["2001-06-01"]
def test_bmi_with_zero_values():
    """Zero-valued weight and height give a BMI of "0.0" but keep the date.

    Both measurements are recorded on 2001-06-01 with a numeric value of 0;
    the date column is requested separately through ``patients.date_of``.
    """
    session = make_session()
    measured_on = "2001-06-01"
    patient = Patient(DateOfBirth="1950-01-01")
    # Weight code first, then height code.
    for ctv3_code in ("X76C7", "XM01E"):
        patient.CodedEvents.append(
            CodedEvent(
                CTV3Code=ctv3_code, NumericValue=0, ConsultationDate=measured_on
            )
        )
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1995-01-01", on_or_before="2005-01-01"
        ),
        BMI_date_measured=patients.date_of("BMI", date_format="YYYY-MM-DD"),
    )
    rows = study.to_dicts()
    bmi_values = [row["BMI"] for row in rows]
    assert bmi_values == ["0.0"]
    bmi_dates = [row["BMI_date_measured"] for row in rows]
    assert bmi_dates == ["2001-06-01"]
def test_simple_bmi(include_dates):
    """BMI is derived from weight and height at each supported date resolution.

    Args:
        include_dates: One of "none", "year", "month" or "day"; selects which
            ``include_*`` flags are passed to ``most_recent_bmi`` and the
            format of the expected ``BMI_date_measured`` value.

    Raises:
        ValueError: If ``include_dates`` is not one of the supported values.
    """
    session = make_session()
    weight_code = "X76C7"
    height_code = "XM01E"
    patient = Patient(DateOfBirth="1950-01-01")
    patient.CodedEvents.append(
        CodedEvent(CTV3Code=weight_code, NumericValue=50, ConsultationDate="2002-06-01")
    )
    patient.CodedEvents.append(
        CodedEvent(CTV3Code=height_code, NumericValue=10, ConsultationDate="2001-06-01")
    )
    session.add(patient)
    session.commit()

    # Expected date string and most_recent_bmi() flags for each resolution.
    cases = {
        "none": (None, {}),
        "year": ("2002", {"include_measurement_date": True}),
        "month": (
            "2002-06",
            {"include_measurement_date": True, "include_month": True},
        ),
        "day": (
            "2002-06-01",
            {
                "include_measurement_date": True,
                "include_month": True,
                "include_day": True,
            },
        ),
    }
    # Fail loudly on an unknown resolution rather than with an
    # UnboundLocalError further down.
    if include_dates not in cases:
        raise ValueError(
            "Unexpected include_dates value: {!r}".format(include_dates)
        )
    bmi_date, bmi_kwargs = cases[include_dates]

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1995-01-01", on_or_before="2005-01-01", **bmi_kwargs
        ),
    )
    results = study.to_dicts()
    assert [x["BMI"] for x in results] == ["0.5"]
    # .get() because the date column is absent when no date was requested.
    assert [x.get("BMI_date_measured") for x in results] == [bmi_date]
def test_no_bmi_when_measurements_of_child():
    """A BMI recorded only in early childhood yields no BMI and no date.

    The patient is born on 2000-01-01 and the sole BMI-coded event is dated
    2001-01-01; the expected output is "0.0" with an empty measurement date.
    """
    session = make_session()
    patient = Patient(DateOfBirth="2000-01-01")
    childhood_bmi_event = CodedEvent(
        CTV3Code="22K..",  # BMI code
        NumericValue=99,
        ConsultationDate="2001-01-01",
    )
    patient.CodedEvents.append(childhood_bmi_event)
    session.add(patient)
    session.commit()

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1995-01-01", on_or_before="2005-01-01"
        ),
        BMI_date_measured=patients.date_of("BMI", date_format="YYYY-MM-DD"),
    )
    rows = study.to_dicts()
    bmi_values = [row["BMI"] for row in rows]
    assert bmi_values == ["0.0"]
    bmi_dates = [row["BMI_date_measured"] for row in rows]
    assert bmi_dates == [""]
def test_simple_bmi(include_dates):
    """BMI is derived from weight and height, with the date via ``date_of``.

    Args:
        include_dates: One of "none", "year", "month" or "day"; selects
            whether a ``BMI_date_measured`` column is requested through
            ``patients.date_of`` and, if so, with which ``date_format``.

    Raises:
        ValueError: If ``include_dates`` is not one of the supported values.
    """
    session = make_session()
    weight_code = "X76C7"
    height_code = "XM01E"
    patient = Patient(DateOfBirth="1950-01-01")
    patient.CodedEvents.append(
        CodedEvent(CTV3Code=weight_code, NumericValue=50, ConsultationDate="2002-06-01")
    )
    patient.CodedEvents.append(
        CodedEvent(CTV3Code=height_code, NumericValue=10, ConsultationDate="2001-06-01")
    )
    session.add(patient)
    session.commit()

    # Expected date string and date_of() keyword arguments per resolution;
    # None means no date column is requested at all.
    cases = {
        "none": (None, None),
        "year": ("2002", {}),
        "month": ("2002-06", {"date_format": "YYYY-MM"}),
        "day": ("2002-06-01", {"date_format": "YYYY-MM-DD"}),
    }
    # Fail loudly on an unknown resolution rather than with an
    # UnboundLocalError further down.
    if include_dates not in cases:
        raise ValueError(
            "Unexpected include_dates value: {!r}".format(include_dates)
        )
    bmi_date, date_of_kwargs = cases[include_dates]

    # Decide on the extra column with an explicit None check rather than the
    # truthiness of the query object.
    extra_columns = {}
    if date_of_kwargs is not None:
        extra_columns["BMI_date_measured"] = patients.date_of(
            "BMI", **date_of_kwargs
        )

    study = StudyDefinition(
        population=patients.all(),
        BMI=patients.most_recent_bmi(
            on_or_after="1995-01-01", on_or_before="2005-01-01"
        ),
        **extra_columns
    )
    results = study.to_dicts()
    assert [x["BMI"] for x in results] == ["0.5"]
    # .get() because the date column is absent when no date was requested.
    assert [x.get("BMI_date_measured") for x in results] == [bmi_date]
"5": 0.1, "3": 0.1 } }, "incidence": 0.75, }, ), ## COVARIATES bmi=patients.most_recent_bmi( between=["2010-02-28", "2020-02-29"], minimum_age_at_measurement=16, include_measurement_date=True, include_month=True, return_expectations={ "date": {}, "float": { "distribution": "normal", "mean": 35, "stddev": 10 }, "incidence": 0.95, }, ), smoking_status=patients.categorised_as( { "S": "most_recent_smoking_code = 'S'", "E": """ most_recent_smoking_code = 'E' OR ( most_recent_smoking_code = 'N' AND ever_smoked ) """,
return_expectations={ "incidence": 0.2, "date": { "earliest": "1950-01-01", "latest": "today" }, }, ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/10 bmi=patients.most_recent_bmi( on_or_after="2010-02-01", minimum_age_at_measurement=16, include_measurement_date=True, include_month=True, return_expectations={ "incidence": 0.6, "float": { "distribution": "normal", "mean": 35, "stddev": 10 }, }, ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/35 bp_sys=patients.mean_recorded_value( systolic_blood_pressure_codes, on_most_recent_day_of_measurement=True, on_or_before="2020-02-01", include_measurement_date=True, include_month=True, return_expectations={ "incidence": 0.6,
# https://github.com/ebmdatalab/tpp-sql-notebook/issues/37 rural_urban=patients.address_as_of( "2020-02-01", returning="rural_urban_classification" ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/54 stp=patients.registered_practice_as_of("2020-02-01", returning="stp_code"), # region - one of NHS England 9 regions region=patients.registered_practice_as_of("2020-02-01", returning="nhse_region_name"), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/10 bmi=patients.most_recent_bmi( on_or_after="2010-02-01", minimum_age_at_measurement=16, include_measurement_date=True, include_month=True, ), # https://github.com/ebmdatalab/tpp-sql-notebook/issues/6 smoking_status=patients.categorised_as( { "S": "most_recent_smoking_code = 'S'", "E": """ most_recent_smoking_code = 'E' OR ( most_recent_smoking_code = 'N' AND ever_smoked ) """, "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked", "M": "DEFAULT" },