def test_make_df_from_expectations_with_mean_recorded_value():
    """Check expectations-based data for a ``mean_recorded_value`` column.

    Defines a study with a single ``drug_x`` column whose expectations ask
    for normally distributed floats (mean 35, stddev 10) at an incidence of
    0.6, generates 10000 rows, and asserts that the mean of the ``drug_x``
    values not equal to 0.0, truncated to an int, is within 5 of 35.
    """
    expectations = {
        "rate": "exponential_increase",
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "incidence": 0.6,
        "float": {"distribution": "normal", "mean": 35, "stddev": 10},
    }
    study = StudyDefinition(
        population=patients.all(),
        drug_x=patients.mean_recorded_value(
            codelist(["X"], system="ctv3"),
            on_most_recent_day_of_measurement=True,
            return_expectations=expectations,
        ),
    )

    dummy_data = study.make_df_from_expectations(10000)

    # Rows equal to 0.0 are excluded before averaging -- presumably these are
    # the patients with no generated measurement (TODO confirm).
    has_value = dummy_data["drug_x"] != 0.0
    observed_mean = int(dummy_data[has_value]["drug_x"].mean())
    assert abs(35 - observed_mean) < 5
def test_mean_recorded_value_dtype_generation():
    """Check the pandas CSV arguments derived for a ``mean_recorded_value`` column.

    The study defines ``bp_sys`` via ``mean_recorded_value`` and a companion
    ``bp_sys_date_measured`` column via ``date_of("bp_sys", ...)`` with a
    ``YYYY-MM`` date format. The test asserts the exact converters, dtypes,
    date-column mapping and parse_dates entries produced by passing
    ``study.pandas_csv_args`` through ``_converters_to_names``.
    """
    codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        bp_sys=patients.mean_recorded_value(
            codes,
            on_most_recent_day_of_measurement=True,
            on_or_before="2020-02-01",
        ),
        bp_sys_date_measured=patients.date_of("bp_sys", date_format="YYYY-MM"),
    )

    expected_csv_args = {
        "converters": {"bp_sys_date_measured": "add_day_to_date"},
        "dtype": {"bp_sys": "float"},
        "date_col_for": {"bp_sys": "bp_sys_date_measured"},
        "parse_dates": ["bp_sys_date_measured"],
    }
    actual_csv_args = _converters_to_names(study.pandas_csv_args)
    assert actual_csv_args == expected_csv_args
minimum_age_at_measurement=16, include_measurement_date=True, include_month=True, return_expectations={ "incidence": 0.6, "float": {"distribution": "normal", "mean": 35, "stddev": 10}, }, ), # https://github.com/opensafely/risk-factors-research/issues/48 bp_sys=patients.mean_recorded_value( systolic_blood_pressure_codes, on_most_recent_day_of_measurement=True, on_or_before="2020-02-01", include_measurement_date=True, include_month=True, return_expectations={ "incidence": 0.6, "float": {"distribution": "normal", "mean": 80, "stddev": 10}, }, ), # https://github.com/opensafely/risk-factors-research/issues/48 bp_dias=patients.mean_recorded_value( diastolic_blood_pressure_codes, on_most_recent_day_of_measurement=True, on_or_before="2020-02-01", include_measurement_date=True, include_month=True, return_expectations={ "incidence": 0.6,
return_expectations={"rate" : "universal", "int" : {"distribution" : "population_ages"}}), ## bmi bmi=patients.most_recent_bmi(between=["2010-02-01", "2020-01-31"], minimum_age_at_measurement=18, include_measurement_date=True, date_format="YYYY-MM", return_expectations={"date": {"earliest": "2010-02-01", "latest": "2020-01-31"}, "float": {"distribution": "normal", "mean": 28, "stddev": 8}, "incidence": 0.80,}), ## systolic blood pressure bp_sys=patients.mean_recorded_value(systolic_blood_pressure_codes, on_most_recent_day_of_measurement=True, between=["2017-02-01", "2020-01-31"], include_measurement_date=True, date_format="YYYY-MM", return_expectations={"float": {"distribution": "normal", "mean": 80, "stddev": 10}, "date": {"earliest": "2019-02-01", "latest": "2020-01-31"}, "incidence": 0.95,}), ## ace inhibitor ace_inhibitor = patients.with_these_medications(ace_inhibitor_codes, between=["1900-01-01", "2020-02-01"], include_date_of_match = True, date_format="YYYY-MM-DD", returning="binary_flag", return_expectations = {"incidence": 0.05, "date": {"earliest": "1980-02-01", "latest": "2020-01-31"}}),