def test_make_df_from_expectations_doesnt_alter_date_defaults():
    """Check that per-variable overrides leave the default expectations usable.

    ``with_different_incidence`` and ``with_different_date`` each override
    part of the defaults; ``with_defaults`` overrides nothing and is the
    column the assertions inspect.
    """
    defaults = {
        "rate": "exponential_increase",
        "incidence": 1.0,
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "category": {"ratios": {"M": 0.5, "F": 0.5}},
    }
    study = StudyDefinition(
        default_expectations=defaults,
        population=patients.all(),
        with_different_incidence=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={"incidence": 0.2},
            include_day=True,
        ),
        with_different_date=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={
                "date": {"earliest": "2015-01-01", "latest": "today"}
            },
            include_day=True,
        ),
        with_defaults=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"), returning="date", include_day=True
        ),
    )
    df = study.make_df_from_expectations(10000)

    # Regression test: make sure defaults are respected even when they've
    # been overridden
    assert df.with_defaults.min() < "2015-01-01"
    rows_without_date = df[pd.isnull(df.with_defaults)]
    assert len(rows_without_date) == 0
def test_study_definition_initial_stats_logging(logger):
    """Constructing a StudyDefinition should emit the initial stats log entries."""
    defaults = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    StudyDefinition(
        default_expectations=defaults,
        population=patients.all(),
        event_date_1=patients.with_these_clinical_events(
            codelist(["A"], system="ctv3"),
            returning="date",
            date_format="YYYY-MM-DD",
        ),
        event_min_date=patients.minimum_of(
            "event_date_1",
            event_date_2=patients.with_these_clinical_events(
                codelist(["B", "C"], system="ctv3"),
                returning="date",
                date_format="YYYY-MM-DD",
            ),
        ),
    )

    expected_logs = [
        # output columns include patient_id, and the 4 variables defined in
        # the study definition, including event_date_2, which is defined as a
        # parameter to event_min_date
        # tables - Patient, temp event table for each codelist
        {"output_column_count": 5, "table_count": 3, "table_joins_count": 2},
        # variable_count is a count of the top-level variables defined in the
        # study def (i.e. not event_date_2)
        {"variable_count": 4},
        # 2 variables use a codelist (event_date_1, and the nested event_date_2)
        {"variables_using_codelist_count": 2},
        # for each variable using a codelist, we log the size of the codelist
        {"variable_using_codelist": "event_date_1", "codelist_size": 1},
        {"variable_using_codelist": "event_date_2", "codelist_size": 2},
    ]
    assert get_stats_logs(logger.entries) == expected_logs
def test_clinical_events_numeric_value_dtype_generation():
    """Check ``pandas_csv_args`` for a numeric value paired with a ``date_of`` column."""
    creatinine_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        creatinine=patients.with_these_clinical_events(
            creatinine_codes,
            find_last_match_in_period=True,
            on_or_before="2020-02-01",
            returning="numeric_value",
        ),
        creatinine_date=patients.date_of("creatinine", date_format="YYYY-MM"),
    )

    expected = {
        "converters": {"creatinine_date": "add_day_to_date"},
        "dtype": {"creatinine": "float"},
        "date_col_for": {"creatinine": "creatinine_date"},
        "parse_dates": ["creatinine_date"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected
def test_categorical_clinical_events_with_date_dtype_generation():
    """Check ``pandas_csv_args`` for a category variable paired with a ``date_of`` column."""
    ethnicity_codes = codelist([("X", "Y")], system="ctv3")
    # Flag the codelist as carrying a category alongside each code
    ethnicity_codes.has_categories = True
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            ethnicity_codes,
            returning="category",
            find_last_match_in_period=True,
        ),
        ethnicity_date=patients.date_of("ethnicity"),
    )

    expected = {
        "converters": {"ethnicity_date": "add_month_and_day_to_date"},
        "date_col_for": {"ethnicity": "ethnicity_date"},
        "dtype": {"ethnicity": "category"},
        "parse_dates": ["ethnicity_date"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected
def test_make_df_from_expectations_with_categories_in_codelist_validation():
    """Generating dummy data must raise ``ValueError`` for this study.

    The expectation ratios name categories "A" and "B", while the codelist
    entry is ``("X", "Y")``.
    NOTE(review): presumably the error comes from that mismatch -- confirm
    against the validation code.
    """
    ethnicity_codes = codelist([("X", "Y")], system="ctv3")
    ethnicity_codes.has_categories = True
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            ethnicity_codes,
            returning="category",
            return_expectations=expectations,
            find_last_match_in_period=True,
        ),
    )
    with pytest.raises(ValueError):
        study.make_df_from_expectations(10000)
def test_make_df_from_expectations_with_number_of_episodes():
    """A ``number_of_episodes`` variable yields exactly one dummy-data column."""
    study = StudyDefinition(
        population=patients.all(),
        episode_count=patients.with_these_clinical_events(
            codelist(["A", "B", "C"], system="ctv3"),
            ignore_days_where_these_codes_occur=codelist(["D", "E"], system="ctv3"),
            returning="number_of_episodes",
            episode_defined_as="series of events each <= 14 days apart",
            return_expectations={
                "int": {"distribution": "normal", "mean": 4, "stddev": 2},
                "date": {"earliest": "1900-01-01", "latest": "today"},
                "incidence": 0.2,
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list: `Index == list` is element-wise, so asserting
    # on it relies on the truthiness of a one-element array and raises
    # ValueError (not a clean assertion failure) when the lengths differ.
    assert list(result.columns) == ["episode_count"]
def test_stats_logging_with_error(logger):
    """A failing query must still produce a timing "stop" log marked as an error."""
    bad_sql = "SELECT Foo FROM Bar"
    study = StudyDefinition(
        population=patients.all(),
        event=patients.with_these_clinical_events(codelist(["A"], "snomed")),
    )
    # insert a deliberate error in the queries
    study.backend.queries[-1] = bad_sql
    with pytest.raises(Exception) as excinfo:
        study.to_dicts()

    # The error is raised as expected
    assert "Invalid object name 'Bar'" in str(excinfo.value)

    # Timing is logged, with the error state in the end log.
    # Unpacking into a one-element target also asserts exactly one match.
    [sql_log] = [entry for entry in logger.entries if entry.get("sql") == bad_sql]
    [end_log] = [
        entry
        for entry in logger.entries
        if entry.get("timing_id") == sql_log["timing_id"]
        and entry.get("timing") == "stop"
    ]
    assert end_log["state"] == "error"
def test_make_df_from_expectations_with_categories():
    """Category dummy data should follow the requested ratios (A rarer than B)."""
    categorised_codelist = codelist([("1", "A"), ("2", "B")], system="ctv3")
    categorised_codelist.has_categories = True
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            categorised_codelist,
            returning="category",
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.2,
                "category": {"ratios": {"A": 0.3, "B": 0.7}},
                "date": {"earliest": "1900-01-01", "latest": "today"},
            },
            find_last_match_in_period=True,
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list: `Index == list` is element-wise and raises
    # ValueError instead of failing the assertion when the lengths differ.
    assert list(result.columns) == ["ethnicity"]

    category_counts = result.reset_index().groupby("ethnicity").count()
    # Use .iloc for the positional lookup: the row Series is indexed by column
    # label, and integer-key positional fallback in Series[...] is deprecated.
    count_a = category_counts.loc["A", :].iloc[0]
    count_b = category_counts.loc["B", :].iloc[0]
    assert count_a < count_b
def test_make_df_from_expectations_partial_default_overrides():
    """Overriding only the "latest" date must cap the generated years at 2000."""
    defaults = {
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "rate": "exponential_increase",
        "incidence": 0.2,
    }
    study = StudyDefinition(
        default_expectations=defaults,
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            find_first_match_in_period=True,
            date_format="YYYY",
            # Only "latest" is supplied here; "earliest" is not overridden
            return_expectations={"date": {"latest": "2000-01-01"}},
        ),
    )
    df = study.make_df_from_expectations(10000)
    latest_year = df.asthma_condition.astype("float").max()
    assert latest_year == 2000
def test_make_df_from_expectations_with_satisfying():
    """A ``satisfying`` variable contributes one column; its nested variables none."""
    study = StudyDefinition(
        population=patients.all(),
        has_condition=patients.satisfying(
            "condition_a OR condition_b",
            condition_a=patients.with_these_clinical_events(
                codelist(["A", "B", "C"], system="ctv3")
            ),
            condition_b=patients.with_these_clinical_events(
                codelist(["X", "Y", "Z"], system="ctv3")
            ),
            return_expectations={
                "date": {"earliest": "2001-01-01", "latest": "2020-03-01"},
                "incidence": 0.95,
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list: `Index == list` is element-wise, so an extra
    # column (e.g. a leaked nested variable) would raise ValueError rather
    # than produce a readable assertion failure.
    assert list(result.columns) == ["has_condition"]
def first_diagnosis_in_period(dx_codelist):
    """Return a variable giving the date of the first event matching ``dx_codelist``.

    The variable is built with ``returning="date"``,
    ``find_first_match_in_period=True`` and ``include_month=True``, and
    carries dummy-data expectations of incidence 0.2 with dates from
    1950-01-01 to today.
    """
    expectations = {
        "incidence": 0.2,
        "date": {"earliest": "1950-01-01", "latest": "today"},
    }
    return patients.with_these_clinical_events(
        dx_codelist,
        returning="date",
        find_first_match_in_period=True,
        include_month=True,
        return_expectations=expectations,
    )
def test_clinical_events_with_year_date_dtype_generation():
    """Check ``pandas_csv_args`` for a date variable with no explicit date format."""
    diabetes_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        diabetes=patients.with_these_clinical_events(
            diabetes_codes, returning="date"
        ),
    )

    expected = {
        "converters": {"diabetes": "add_month_and_day_to_date"},
        "date_col_for": {},
        "dtype": {},
        "parse_dates": ["diabetes"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected
def test_clinical_events_with_date_dtype_generation():
    """Check ``pandas_csv_args`` for a "YYYY-MM" date variable."""
    diabetes_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        diabetes=patients.with_these_clinical_events(
            diabetes_codes,
            return_first_date_in_period=True,
            date_format="YYYY-MM",
        ),
    )

    expected = {
        "converters": {"diabetes": "add_day_to_date"},
        "date_col_for": {},
        "dtype": {},
        "parse_dates": ["diabetes"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected
def test_make_df_from_expectations_returning_date_using_defaults():
    """A date variable with no expectations of its own uses the study defaults.

    With a default earliest date of 1900-01-01, the earliest generated date
    is expected to fall before 1960.
    """
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "1900-01-01", "latest": "today"},
            "rate": "exponential_increase",
            "incidence": 0.2,
        },
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            find_first_match_in_period=True,
            date_format="YYYY-MM-DD",
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    non_null_rows = result[~pd.isnull(result["asthma_condition"])]
    # DataFrame.min() returns a Series indexed by column label; take the first
    # entry with .iloc because integer-key positional fallback in Series[...]
    # is deprecated.
    assert non_null_rows.min().iloc[0] < "1960-01-01"
def test_stats_logging_with_message_handle_exception(mock_regex, logger):
    """An exception during SQL server message handling must not break the run.

    ``mock_regex.match`` is made to raise; the study should still complete and
    emit timing logs, and every sqlserver-stats entry should describe the
    handled exception.
    """
    mock_regex.match.side_effect = Exception("message error")
    study = StudyDefinition(
        population=patients.all(),
        event=patients.with_these_clinical_events(codelist(["A"], "snomed")),
    )
    study.to_dicts()

    stats_logs = get_stats_logs(logger.entries)
    timing_logs = get_logs_by_key(stats_logs, "timing_id")
    sqlserver_logs = get_stats_logs(logger.entries, event="sqlserver-stats")

    # Study runs OK and we still get the normal cohortextractor-stats timing logs
    assert len(timing_logs) > 0

    # sqlserver-stats logs just consist of the error logs
    for entry in sqlserver_logs:
        assert entry["description"] == "Exception in SQL server message handling"
        assert str(entry["exc_info"]) == "message error"
def var_signature(name, on_or_after):
    """Return a one-entry dict mapping ``name`` to a first-event-date variable.

    NOTE(review): ``codes``, ``from_date``, ``to_date`` and ``i`` are not
    parameters; they are resolved from a scope outside this block that is not
    visible here -- confirm against the enclosing code.
    """
    # ``codes`` is used as a key into the module globals; the object found
    # there is passed on as the codelist argument.
    event_codes = globals()[codes]
    expectations = {
        "date": {"earliest": from_date, "latest": to_date},
        "incidence": 1 / i,  # to help check events_pp in counts.py works
    }
    variable = patients.with_these_clinical_events(
        event_codes,
        returning="date",
        on_or_after=on_or_after,
        date_format="YYYY-MM-DD",
        find_first_match_in_period=True,
        return_expectations=expectations,
    )
    return {name: variable}
def test_make_df_from_expectations_with_date_filter():
    """Generated dates must not exceed the upper bound of the ``between`` filter."""
    study = StudyDefinition(
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            between=["2001-12-01", "2002-06-01"],
            returning="date",
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.2,
                "date": {"earliest": "1900-01-01", "latest": "today"},
            },
            find_first_match_in_period=True,
            date_format="YYYY-MM-DD",
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list: `Index == list` is element-wise and raises
    # ValueError instead of failing the assertion when the lengths differ.
    assert list(result.columns) == ["asthma_condition"]

    non_null_rows = result[~pd.isnull(result["asthma_condition"])]
    # DataFrame.max() returns a Series indexed by column label; use .iloc for
    # the positional lookup because integer-key fallback in Series[...] is
    # deprecated.
    assert non_null_rows.max().iloc[0] <= "2002-06-01"
def test_column_refs_in_date_expressions_do_not_trigger_errors():
    """Date expressions that reference other columns must not break dummy data."""
    # Further down the road we want to actually interpret these expressions
    # and generate appropriate dates, but for now we just need to not blow up
    # when we encounter them
    study = StudyDefinition(
        population=patients.all(),
        copd_exacerbation=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            between=["2001-12-01", "2002-06-01"],
            returning="date",
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.2,
                "date": {"earliest": "1990-01-01", "latest": "today"},
            },
            find_last_match_in_period=True,
            date_format="YYYY-MM-DD",
        ),
        drug_after_exacerbation=patients.with_these_medications(
            codelist(["Y"], system="snomed"),
            # Both bounds refer to the copd_exacerbation column defined above
            between=["copd_exacerbation", "copd_exacerbation + 3 months"],
            returning="date",
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.2,
                "date": {"earliest": "1990-01-01", "latest": "today"},
            },
            find_first_match_in_period=True,
            date_format="YYYY-MM-DD",
        ),
    )
    # Just ensure no exception is raised
    study.make_df_from_expectations(10000)
def test_booleans_correctly_handled_in_dummy_data(tmp_path, file_format):
    """Binary-flag dummy data must contain both values after being written to file."""
    event_codes = codelist(["12345"], system="snomed")
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "2020-01-01", "latest": "today"}
        },
        population=patients.all(),
        has_event=patients.with_these_clinical_events(
            event_codes,
            returning="binary_flag",
            return_expectations={"rate": "uniform", "incidence": 0.5},
        ),
    )
    output_path = tmp_path / f"dummy-data.{file_format}"
    study.to_file(output_path, expectations_population=100)

    # The expected representation of the two flag values depends on the
    # format the data is read back from
    if file_format in ("csv", "csv.gz"):
        frame = pandas.read_csv(output_path, dtype=str)
        flag_values = ("0", "1")
    elif file_format == "feather":
        frame = pandas.read_feather(output_path)
        flag_values = (True, False)
    elif file_format in ("dta", "dta.gz"):
        frame = pandas.read_stata(output_path)
        flag_values = (0, 1)
    else:
        assert False, f"Unhandled format: {file_format}"

    # Check we've got at least some of each value
    counts = frame.has_event.value_counts()
    for value in flag_values:
        assert counts[value] > 10
"rate": "universal", "category": { "ratios": { "M": 0.49, "F": 0.51 } }, }), ethnicity=patients.with_these_clinical_events( ethnicity_codes, returning="category", find_last_match_in_period=True, include_date_of_match=True, return_expectations={ "category": { "ratios": { "1": 0.8, "5": 0.1, "3": 0.1 } }, "incidence": 0.75, }, ), # IMID disease codes atopic_dermatitis=first_diagnosis_in_period(atopic_dermatitis_codes), crohns_disease=first_diagnosis_in_period(crohns_disease_codes), ulcerative_colitis=first_diagnosis_in_period(ulcerative_colitis_codes), inflammatory_bowel_disease_unclassified=first_diagnosis_in_period( inflammatory_bowel_disease_unclassified_codes), psoriasis=first_diagnosis_in_period(psoriasis_codes), hidradenitis_suppurativa=first_diagnosis_in_period(
# https://github.com/opensafely/risk-factors-research/issues/46 sex=patients.sex( return_expectations={ "rate": "universal", "category": {"ratios": {"M": 0.49, "F": 0.51}}, } ), # https://codelists.opensafely.org/codelist/opensafely/chronic-cardiac-disease/2020-04-08/ chronic_cardiac_disease=patients.with_these_clinical_events( chronic_cardiac_disease_codes, returning="date", find_first_match_in_period=True, include_month=True, return_expectations={"incidence": 0.2}, ), # https://codelists.opensafely.org/codelist/opensafely/chronic-liver-disease/2020-06-02/ chronic_liver_disease=patients.with_these_clinical_events( chronic_liver_disease_codes, returning="date", find_first_match_in_period=True, include_month=True, return_expectations={ "incidence": 0.2, "date": {"earliest": "1950-01-01", "latest": "today"}, }, ),
}, # STUDY POPULATION # This line defines the study population population=patients.registered_with_one_practice_between( "2018-11-01", "2019-02-01"), dereg_date=patients.date_deregistered_from_all_supported_practices( on_or_after="2020-02-01", date_format="YYYY-MM", ), # OUTCOMES worms=patients.with_these_clinical_events( worms_codes, return_first_date_in_period=True, include_month=True, return_expectations={"date": { "earliest": "2019-02-01" }}, ), died_date_ons=patients.died_from_any_cause( on_or_before="2020-08-01", returning="date_of_death", include_month=True, include_day=True, ), ## DEMOGRAPHIC COVARIATES # AGE age=patients.age_as_of( "2019-02-01", return_expectations={
def test_to_file_with_expectations_population(tmp_path, file_format):
    """Data generated from expectations must pass ``validate_dummy_data``."""
    event_codes = codelist([("12345", "foo"), ("67890", "bar")], system="snomed")
    # Template for the repeated expectations; each variable gets its own copy
    # so that no two variables share a dict object.
    uniform_half = {"rate": "uniform", "incidence": 0.5}
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "2020-01-01", "latest": "today"}
        },
        population=patients.all(),
        sex=patients.sex(
            return_expectations={
                "category": {"ratios": {"F": 0.5, "M": 0.5}},
                "rate": "universal",
            },
        ),
        age=patients.age_as_of(
            "2020-01-01",
            return_expectations={
                "int": {"distribution": "population_ages"},
                "rate": "universal",
            },
        ),
        has_event=patients.with_these_clinical_events(
            event_codes,
            returning="binary_flag",
            return_expectations=dict(uniform_half),
        ),
        event_date_day=patients.with_these_clinical_events(
            event_codes,
            returning="date",
            date_format="YYYY-MM-DD",
            return_expectations=dict(uniform_half),
        ),
        event_date_month=patients.with_these_clinical_events(
            event_codes,
            returning="date",
            date_format="YYYY-MM",
            return_expectations=dict(uniform_half),
        ),
        event_date_year=patients.with_these_clinical_events(
            event_codes,
            returning="date",
            date_format="YYYY",
            return_expectations=dict(uniform_half),
        ),
        incomplete_categories=patients.with_these_clinical_events(
            event_codes,
            returning="category",
            return_expectations={
                "category": {"ratios": {"foo": 0.5, "bar": 0.5}},
                # Half the values here should be null
                "incidence": 0.5,
            },
        ),
    )
    generated_file = tmp_path / f"dummy-data.{file_format}"
    study.to_file(generated_file, expectations_population=100)

    # We reuse validate_dummy_data to check that the data generated by the
    # expectations framework is valid.
    validate_dummy_data(study.covariate_definitions, generated_file)
def study():
    """Return a StudyDefinition of event dates combined via nested ``minimum_of``.

    Each event date takes its ``date_format`` from
    ``inconsistent_date_formats`` (resolved from outside this block), falling
    back to "YYYY-MM-DD" when the variable has no entry there.
    """

    def event_date(code, variable_name):
        # Date of a clinical event for one CTV3 code, formatted as configured
        # for `variable_name`.
        return patients.with_these_clinical_events(
            codelist([code], system="ctv3"),
            returning="date",
            date_format=inconsistent_date_formats.get(variable_name, "YYYY-MM-DD"),
        )

    defaults = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    return StudyDefinition(
        default_expectations=defaults,
        population=patients.all(),
        date_1=event_date("A", "date_1"),
        # Mixes a reference to a top-level variable with a nested definition
        first_min_date=patients.minimum_of(
            "date_1",
            date_2=event_date("B", "date_2"),
        ),
        second_min_date=patients.minimum_of(
            date_3=event_date("Y", "date_3"),
            date_4=event_date("Z", "date_4"),
        ),
        third_min_date=patients.minimum_of(
            date_5=event_date("Y", "date_5"),
            date_6=event_date("Z", "date_6"),
        ),
        # The remaining variables only reference other variables by name
        min_of_second_and_third=patients.minimum_of(
            "second_min_date", "third_min_date"
        ),
        min_overall=patients.minimum_of("min_of_second_and_third", "first_min_date"),
        min_date_1_third_min=patients.minimum_of("date_1", "third_min_date"),
    )
def test_to_file_with_dummy_data_file(tmp_path, file_format):
    """Supplying a dummy data file to ``to_file`` must reproduce it byte-for-byte."""
    event_codes = codelist(["12345"], system="snomed")
    # Template for the repeated expectations; each variable gets its own copy
    # so that no two variables share a dict object.
    uniform_half = {"rate": "uniform", "incidence": 0.5}
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "2020-01-01", "latest": "today"}
        },
        population=patients.all(),
        sex=patients.sex(
            return_expectations={
                "category": {"ratios": {"F": 0.5, "M": 0.5}},
                "rate": "universal",
            },
        ),
        age=patients.age_as_of(
            "2020-01-01",
            return_expectations={
                "int": {"distribution": "population_ages"},
                "rate": "universal",
            },
        ),
        has_event=patients.with_these_clinical_events(
            event_codes,
            returning="binary_flag",
            return_expectations=dict(uniform_half),
        ),
        event_date_day=patients.with_these_clinical_events(
            event_codes,
            returning="date",
            date_format="YYYY-MM-DD",
            return_expectations=dict(uniform_half),
        ),
        event_date_month=patients.with_these_clinical_events(
            event_codes,
            returning="date",
            date_format="YYYY-MM",
            return_expectations=dict(uniform_half),
        ),
        event_date_year=patients.with_these_clinical_events(
            event_codes,
            returning="date",
            date_format="YYYY",
            return_expectations=dict(uniform_half),
        ),
    )

    # Generate dummy data using the expectations framework
    source_file = tmp_path / f"dummy-data.{file_format}"
    study.to_file(source_file, expectations_population=10)

    # Use this dummy data
    copied_file = tmp_path / f"output.{file_format}"
    study.to_file(copied_file, dummy_data_file=source_file)

    # Check results
    with open(source_file, "rb") as handle:
        source_bytes = handle.read()
    with open(copied_file, "rb") as handle:
        copied_bytes = handle.read()
    assert source_bytes == copied_bytes
}), stp=patients.registered_practice_as_of( "index_date", returning="stp_code", return_expectations={ "category": { "ratios": { "STP1": 0.5, "STP2": 0.5 } }, }, ), first_dose=patients.with_these_clinical_events( first_dose_code, returning="binary_flag", between=["index_date", "index_date + 1 month"], return_expectations={"incidence": 0.4}), second_dose=patients.with_these_clinical_events( second_dose_code, returning="binary_flag", between=["index_date", "index_date + 1 month"], return_expectations={"incidence": 0.4}), ) measures = [ Measure(id="first_dose_stp", numerator="first_dose", denominator="population", group_by=["stp", "age_group", "sex"]), Measure(id="second_dose_stp",
def test_stats_logging_tpp_backend(logger):
    """Running a study should log the initial stats and one timing block per query."""
    # The query counter is a global at the module level, so it isn't reset
    # between tests. Find the next position (without incrementing it); this is
    # the start of the test's timing logs
    first_id = timing_log_counter.next

    study = StudyDefinition(
        population=patients.all(),
        event=patients.with_these_clinical_events(codelist(["A"], "snomed")),
    )
    study.to_dicts()

    # initial stats
    expected_initial_study_def_logs = [
        # output columns include patient_id, and the 2 variables defined in
        # the study definition
        # tables - Patient, temp event table for codelist
        {"output_column_count": 3, "table_count": 2, "table_joins_count": 1},
        {"variable_count": 2},
        {"variables_using_codelist_count": 1},
        {"variable_using_codelist": "event", "codelist_size": 1},
    ]

    # timing stats
    # logs in tpp_backend during query execution
    create_codelist_logs = _sql_execute_timing_logs(
        description="Uploading codelist for event",
        sql="CREATE TABLE #tmp1_event_codelist",
        timing_id=first_id,
    )
    insert_codelist_logs = _sql_execute_timing_logs(
        description=None,
        sql="INSERT INTO #tmp1_event_codelist (code, category) VALUES\n[truncated]",
        timing_id=first_id + 1,
        is_truncated=True,
    )
    event_query_logs = _sql_execute_timing_logs(
        description="Query for event",
        sql="SELECT * INTO #event",
        timing_id=first_id + 2,
    )
    population_query_logs = _sql_execute_timing_logs(
        description="Query for population",
        sql="SELECT * INTO #population",
        timing_id=first_id + 3,
    )
    final_join_logs = _sql_execute_timing_logs(
        description="Join all columns for final output",
        sql="JOIN #event ON #event.patient_id = #population.patient_id",
        timing_id=first_id + 4,
    )
    expected_timing_log_params = [
        *create_codelist_logs,
        *insert_codelist_logs,
        *event_query_logs,
        *population_query_logs,
        *final_join_logs,
    ]

    assert_stats_logs(
        logger,
        expected_initial_study_def_logs,
        expected_timing_log_params,
        downloaded=False,
    )
""" study = StudyDefinition( # Configure the expectations framework (optional) default_expectations={ "date": { "earliest": "1970-01-01", "latest": "today" }, "rate": "uniform", "incidence": 0.05, }, ## STUDY POPULATION (required) population=patients.all(), has_asthma=patients.with_these_clinical_events( asthma_codes, between=["2017-02-28", "2020-02-29"], return_expectations={"incidence": 0.5}, ), asthma_ever=patients.with_these_clinical_events( asthma_ever_codes, on_or_before="2020-02-29", return_expectations={"incidence": 0.8}, ), age_cat=patients.satisfying( "age >=18 AND age <= 110", return_expectations={"incidence": 0.9}, age=patients.age_as_of( "2020-02-29", return_expectations={ "rate": "universal", "int": {
}, "rate": "universal", }, # define the study index date index_date=index_date, # This line defines the study population population=patients.satisfying( "(NOT died) AND (registered) AND (pregnant) AND age >= 16", died=patients.died_from_any_cause(on_or_before=index_date, returning="binary_flag"), registered=patients.registered_as_of(index_date), pregnant=patients.with_these_clinical_events( pregnant_code, between=["index_date", "index_date + 1 month"], returning="binary_flag", return_expectations={"incidence": 0.6}, ), ), age=patients.age_as_of(index_date, return_expectations={ "rate": "universal", "int": { "distribution": "population_ages" } }), clinical_riskgroup=patients.with_these_clinical_events( clinical_riskgroup_codes, between=["index_date", "index_date + 1 month"], returning="binary_flag", #return_expectations= { "incidence": 0.6 },),
"rate": "universal", "category": { "ratios": { "100": 0.2, "200": 0.2, "300": 0.2, "400": 0.2, "500": 0.2 } }, }, ), learning_disability=patients.with_these_clinical_events( ld_codes, on_or_before="index_date", returning="binary_flag", return_expectations={ "incidence": 0.01, }, ), event=patients.with_these_clinical_events( codelist=codelist, between=["index_date", "last_day_of_month(index_date)"], returning="binary_flag", return_expectations={"incidence": 0.5}), event_code=patients.with_these_clinical_events( codelist=codelist, between=["index_date", "last_day_of_month(index_date)"], returning="code", return_expectations={ "category": { "ratios": {