コード例 #1
0
def test_make_df_from_expectations_doesnt_alter_date_defaults():
    """Regression test: study-wide defaults still apply after per-variable overrides.

    Two variables override part of the default expectations (the incidence
    and the date range respectively) and a third relies entirely on the
    defaults.  Only the third variable is inspected.
    """
    study_defaults = {
        "rate": "exponential_increase",
        "incidence": 1.0,
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "category": {"ratios": {"M": 0.5, "F": 0.5}},
    }
    narrowed_dates = {"date": {"earliest": "2015-01-01", "latest": "today"}}

    study = StudyDefinition(
        default_expectations=study_defaults,
        population=patients.all(),
        with_different_incidence=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations={"incidence": 0.2},
            include_day=True,
        ),
        with_different_date=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            return_expectations=narrowed_dates,
            include_day=True,
        ),
        with_defaults=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            include_day=True,
        ),
    )
    result = study.make_df_from_expectations(10000)
    default_dates = result["with_defaults"]

    # A date before the overriding variable's "earliest" shows the default
    # date range (starting 1900) was used for this column.
    assert default_dates.min() < "2015-01-01"
    # The default incidence is 1.0, so no value may be missing.
    rows_without_date = result[pd.isnull(default_dates)]
    assert len(rows_without_date) == 0
コード例 #2
0
def test_study_definition_initial_stats_logging(logger):
    """Constructing a StudyDefinition emits the expected initial stats log entries.

    The study has three top-level variables plus ``event_date_2``, which is
    defined inline as a keyword argument of ``event_min_date``.
    """
    StudyDefinition(
        default_expectations={
            "rate": "exponential_increase",
            "incidence": 0.2,
            "date": {"earliest": "1900-01-01", "latest": "today"},
        },
        population=patients.all(),
        event_date_1=patients.with_these_clinical_events(
            codelist(["A"], system="ctv3"),
            returning="date",
            date_format="YYYY-MM-DD",
        ),
        event_min_date=patients.minimum_of(
            "event_date_1",
            event_date_2=patients.with_these_clinical_events(
                codelist(["B", "C"], system="ctv3"),
                returning="date",
                date_format="YYYY-MM-DD",
            ),
        ),
    )

    expected_logs = [
        # Output columns: patient_id plus the 4 variables defined in the study
        # definition (event_date_2, nested inside event_min_date, is included).
        # Tables: Patient, plus one temporary event table per codelist.
        {"output_column_count": 5, "table_count": 3, "table_joins_count": 2},
        # variable_count covers top-level variables only (i.e. not event_date_2).
        {"variable_count": 4},
        # Two variables use a codelist: event_date_1 and the nested event_date_2.
        {"variables_using_codelist_count": 2},
        # One entry per codelist-backed variable, recording the codelist size.
        {"variable_using_codelist": "event_date_1", "codelist_size": 1},
        {"variable_using_codelist": "event_date_2", "codelist_size": 2},
    ]
    assert get_stats_logs(logger.entries) == expected_logs
def test_clinical_events_numeric_value_dtype_generation():
    """Check ``pandas_csv_args`` for a numeric value paired with a ``date_of`` column.

    The numeric variable is expected as a float dtype, and its companion
    date column (format ``YYYY-MM``) as a parsed date with the
    ``add_day_to_date`` converter.
    """
    creatinine_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        creatinine=patients.with_these_clinical_events(
            creatinine_codes,
            find_last_match_in_period=True,
            on_or_before="2020-02-01",
            returning="numeric_value",
        ),
        creatinine_date=patients.date_of("creatinine", date_format="YYYY-MM"),
    )

    expected_csv_args = {
        "converters": {"creatinine_date": "add_day_to_date"},
        "dtype": {"creatinine": "float"},
        "date_col_for": {"creatinine": "creatinine_date"},
        "parse_dates": ["creatinine_date"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected_csv_args
def test_categorical_clinical_events_with_date_dtype_generation():
    """Check ``pandas_csv_args`` for a category variable paired with a ``date_of`` column.

    No ``date_format`` is passed to ``date_of`` here, and the expected
    converter for the date column is ``add_month_and_day_to_date``.
    """
    ethnicity_codes = codelist([("X", "Y")], system="ctv3")
    ethnicity_codes.has_categories = True
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            ethnicity_codes,
            returning="category",
            find_last_match_in_period=True,
        ),
        ethnicity_date=patients.date_of("ethnicity"),
    )

    expected_csv_args = {
        "converters": {"ethnicity_date": "add_month_and_day_to_date"},
        "date_col_for": {"ethnicity": "ethnicity_date"},
        "dtype": {"ethnicity": "category"},
        "parse_dates": ["ethnicity_date"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected_csv_args
def test_make_df_from_expectations_with_categories_in_codelist_validation():
    """Generating dummy data for this study must raise ``ValueError``.

    NOTE(review): the codelist's only category is "Y" while the expectations
    list ratios for "A" and "B"; presumably that mismatch is what gets
    rejected -- confirm against the validation code.
    """
    ethnicity_codes = codelist([("X", "Y")], system="ctv3")
    ethnicity_codes.has_categories = True
    ethnicity_expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            ethnicity_codes,
            returning="category",
            return_expectations=ethnicity_expectations,
            find_last_match_in_period=True,
        ),
    )

    with pytest.raises(ValueError):
        study.make_df_from_expectations(10000)
def test_make_df_from_expectations_with_number_of_episodes():
    """Dummy data for a ``number_of_episodes`` variable has exactly one column.

    The study defines a single variable, ``episode_count``, with integer
    expectations drawn from a normal distribution.
    """
    study = StudyDefinition(
        population=patients.all(),
        episode_count=patients.with_these_clinical_events(
            codelist(["A", "B", "C"], system="ctv3"),
            ignore_days_where_these_codes_occur=codelist(["D", "E"], system="ctv3"),
            returning="number_of_episodes",
            episode_defined_as="series of events each <= 14 days apart",
            return_expectations={
                "int": {"distribution": "normal", "mean": 4, "stddev": 2},
                "date": {"earliest": "1900-01-01", "latest": "today"},
                "incidence": 0.2,
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list: `Index == list` is an element-wise comparison
    # whose truth value is only defined for a single element and which raises
    # (rather than failing the assertion) when the lengths differ.
    assert list(result.columns) == ["episode_count"]
コード例 #7
0
def test_stats_logging_with_error(logger):
    """A failing query is still timed, and its closing log records the error state.

    The last backend query is replaced by one against a non-existent object
    so that running the study raises.
    """
    broken_sql = "SELECT Foo FROM Bar"
    study = StudyDefinition(
        population=patients.all(),
        event=patients.with_these_clinical_events(codelist(["A"], "snomed")),
    )

    # Deliberately sabotage the final query.
    study.backend.queries[-1] = broken_sql
    with pytest.raises(Exception) as excinfo:
        study.to_dicts()

    # The failure surfaces with the expected message.
    assert "Invalid object name 'Bar'" in str(excinfo.value)

    # Exactly one log entry carries the broken SQL ...
    entries_for_broken_sql = [
        entry for entry in logger.entries if entry.get("sql") == broken_sql
    ]
    (sql_log,) = entries_for_broken_sql

    # ... and exactly one "stop" entry shares its timing id.
    timing_id = sql_log["timing_id"]
    stop_entries = [
        entry
        for entry in logger.entries
        if entry.get("timing_id") == timing_id and entry.get("timing") == "stop"
    ]
    (end_log,) = stop_entries
    assert end_log["state"] == "error"
def test_make_df_from_expectations_with_categories():
    """Dummy category data follows the requested ratios.

    Category "A" is given ratio 0.3 and "B" 0.7, so "A" must occur less
    often than "B" in the generated column.
    """
    categorised_codelist = codelist([("1", "A"), ("2", "B")], system="ctv3")
    categorised_codelist.has_categories = True
    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            categorised_codelist,
            returning="category",
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.2,
                "category": {"ratios": {"A": 0.3, "B": 0.7}},
                "date": {"earliest": "1900-01-01", "latest": "today"},
            },
            find_last_match_in_period=True,
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list rather than relying on the truth value of an
    # element-wise `Index == list` comparison.
    assert list(result.columns) == ["ethnicity"]

    category_counts = result.reset_index().groupby("ethnicity").count()
    # Each row of `category_counts` is labelled by column name, so the first
    # count must be taken positionally with `.iloc`; plain `[0]` relies on the
    # deprecated integer-as-position fallback of `Series.__getitem__`.
    count_a = category_counts.loc["A"].iloc[0]
    count_b = category_counts.loc["B"].iloc[0]
    assert count_a < count_b
def test_make_df_from_expectations_partial_default_overrides():
    """Overriding only ``latest`` of the default date range caps the generated years.

    The variable is returned in ``YYYY`` format, so the maximum is compared
    numerically against the overriding year.
    """
    study_defaults = {
        "date": {"earliest": "1900-01-01", "latest": "today"},
        "rate": "exponential_increase",
        "incidence": 0.2,
    }
    study = StudyDefinition(
        default_expectations=study_defaults,
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            find_first_match_in_period=True,
            date_format="YYYY",
            # Only the upper bound is overridden here.
            return_expectations={"date": {"latest": "2000-01-01"}},
        ),
    )

    result = study.make_df_from_expectations(10000)
    years = result["asthma_condition"].astype("float")
    assert years.max() == 2000
コード例 #10
0
def test_make_df_from_expectations_with_satisfying():
    """Dummy data for a ``satisfying`` variable contains only that variable.

    ``condition_a`` and ``condition_b`` are defined inline as keyword
    arguments of ``has_condition``; the assertion checks they do not appear
    as columns of their own.
    """
    study = StudyDefinition(
        population=patients.all(),
        has_condition=patients.satisfying(
            "condition_a OR condition_b",
            condition_a=patients.with_these_clinical_events(
                codelist(["A", "B", "C"], system="ctv3")
            ),
            condition_b=patients.with_these_clinical_events(
                codelist(["X", "Y", "Z"], system="ctv3")
            ),
            return_expectations={
                "date": {"earliest": "2001-01-01", "latest": "2020-03-01"},
                "incidence": 0.95,
            },
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list: `Index == list` is element-wise and raises on a
    # length mismatch instead of producing a clean assertion failure.
    assert list(result.columns) == ["has_condition"]
コード例 #11
0
def first_diagnosis_in_period(dx_codelist):
    """Build a date variable for the first clinical event matching ``dx_codelist``.

    The variable is requested with ``returning="date"``,
    ``find_first_match_in_period=True`` and ``include_month=True``.  Its
    expectations use an incidence of 0.2 and dates from 1950-01-01 to today.
    """
    diagnosis_expectations = {
        "incidence": 0.2,
        "date": {"earliest": "1950-01-01", "latest": "today"},
    }
    return patients.with_these_clinical_events(
        dx_codelist,
        returning="date",
        find_first_match_in_period=True,
        include_month=True,
        return_expectations=diagnosis_expectations,
    )
コード例 #12
0
def test_clinical_events_with_year_date_dtype_generation():
    """Check ``pandas_csv_args`` for a date variable with no explicit ``date_format``.

    The expected converter for the column is ``add_month_and_day_to_date``.
    """
    diabetes_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        diabetes=patients.with_these_clinical_events(diabetes_codes, returning="date"),
    )

    expected_csv_args = {
        "converters": {"diabetes": "add_month_and_day_to_date"},
        "date_col_for": {},
        "dtype": {},
        "parse_dates": ["diabetes"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected_csv_args
コード例 #13
0
def test_clinical_events_with_date_dtype_generation():
    """Check ``pandas_csv_args`` for a ``YYYY-MM`` formatted date variable.

    The expected converter for the column is ``add_day_to_date``.
    """
    diabetes_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        diabetes=patients.with_these_clinical_events(
            diabetes_codes,
            return_first_date_in_period=True,
            date_format="YYYY-MM",
        ),
    )

    expected_csv_args = {
        "converters": {"diabetes": "add_day_to_date"},
        "date_col_for": {},
        "dtype": {},
        "parse_dates": ["diabetes"],
    }
    assert _converters_to_names(study.pandas_csv_args) == expected_csv_args
コード例 #14
0
def test_make_df_from_expectations_returning_date_using_defaults():
    """A date variable without its own expectations uses the study defaults.

    The default date range starts at 1900-01-01, so the earliest generated
    date is expected to fall before 1960-01-01.
    """
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "1900-01-01", "latest": "today"},
            "rate": "exponential_increase",
            "incidence": 0.2,
        },
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            returning="date",
            find_first_match_in_period=True,
            date_format="YYYY-MM-DD",
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Select the column by name instead of `DataFrame.min()[0]`: the latter
    # indexes a column-labelled Series with an integer, which relies on the
    # deprecated positional fallback of `Series.__getitem__`.
    observed_dates = result["asthma_condition"].dropna()
    assert observed_dates.min() < "1960-01-01"
コード例 #15
0
def test_stats_logging_with_message_handle_exception(mock_regex, logger):
    """An exception raised while matching messages does not break the study run.

    ``mock_regex.match`` is made to raise; the study must still run, the
    usual timing logs must be present, and every ``sqlserver-stats`` entry
    must describe the handling error.
    """
    mock_regex.match.side_effect = Exception("message error")
    study = StudyDefinition(
        population=patients.all(),
        event=patients.with_these_clinical_events(codelist(["A"], "snomed")),
    )
    study.to_dicts()

    # The study ran and the normal cohortextractor-stats timing logs exist.
    stats_entries = get_stats_logs(logger.entries)
    timing_entries = get_logs_by_key(stats_entries, "timing_id")
    sqlserver_entries = get_stats_logs(logger.entries, event="sqlserver-stats")
    assert len(timing_entries) > 0

    # The sqlserver-stats logs consist solely of the error logs.
    for entry in sqlserver_entries:
        assert entry["description"] == "Exception in SQL server message handling"
        assert str(entry["exc_info"]) == "message error"
コード例 #16
0
 def var_signature(name, on_or_after):
     """Return a one-entry dict mapping ``name`` to a first-match event date variable.

     ``codes``, ``from_date``, ``to_date`` and ``i`` are not defined in this
     function; they are resolved from the surrounding scope.  ``codes`` names
     a module-level global holding the codelist.
     """
     event_codes = globals()[codes]
     date_expectations = {
         "date": {"earliest": from_date, "latest": to_date},
         # to help check events_pp in counts.py works
         "incidence": 1 / i,
     }
     variable = patients.with_these_clinical_events(
         event_codes,
         returning="date",
         on_or_after=on_or_after,
         date_format="YYYY-MM-DD",
         find_first_match_in_period=True,
         return_expectations=date_expectations,
     )
     return {name: variable}
コード例 #17
0
def test_make_df_from_expectations_with_date_filter():
    """A ``between`` filter bounds the generated dates.

    The expectations allow dates from 1900 up to today, but the variable is
    restricted to 2001-12-01..2002-06-01, so no generated date may be later
    than the end of that window.
    """
    study = StudyDefinition(
        population=patients.all(),
        asthma_condition=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            between=["2001-12-01", "2002-06-01"],
            returning="date",
            return_expectations={
                "rate": "exponential_increase",
                "incidence": 0.2,
                "date": {"earliest": "1900-01-01", "latest": "today"},
            },
            find_first_match_in_period=True,
            date_format="YYYY-MM-DD",
        ),
    )
    population_size = 10000
    result = study.make_df_from_expectations(population_size)
    # Compare as a plain list rather than relying on the truth value of an
    # element-wise `Index == list` comparison.
    assert list(result.columns) == ["asthma_condition"]
    # Select the column by name instead of `DataFrame.max()[0]`, which relies
    # on the deprecated positional fallback of `Series.__getitem__`.
    observed_dates = result["asthma_condition"].dropna()
    assert observed_dates.max() <= "2002-06-01"
def test_column_refs_in_date_expressions_do_not_trigger_errors():
    """Date expressions that reference another column must not raise.

    Further down the road these expressions should be interpreted so that
    appropriate dates are generated; for now the only requirement is that
    dummy-data generation does not blow up when it encounters them.
    """

    def date_expectations():
        # Build a fresh dict on every call so the two variables never share
        # one mapping object.
        return {
            "rate": "exponential_increase",
            "incidence": 0.2,
            "date": {"earliest": "1990-01-01", "latest": "today"},
        }

    study = StudyDefinition(
        population=patients.all(),
        copd_exacerbation=patients.with_these_clinical_events(
            codelist(["X"], system="ctv3"),
            between=["2001-12-01", "2002-06-01"],
            returning="date",
            return_expectations=date_expectations(),
            find_last_match_in_period=True,
            date_format="YYYY-MM-DD",
        ),
        drug_after_exacerbation=patients.with_these_medications(
            codelist(["Y"], system="snomed"),
            # Bounds expressed relative to the column defined above.
            between=["copd_exacerbation", "copd_exacerbation + 3 months"],
            returning="date",
            return_expectations=date_expectations(),
            find_first_match_in_period=True,
            date_format="YYYY-MM-DD",
        ),
    )

    # Passing means no exception was raised.
    study.make_df_from_expectations(10000)
コード例 #19
0
def test_booleans_correctly_handled_in_dummy_data(tmp_path, file_format):
    """A binary-flag variable round-trips through every supported file format.

    Dummy data is written to ``dummy-data.<file_format>`` under ``tmp_path``
    and read back with the matching pandas reader.  With an incidence of 0.5
    over 100 patients, both boolean values must occur more than 10 times.
    """
    cl = codelist(["12345"], system="snomed")
    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "2020-01-01", "latest": "today"}
        },
        population=patients.all(),
        has_event=patients.with_these_clinical_events(
            cl,
            returning="binary_flag",
            return_expectations={"rate": "uniform", "incidence": 0.5},
        ),
    )

    filename = tmp_path / f"dummy-data.{file_format}"
    study.to_file(filename, expectations_population=100)

    # The representation of the two boolean values depends on the format.
    if file_format in ("csv", "csv.gz"):
        df = pandas.read_csv(filename, dtype=str)
        bools = ("0", "1")
    elif file_format == "feather":
        df = pandas.read_feather(filename)
        bools = (True, False)
    elif file_format in ("dta", "dta.gz"):
        df = pandas.read_stata(filename)
        bools = (0, 1)
    else:
        # Raise explicitly: `assert False` is removed when Python runs with
        # -O, which would turn this into a confusing NameError further down.
        raise AssertionError(f"Unhandled format: {file_format}")

    # Check we've got at least some of each value
    counts = df.has_event.value_counts()
    assert counts[bools[0]] > 10
    assert counts[bools[1]] > 10
コード例 #20
0
     "rate": "universal",
     "category": {
         "ratios": {
             "M": 0.49,
             "F": 0.51
         }
     },
 }),
 ethnicity=patients.with_these_clinical_events(
     ethnicity_codes,
     returning="category",
     find_last_match_in_period=True,
     include_date_of_match=True,
     return_expectations={
         "category": {
             "ratios": {
                 "1": 0.8,
                 "5": 0.1,
                 "3": 0.1
             }
         },
         "incidence": 0.75,
     },
 ),
 # IMID disease codes
 atopic_dermatitis=first_diagnosis_in_period(atopic_dermatitis_codes),
 crohns_disease=first_diagnosis_in_period(crohns_disease_codes),
 ulcerative_colitis=first_diagnosis_in_period(ulcerative_colitis_codes),
 inflammatory_bowel_disease_unclassified=first_diagnosis_in_period(
     inflammatory_bowel_disease_unclassified_codes),
 psoriasis=first_diagnosis_in_period(psoriasis_codes),
 hidradenitis_suppurativa=first_diagnosis_in_period(
コード例 #21
0


    # https://github.com/opensafely/risk-factors-research/issues/46
    sex=patients.sex(
        return_expectations={
            "rate": "universal",
            "category": {"ratios": {"M": 0.49, "F": 0.51}},
        }
    ),

    # https://codelists.opensafely.org/codelist/opensafely/chronic-cardiac-disease/2020-04-08/
    chronic_cardiac_disease=patients.with_these_clinical_events(
        chronic_cardiac_disease_codes,
        returning="date",
        find_first_match_in_period=True,
        include_month=True,
        return_expectations={"incidence": 0.2},
    ),

    # https://codelists.opensafely.org/codelist/opensafely/chronic-liver-disease/2020-06-02/
    chronic_liver_disease=patients.with_these_clinical_events(
        chronic_liver_disease_codes,
        returning="date",
        find_first_match_in_period=True,
        include_month=True,
        return_expectations={
            "incidence": 0.2,
            "date": {"earliest": "1950-01-01", "latest": "today"},
        },
    ),
    },

    # STUDY POPULATION
    # This line defines the study population
    population=patients.registered_with_one_practice_between(
        "2018-11-01", "2019-02-01"),
    dereg_date=patients.date_deregistered_from_all_supported_practices(
        on_or_after="2020-02-01",
        date_format="YYYY-MM",
    ),

    # OUTCOMES
    worms=patients.with_these_clinical_events(
        worms_codes,
        return_first_date_in_period=True,
        include_month=True,
        return_expectations={"date": {
            "earliest": "2019-02-01"
        }},
    ),
    died_date_ons=patients.died_from_any_cause(
        on_or_before="2020-08-01",
        returning="date_of_death",
        include_month=True,
        include_day=True,
    ),

    ## DEMOGRAPHIC COVARIATES
    # AGE
    age=patients.age_as_of(
        "2019-02-01",
        return_expectations={
コード例 #23
0
def test_to_file_with_expectations_population(tmp_path, file_format):
    """Dummy data written by ``to_file`` passes ``validate_dummy_data``.

    The study covers sex, age, a binary flag, dates in day/month/year
    formats, and a category variable that is only present for half of the
    patients.
    """

    def half_incidence():
        # Fresh dict per variable so no two variables share one mapping.
        return {"rate": "uniform", "incidence": 0.5}

    cl = codelist([("12345", "foo"), ("67890", "bar")], system="snomed")
    sex_expectations = {
        "category": {"ratios": {"F": 0.5, "M": 0.5}},
        "rate": "universal",
    }
    age_expectations = {
        "int": {"distribution": "population_ages"},
        "rate": "universal",
    }
    category_expectations = {
        "category": {"ratios": {"foo": 0.5, "bar": 0.5}},
        # Half the values here should be null
        "incidence": 0.5,
    }

    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "2020-01-01", "latest": "today"}
        },
        population=patients.all(),
        sex=patients.sex(return_expectations=sex_expectations),
        age=patients.age_as_of("2020-01-01", return_expectations=age_expectations),
        has_event=patients.with_these_clinical_events(
            cl,
            returning="binary_flag",
            return_expectations=half_incidence(),
        ),
        event_date_day=patients.with_these_clinical_events(
            cl,
            returning="date",
            date_format="YYYY-MM-DD",
            return_expectations=half_incidence(),
        ),
        event_date_month=patients.with_these_clinical_events(
            cl,
            returning="date",
            date_format="YYYY-MM",
            return_expectations=half_incidence(),
        ),
        event_date_year=patients.with_these_clinical_events(
            cl,
            returning="date",
            date_format="YYYY",
            return_expectations=half_incidence(),
        ),
        incomplete_categories=patients.with_these_clinical_events(
            cl,
            returning="category",
            return_expectations=category_expectations,
        ),
    )

    dummy_data_file = tmp_path / f"dummy-data.{file_format}"
    study.to_file(dummy_data_file, expectations_population=100)
    # validate_dummy_data is reused to confirm that what the expectations
    # framework generated is valid for these covariate definitions.
    validate_dummy_data(study.covariate_definitions, dummy_data_file)
コード例 #24
0
 def study():
     """Build a StudyDefinition with six event dates and several ``minimum_of`` variables.

     Each event date takes its ``date_format`` from
     ``inconsistent_date_formats`` (resolved from the surrounding scope),
     falling back to ``"YYYY-MM-DD"`` when the variable has no entry there.
     """

     def event_date(code, variable_name):
         # One single-code ctv3 event date whose format may be overridden
         # per variable.
         return patients.with_these_clinical_events(
             codelist([code], system="ctv3"),
             returning="date",
             date_format=inconsistent_date_formats.get(variable_name, "YYYY-MM-DD"),
         )

     return StudyDefinition(
         default_expectations={
             "rate": "exponential_increase",
             "incidence": 0.2,
             "date": {"earliest": "1900-01-01", "latest": "today"},
         },
         population=patients.all(),
         date_1=event_date("A", "date_1"),
         # Minimum of an existing column and an inline-defined one.
         first_min_date=patients.minimum_of(
             "date_1",
             date_2=event_date("B", "date_2"),
         ),
         # Minimum of two inline-defined columns.
         second_min_date=patients.minimum_of(
             date_3=event_date("Y", "date_3"),
             date_4=event_date("Z", "date_4"),
         ),
         third_min_date=patients.minimum_of(
             date_5=event_date("Y", "date_5"),
             date_6=event_date("Z", "date_6"),
         ),
         # Minimums that reference other minimum_of columns by name.
         min_of_second_and_third=patients.minimum_of(
             "second_min_date",
             "third_min_date",
         ),
         min_overall=patients.minimum_of("min_of_second_and_third", "first_min_date"),
         min_date_1_third_min=patients.minimum_of("date_1", "third_min_date"),
     )
コード例 #25
0
def test_to_file_with_dummy_data_file(tmp_path, file_format):
    """Passing a dummy data file to ``to_file`` reproduces that file byte for byte.

    Dummy data is first generated from expectations, then fed back in via
    ``dummy_data_file``; the resulting output must equal the input.
    """

    def half_incidence():
        # Fresh dict per variable so no two variables share one mapping.
        return {"rate": "uniform", "incidence": 0.5}

    def read_bytes(path):
        with open(path, "rb") as handle:
            return handle.read()

    cl = codelist(["12345"], system="snomed")
    sex_expectations = {
        "category": {"ratios": {"F": 0.5, "M": 0.5}},
        "rate": "universal",
    }
    age_expectations = {
        "int": {"distribution": "population_ages"},
        "rate": "universal",
    }

    study = StudyDefinition(
        default_expectations={
            "date": {"earliest": "2020-01-01", "latest": "today"}
        },
        population=patients.all(),
        sex=patients.sex(return_expectations=sex_expectations),
        age=patients.age_as_of("2020-01-01", return_expectations=age_expectations),
        has_event=patients.with_these_clinical_events(
            cl,
            returning="binary_flag",
            return_expectations=half_incidence(),
        ),
        event_date_day=patients.with_these_clinical_events(
            cl,
            returning="date",
            date_format="YYYY-MM-DD",
            return_expectations=half_incidence(),
        ),
        event_date_month=patients.with_these_clinical_events(
            cl,
            returning="date",
            date_format="YYYY-MM",
            return_expectations=half_incidence(),
        ),
        event_date_year=patients.with_these_clinical_events(
            cl,
            returning="date",
            date_format="YYYY",
            return_expectations=half_incidence(),
        ),
    )

    # Step 1: generate dummy data using the expectations framework.
    dummy_data_file = tmp_path / f"dummy-data.{file_format}"
    study.to_file(dummy_data_file, expectations_population=10)

    # Step 2: run again, this time supplying that dummy data as input.
    output_file = tmp_path / f"output.{file_format}"
    study.to_file(output_file, dummy_data_file=dummy_data_file)

    # Step 3: the two files must have identical contents.
    dummy_data = read_bytes(dummy_data_file)
    expected_output = read_bytes(output_file)
    assert dummy_data == expected_output
コード例 #26
0
        }),
    stp=patients.registered_practice_as_of(
        "index_date",
        returning="stp_code",
        return_expectations={
            "category": {
                "ratios": {
                    "STP1": 0.5,
                    "STP2": 0.5
                }
            },
        },
    ),
    first_dose=patients.with_these_clinical_events(
        first_dose_code,
        returning="binary_flag",
        between=["index_date", "index_date + 1 month"],
        return_expectations={"incidence": 0.4}),
    second_dose=patients.with_these_clinical_events(
        second_dose_code,
        returning="binary_flag",
        between=["index_date", "index_date + 1 month"],
        return_expectations={"incidence": 0.4}),
)

measures = [
    Measure(id="first_dose_stp",
            numerator="first_dose",
            denominator="population",
            group_by=["stp", "age_group", "sex"]),
    Measure(id="second_dose_stp",
コード例 #27
0
def test_stats_logging_tpp_backend(logger):
    """Running a study logs the initial stats and one timing log set per query.

    The expected timing ids are consecutive, starting from the counter value
    read before the study runs.
    """
    # The query counter is a module-level global and is not reset between
    # tests, so read the next position (without incrementing it) to learn
    # where this test's timing logs begin.
    start_counter = timing_log_counter.next

    study = StudyDefinition(
        population=patients.all(),
        event=patients.with_these_clinical_events(codelist(["A"], "snomed")),
    )
    study.to_dicts()

    # Initial stats.
    expected_initial_study_def_logs = [
        # Output columns: patient_id plus the 2 variables defined in the
        # study definition.
        # Tables: Patient, plus the temporary event table for the codelist.
        {"output_column_count": 3, "table_count": 2, "table_joins_count": 1},
        {"variable_count": 2},
        {"variables_using_codelist_count": 1},
        {"variable_using_codelist": "event", "codelist_size": 1},
    ]

    # Timing stats: queries logged by tpp_backend during execution, listed in
    # the order they run.  Each one takes the next timing id.
    timed_queries = [
        {
            "description": "Uploading codelist for event",
            "sql": "CREATE TABLE #tmp1_event_codelist",
        },
        {
            "description": None,
            "sql": "INSERT INTO #tmp1_event_codelist (code, category) VALUES\n[truncated]",
            "is_truncated": True,
        },
        {
            "description": "Query for event",
            "sql": "SELECT * INTO #event",
        },
        {
            "description": "Query for population",
            "sql": "SELECT * INTO #population",
        },
        {
            "description": "Join all columns for final output",
            "sql": "JOIN #event ON #event.patient_id = #population.patient_id",
        },
    ]
    expected_timing_log_params = []
    for offset, query_kwargs in enumerate(timed_queries):
        expected_timing_log_params.extend(
            _sql_execute_timing_logs(timing_id=start_counter + offset, **query_kwargs)
        )

    assert_stats_logs(
        logger,
        expected_initial_study_def_logs,
        expected_timing_log_params,
        downloaded=False,
    )
コード例 #28
0
"""
study = StudyDefinition(
    # Configure the expectations framework (optional)
    default_expectations={
        "date": {
            "earliest": "1970-01-01",
            "latest": "today"
        },
        "rate": "uniform",
        "incidence": 0.05,
    },
    ## STUDY POPULATION (required)
    population=patients.all(),
    has_asthma=patients.with_these_clinical_events(
        asthma_codes,
        between=["2017-02-28", "2020-02-29"],
        return_expectations={"incidence": 0.5},
    ),
    asthma_ever=patients.with_these_clinical_events(
        asthma_ever_codes,
        on_or_before="2020-02-29",
        return_expectations={"incidence": 0.8},
    ),
    age_cat=patients.satisfying(
        "age >=18 AND age <= 110",
        return_expectations={"incidence": 0.9},
        age=patients.age_as_of(
            "2020-02-29",
            return_expectations={
                "rate": "universal",
                "int": {
        },
        "rate": "universal",
    },

    # define the study index date
    index_date=index_date,

    # This line defines the study population
    population=patients.satisfying(
        "(NOT died) AND (registered) AND (pregnant) AND age >= 16",
        died=patients.died_from_any_cause(on_or_before=index_date,
                                          returning="binary_flag"),
        registered=patients.registered_as_of(index_date),
        pregnant=patients.with_these_clinical_events(
            pregnant_code,
            between=["index_date", "index_date + 1 month"],
            returning="binary_flag",
            return_expectations={"incidence": 0.6},
        ),
    ),
    age=patients.age_as_of(index_date,
                           return_expectations={
                               "rate": "universal",
                               "int": {
                                   "distribution": "population_ages"
                               }
                           }),
    clinical_riskgroup=patients.with_these_clinical_events(
        clinical_riskgroup_codes,
        between=["index_date", "index_date + 1 month"],
        returning="binary_flag",
        #return_expectations= { "incidence": 0.6 },),
コード例 #30
0
         "rate": "universal",
         "category": {
             "ratios": {
                 "100": 0.2,
                 "200": 0.2,
                 "300": 0.2,
                 "400": 0.2,
                 "500": 0.2
             }
         },
     },
 ),
 learning_disability=patients.with_these_clinical_events(
     ld_codes,
     on_or_before="index_date",
     returning="binary_flag",
     return_expectations={
         "incidence": 0.01,
     },
 ),
 event=patients.with_these_clinical_events(
     codelist=codelist,
     between=["index_date", "last_day_of_month(index_date)"],
     returning="binary_flag",
     return_expectations={"incidence": 0.5}),
 event_code=patients.with_these_clinical_events(
     codelist=codelist,
     between=["index_date", "last_day_of_month(index_date)"],
     returning="code",
     return_expectations={
         "category": {
             "ratios": {