def test_make_df_from_expectations_with_distribution_and_date():
    """Dummy BMI values and their measurement dates are missing together.

    Builds a study with a BMI column (with explicit ``return_expectations``)
    and a ``date_of`` column derived from it, generates a dummy dataframe and
    checks that:

    * exactly the two declared columns are produced, and
    * every row with a BMI of ``0.0`` (the value this test treats as "no
      measurement") has a null measurement date, and vice versa.
    """
    bmi_expectations = {
        "rate": "exponential_increase",
        "incidence": 0.6,
        "float": {"distribution": "normal", "mean": 35, "stddev": 10},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
            return_expectations=bmi_expectations,
        ),
        bmi_date_measured=patients.date_of("bmi", date_format="YYYY-MM"),
    )

    dummy_df = study.make_df_from_expectations(10000)
    assert sorted(dummy_df.columns) == ["bmi", "bmi_date_measured"]

    # The value column and its date column must agree row-by-row on which
    # patients have no measurement.
    bmi_is_missing = dummy_df["bmi"] == 0.0
    date_is_missing = pd.isnull(dummy_df["bmi_date_measured"])
    assert (bmi_is_missing == date_is_missing).all()
def test_clinical_events_numeric_value_dtype_generation():
    """Check the CSV-loading arguments generated for a numeric clinical event.

    A ``numeric_value`` clinical-event column paired with a ``YYYY-MM``
    ``date_of`` column is expected to yield a float dtype for the value, the
    ``add_day_to_date`` converter for the date, and a link between the two in
    ``date_col_for``.
    """
    ctv3_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        creatinine=patients.with_these_clinical_events(
            ctv3_codes,
            find_last_match_in_period=True,
            on_or_before="2020-02-01",
            returning="numeric_value",
        ),
        creatinine_date=patients.date_of("creatinine", date_format="YYYY-MM"),
    )

    expected = {
        "converters": {"creatinine_date": "add_day_to_date"},
        "dtype": {"creatinine": "float"},
        "date_col_for": {"creatinine": "creatinine_date"},
        "parse_dates": ["creatinine_date"],
    }
    csv_args = _converters_to_names(study.pandas_csv_args)
    assert csv_args == expected
def test_categorical_clinical_events_with_date_dtype_generation():
    """Check the CSV-loading arguments generated for a categorical event.

    A ``category``-returning clinical-event column paired with a ``date_of``
    column that specifies no ``date_format`` is expected to yield a
    ``category`` dtype for the value and the ``add_month_and_day_to_date``
    converter for the date.
    """
    codes_with_categories = codelist([("X", "Y")], system="ctv3")
    # Flag the codelist as categorised so that returning="category" is usable.
    codes_with_categories.has_categories = True

    study = StudyDefinition(
        population=patients.all(),
        ethnicity=patients.with_these_clinical_events(
            codes_with_categories,
            returning="category",
            find_last_match_in_period=True,
        ),
        ethnicity_date=patients.date_of("ethnicity"),
    )

    expected = {
        "converters": {"ethnicity_date": "add_month_and_day_to_date"},
        "date_col_for": {"ethnicity": "ethnicity_date"},
        "dtype": {"ethnicity": "category"},
        "parse_dates": ["ethnicity_date"],
    }
    csv_args = _converters_to_names(study.pandas_csv_args)
    assert csv_args == expected
def test_bmi_dtype_generation():
    """Check the CSV-loading arguments generated for a BMI column.

    A ``most_recent_bmi`` column paired with a ``YYYY-MM`` ``date_of`` column
    is expected to yield a float dtype for the value, the ``add_day_to_date``
    converter for the date, and a link between the two in ``date_col_for``.
    """
    # No codelist is built here: most_recent_bmi takes none, and the one the
    # test previously constructed was never passed to anything.
    study = StudyDefinition(
        population=patients.all(),
        bmi=patients.most_recent_bmi(
            on_or_after="2010-02-01",
            minimum_age_at_measurement=16,
        ),
        bmi_date_measured=patients.date_of("bmi", date_format="YYYY-MM"),
    )

    result = _converters_to_names(study.pandas_csv_args)
    assert result == {
        "converters": {"bmi_date_measured": "add_day_to_date"},
        "dtype": {"bmi": "float"},
        "date_col_for": {"bmi": "bmi_date_measured"},
        "parse_dates": ["bmi_date_measured"],
    }
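# NOTE(review): the bare "Example #5" / "0" lines that stood here look like
# snippet-listing residue rather than test code; they are kept only as this
# comment so that importing the module does not fail on an undefined name.

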
def test_mean_recorded_value_dtype_generation():
    """Check the CSV-loading arguments generated for a mean recorded value.

    A ``mean_recorded_value`` column paired with a ``YYYY-MM`` ``date_of``
    column is expected to yield a float dtype for the value, the
    ``add_day_to_date`` converter for the date, and a link between the two in
    ``date_col_for``.
    """
    ctv3_codes = codelist(["X"], system="ctv3")
    study = StudyDefinition(
        population=patients.all(),
        bp_sys=patients.mean_recorded_value(
            ctv3_codes,
            on_most_recent_day_of_measurement=True,
            on_or_before="2020-02-01",
        ),
        bp_sys_date_measured=patients.date_of("bp_sys", date_format="YYYY-MM"),
    )

    expected = {
        "converters": {
            "bp_sys_date_measured": "add_day_to_date",
        },
        "dtype": {
            "bp_sys": "float",
        },
        "date_col_for": {
            "bp_sys": "bp_sys_date_measured",
        },
        "parse_dates": ["bp_sys_date_measured"],
    }
    csv_args = _converters_to_names(study.pandas_csv_args)
    assert csv_args == expected