def test_make_df_from_expectations_with_categories_expression_validation():
    """Generating dummy data must fail when the expectations are inconsistent.

    The category rules define only "A" and "B", while the expected ratios
    also list "C". The test requires ``make_df_from_expectations`` to raise
    ``ValueError`` for this study definition (presumably because of that
    mismatch -- it is the only difference from the passing sibling test).
    """
    category_rules = {"A": "sex = 'F'", "B": "sex = 'M'"}
    # Note the extra "C" entry, which has no corresponding rule above.
    expected_ratios = {"A": 0.3, "B": 0.6, "C": 0.1}
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": expected_ratios},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }

    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_rules,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )

    with pytest.raises(ValueError):
        study.make_df_from_expectations(10000)


def test_make_df_from_expectations_with_categories_expression():
    """Dummy data for a categorised column should follow the expected ratios.

    With expected ratios of 0.3 for "A" and 0.7 for "B", the generated
    dataframe must contain fewer "A" values than "B" values in its
    ``category`` column.
    """
    category_rules = {"A": "sex = 'F'", "B": "sex = 'M'"}
    expectations = {
        "rate": "exponential_increase",
        "incidence": 0.2,
        "category": {"ratios": {"A": 0.3, "B": 0.7}},
        "date": {"earliest": "1900-01-01", "latest": "today"},
    }

    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_rules,
            sex=patients.sex(),
            return_expectations=expectations,
        ),
    )

    dummy_df = study.make_df_from_expectations(10000)
    counts = dummy_df.category.value_counts()
    # Only the ordering is checked, not the exact proportions.
    assert counts["A"] < counts["B"]


def test_patients_categorised_as():
    """Check ``patients.categorised_as`` against four fixture patients.

    Four patients with different sexes and coded events are stored, then a
    study with a single ``category`` column is extracted. The test asserts
    the category assigned to each patient and that the helper columns used
    only inside the category expressions are absent from the output.
    """
    event_date = "2000-01-01"
    # (sex, CTV3 codes recorded for the patient), in insertion order.
    patient_specs = [
        ("M", ["foo1"]),
        ("F", ["foo2", "bar1"]),
        ("M", ["foo2"]),
        ("F", ["foo3"]),
    ]

    session = make_session()
    session.add_all(
        [
            Patient(
                Sex=sex,
                CodedEvents=[
                    CodedEvent(CTV3Code=code, ConsultationDate=event_date)
                    for code in codes
                ],
            )
            for sex, codes in patient_specs
        ]
    )
    session.commit()

    foo_codes = codelist([("foo1", "A"), ("foo2", "B"), ("foo3", "C")], "ctv3")
    bar_codes = codelist(["bar1"], "ctv3")

    category_rules = {
        "W": "foo_category = 'B' AND female_with_bar",
        "X": "sex = 'F' AND (foo_category = 'B' OR foo_category = 'C')",
        "Y": "sex = 'M' AND foo_category = 'A'",
        "Z": "DEFAULT",
    }
    study = StudyDefinition(
        population=patients.all(),
        category=patients.categorised_as(
            category_rules,
            sex=patients.sex(),
            foo_category=patients.with_these_clinical_events(
                foo_codes, returning="category", find_last_match_in_period=True
            ),
            female_with_bar=patients.satisfying(
                "has_bar AND sex = 'F'",
                has_bar=patients.with_these_clinical_events(bar_codes),
            ),
        ),
    )

    rows = study.to_dicts()
    # Expected, in patient order: M/foo1 -> Y; F/foo2+bar1 -> W (this patient
    # also satisfies the "X" rule, so "W" presumably wins by being listed
    # first); M/foo2 matches no rule -> DEFAULT "Z"; F/foo3 -> X.
    assigned = [row["category"] for row in rows]
    assert assigned == ["Y", "W", "Z", "X"]

    # Columns used only inside the expressions must not leak into the output.
    first_row_columns = rows[0].keys()
    for internal_name in ("foo_category", "female_with_bar", "has_bar"):
        assert internal_name not in first_row_columns
예제 #4
0
 smoking_status=patients.categorised_as(
     {
         "S": "most_recent_smoking_code = 'S'",
         "E": """
                  most_recent_smoking_code = 'E' OR (    
                    most_recent_smoking_code = 'N' AND ever_smoked   
                  )  
             """,
         "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
         "M": "DEFAULT",
     },
     return_expectations={
         "category": {
             "ratios": {
                 "S": 0.6,
                 "E": 0.1,
                 "N": 0.2,
                 "M": 0.1
             }
         }
     },
     most_recent_smoking_code=patients.with_these_clinical_events(
         clear_smoking_codes,
         find_last_match_in_period=True,
         on_or_before="2020-02-29",
         returning="category",
     ),
     ever_smoked=patients.with_these_clinical_events(
         filter_codes_by_category(clear_smoking_codes, include=["S", "E"]),
         on_or_before="2020-02-29",
     ),
 ),
        include_measurement_date=True,
        include_month=True,
    ),

    # https://github.com/ebmdatalab/tpp-sql-notebook/issues/6
    smoking_status=patients.categorised_as(
        {
            "S": "most_recent_smoking_code = 'S'",
            "E": """
                 most_recent_smoking_code = 'E' OR (
                   most_recent_smoking_code = 'N' AND ever_smoked
                 )
            """,
            "N": "most_recent_smoking_code = 'N' AND NOT ever_smoked",
            "M": "DEFAULT"
        },
        most_recent_smoking_code=patients.with_these_clinical_events(
            clear_smoking_codes,
            find_last_match_in_period=True,
            on_or_before='2020-02-01',
            returning="category",
        ),
        ever_smoked=patients.with_these_clinical_events(
            filter_codes_by_category(clear_smoking_codes, include=['S', 'E']),
            on_or_before='2020-02-01'
        ),
    ),
    smoking_status_date=patients.with_these_clinical_events(
        clear_smoking_codes,
        on_or_before='2020-02-01',
        return_last_date_in_period=True,
        include_month=True,