def test_tail_filter_stalled_timeseries():
    # Make a timeseries that increases by 1,000 a day for 24 days.
    values_increasing = list(range(100_000, 124_000, 1_000))
    # Add 4 days that copy the 24th day. The filter is meant to remove these.
    values_stalled = values_increasing + [values_increasing[-1]] * 4
    assert len(values_stalled) == 28

    ds_in = test_helpers.build_default_region_dataset({CommonFields.NEW_CASES: values_stalled})
    tail_filter, ds_out = TailFilter.run(ds_in, [CommonFields.NEW_CASES])
    _assert_tail_filter_counts(tail_filter, truncated=1)
    tag_content = (
        "Removed 4 observations that look suspicious compared to mean diff of 1000.0 a few weeks "
        "ago."
    )
    truncated_timeseries = test_helpers.TimeseriesLiteral(
        values_increasing,
        annotation=[
            test_helpers.make_tag(
                TagType.CUMULATIVE_TAIL_TRUNCATED, date="2020-04-24", original_observation=123_000.0
            )
        ],
    )
    ds_expected = test_helpers.build_default_region_dataset(
        {CommonFields.NEW_CASES: truncated_timeseries}
    )
    test_helpers.assert_dataset_like(ds_out, ds_expected)

    # Try again with one day fewer. That is too short for the filter, so it returns the data
    # unmodified.
    ds_in = test_helpers.build_default_region_dataset({CommonFields.NEW_CASES: values_stalled[:-1]})
    tail_filter, ds_out = TailFilter.run(ds_in, [CommonFields.NEW_CASES])
    _assert_tail_filter_counts(tail_filter, skipped_too_short=1)
    test_helpers.assert_dataset_like(ds_out, ds_in)
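
# The tests in this excerpt call a _assert_tail_filter_counts helper that is not shown here.
# The following is a minimal sketch of what it plausibly looks like, assuming TailFilter exposes
# one counter attribute per keyword used in these tests; the attribute names are inferred from
# the keyword arguments, not confirmed against the real project.
def _assert_tail_filter_counts(
    tail_filter,
    *,
    skipped_too_short: int = 0,
    skipped_na_mean: int = 0,
    all_good: int = 0,
    truncated: int = 0,
    long_truncated: int = 0,
):
    assert tail_filter.skipped_too_short == skipped_too_short
    assert tail_filter.skipped_na_mean == skipped_na_mean
    assert tail_filter.all_good == all_good
    assert tail_filter.truncated == truncated
    assert tail_filter.long_truncated == long_truncated
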
def test_tail_filter_diff_goes_negative():
    # The end of this timeseries is (in 1000s) ... 127, 126, 127, 127. Only the last 127 is
    # expected to be truncated.
    values = list(range(100_000, 128_000, 1_000)) + [126_000, 127_000, 127_000]
    assert len(values) == 31

    ds_in = test_helpers.build_default_region_dataset({CommonFields.CASES: values})
    tail_filter, ds_out = TailFilter.run(ds_in, [CommonFields.CASES])
    ds_expected = test_helpers.build_default_region_dataset({CommonFields.CASES: values[:-1]})
    _assert_tail_filter_counts(tail_filter, truncated=1)
    test_helpers.assert_dataset_like(ds_out, ds_expected, drop_na_dates=True, compare_tags=False)
def test_annotation(rt_dataset, icu_dataset):
    region = Region.from_state("IL")
    tag = test_helpers.make_tag(date="2020-04-01", original_observation=10.0)
    death_url = UrlStr("http://can.com/death_source")
    cases_urls = [UrlStr("http://can.com/one"), UrlStr("http://can.com/two")]
    new_cases_url = UrlStr("http://can.com/new_cases")

    ds = test_helpers.build_default_region_dataset(
        {
            CommonFields.CASES: TimeseriesLiteral(
                [100, 200, 300], provenance="NYTimes", source_url=cases_urls
            ),
            # NEW_CASES has only source_url set, to make sure that an annotation is still output.
            CommonFields.NEW_CASES: TimeseriesLiteral([100, 100, 100], source_url=new_cases_url),
            CommonFields.CONTACT_TRACERS_COUNT: [10] * 3,
            CommonFields.ICU_BEDS: TimeseriesLiteral([20, 20, 20], provenance="NotFound"),
            CommonFields.CURRENT_ICU: [5, 5, 5],
            CommonFields.DEATHS: TimeseriesLiteral(
                [2, 3, 2], annotation=[tag], source_url=death_url
            ),
        },
        region=region,
        static={
            CommonFields.POPULATION: 100_000,
            CommonFields.STATE: "IL",
            CommonFields.CAN_LOCATION_PAGE_URL: "http://covidactnow.org/foo/bar",
        },
    )
def test_missing_column_for_one_method():
    ds = test_helpers.build_default_region_dataset(
        {
            CommonFields.POSITIVE_TESTS: [1, 2, 3, 4],
            CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                [10, 20, 30, 40], provenance="pos_viral"
            ),
            CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
        }
    )
    methods = [
        DivisionMethod(
            DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method3"),
            CommonFields.POSITIVE_TESTS,
            CommonFields.TOTAL_TESTS_PEOPLE_VIRAL,
        ),
    ]
    methods = _replace_methods_attribute(methods, recent_days=4)
    assert (
        AllMethods.run(ds, methods, diff_days=1)
        .test_positivity.provenance.loc[test_helpers.DEFAULT_REGION.location_id]
        .at[CommonFields.TEST_POSITIVITY]
        == "pos_viral"
    )
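
# test_tail_filter_long_stall takes stall_count and annotation_type as arguments, so the source
# presumably drives it with pytest.mark.parametrize; the decorator is missing from this excerpt.
# A plausible sketch with illustrative (not confirmed) parameter values: a short stall that the
# normal filter truncates, and a long stall that triggers the long-tail annotation.
@pytest.mark.parametrize(
    "stall_count, annotation_type",
    [(4, TagType.CUMULATIVE_TAIL_TRUNCATED), (20, TagType.CUMULATIVE_LONG_TAIL_TRUNCATED)],
)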
def test_tail_filter_long_stall(stall_count: int, annotation_type: TagType):
    # This timeseries has stalled for a long time.
    values = list(range(100_000, 128_000, 1_000)) + [127_000] * stall_count
    assert len(values) == 28 + stall_count

    ds_in = test_helpers.build_default_region_dataset({CommonFields.CASES: values})
    tail_filter, ds_out = TailFilter.run(ds_in, [CommonFields.CASES])
    # At most 14 stalled observations are ever removed.
    ds_expected = test_helpers.build_default_region_dataset(
        {CommonFields.CASES: values[: -min(stall_count, 14)]}
    )
    if annotation_type is TagType.CUMULATIVE_TAIL_TRUNCATED:
        _assert_tail_filter_counts(tail_filter, truncated=1)
    elif annotation_type is TagType.CUMULATIVE_LONG_TAIL_TRUNCATED:
        _assert_tail_filter_counts(tail_filter, long_truncated=1)

    test_helpers.assert_dataset_like(ds_out, ds_expected, drop_na_dates=True, compare_tags=False)
def test_tail_filter_zero_diff():
    # Make sure a constant-value timeseries is not truncated.
    values = [100_000] * 28

    ds_in = test_helpers.build_default_region_dataset({CommonFields.CASES: values})
    tail_filter, ds_out = TailFilter.run(ds_in, [CommonFields.CASES])
    _assert_tail_filter_counts(tail_filter, all_good=1)
    test_helpers.assert_dataset_like(ds_out, ds_in, drop_na_dates=True)
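
# test_recent_pos_neg_tests_has_positivity_ratio branches on a boolean pos_neg_tests_recent
# argument, so the source presumably parametrizes it over both values; the decorator is missing
# from this excerpt. A plausible sketch:
@pytest.mark.parametrize("pos_neg_tests_recent", [True, False])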
def test_recent_pos_neg_tests_has_positivity_ratio(pos_neg_tests_recent):
    # positive_tests and negative_tests appear on 8/10 and 8/11. They will be used when
    # that is within 10 days of 'today'.
    dataset_in = test_helpers.build_default_region_dataset(
        {
            CommonFields.TEST_POSITIVITY_7D: TimeseriesLiteral(
                [0.02, 0.03, 0.04, 0.05, 0.06, 0.07], provenance="CDCTesting"
            ),
            CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                [1, 2, None, None, None, None], provenance="pos"
            ),
            CommonFields.NEGATIVE_TESTS: [10, 20, None, None, None, None],
        },
        start_date="2020-08-10",
    )

    if pos_neg_tests_recent:
        freeze_date = "2020-08-21"
        # positive_tests and negative_tests are used
        expected_metrics = {
            CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                [pd.NA, 0.0909, pd.NA, pd.NA, pd.NA, pd.NA], provenance="pos"
            )
        }
        expected = test_helpers.build_default_region_dataset(
            expected_metrics, start_date="2020-08-10"
        )

    else:
        freeze_date = "2020-08-22"
        # positive_tests and negative_tests no longer recent so test_positivity_7d is copied to
        # output.
        expected_metrics = {
            CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                [0.02, 0.03, 0.04, 0.05, 0.06, 0.07], provenance="CDCTesting"
            )
        }
        expected = test_helpers.build_default_region_dataset(
            expected_metrics, start_date="2020-08-10"
        )

    with freeze_time(freeze_date):
        all_methods = AllMethods.run(dataset_in)

    # check_less_precise so only 3 digits need to match for testPositivityRatio
    test_helpers.assert_dataset_like(all_methods.test_positivity, expected, check_less_precise=True)
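
# test_tail_filter_small_diff also takes a stall_count argument; a plausible parametrize
# decorator is sketched below. The values are illustrative assumptions: short stalls that the
# filter is expected to keep.
@pytest.mark.parametrize("stall_count", [1, 2])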
def test_tail_filter_small_diff(stall_count: int):
    # Make sure a stall (zero increase) in the most recent value(s) of a slowly increasing
    # series is not dropped.
    values = list(range(1_000, 1_030)) + [1_029] * stall_count

    ds_in = test_helpers.build_default_region_dataset({CommonFields.CASES: values})
    tail_filter, ds_out = TailFilter.run(ds_in, [CommonFields.CASES])
    _assert_tail_filter_counts(tail_filter, all_good=1)
    test_helpers.assert_dataset_like(ds_out, ds_in, drop_na_dates=True)
def test_tail_filter_mean_nan():
    # Make a timeseries that has 14 days of NaN, then 14 days of increasing values. The first
    # 100_000 is there so the NaNs form a gap that isn't dropped by unrelated code.
    values = [100_000] + [float("NaN")] * 14 + list(range(100_000, 114_000, 1_000))
    assert len(values) == 29

    ds_in = test_helpers.build_default_region_dataset({CommonFields.NEW_CASES: values})
    tail_filter, ds_out = TailFilter.run(ds_in, [CommonFields.NEW_CASES])
    _assert_tail_filter_counts(tail_filter, skipped_na_mean=1)
    test_helpers.assert_dataset_like(ds_out, ds_in, drop_na_dates=True)
def test_tail_filter_two_series():
    # Check that two series are both filtered. Currently the 'good' dates of 14-28 days ago are
    # relative to the most recent date of any timeseries, but maybe it should be per-timeseries.
    pos_tests = list(range(100_000, 128_000, 1_000))
    tot_tests = list(range(1_000_000, 1_280_000, 10_000))
    # Pad positive tests with two 'None's so the timeseries are the same length.
    pos_tests_stalled = pos_tests + [pos_tests[-1]] * 3 + [None] * 2
    tot_tests_stalled = tot_tests + [tot_tests[-1]] * 5

    ds_in = test_helpers.build_default_region_dataset(
        {
            CommonFields.POSITIVE_TESTS: pos_tests_stalled,
            CommonFields.TOTAL_TESTS: tot_tests_stalled,
        }
    )
    tail_filter, ds_out = TailFilter.run(
        ds_in, [CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS]
    )
    ds_expected = test_helpers.build_default_region_dataset(
        {CommonFields.POSITIVE_TESTS: pos_tests, CommonFields.TOTAL_TESTS: tot_tests}
    )
    _assert_tail_filter_counts(tail_filter, truncated=2)
    test_helpers.assert_dataset_like(ds_out, ds_expected, drop_na_dates=True, compare_tags=False)
def test_annotation_all_fields_copied(rt_dataset, icu_dataset):
    region = Region.from_state("IL")
    # Create a dataset with bogus data for every field in CommonFields, excluding a few that are
    # not expected to have timeseries values.
    fields_excluded = {*TIMESERIES_INDEX_FIELDS, *GEO_DATA_COLUMNS, CommonFields.LOCATION_ID}
    ds = test_helpers.build_default_region_dataset(
        {
            field: TimeseriesLiteral([100, 200, 300], provenance="NYTimes")
            for field in CommonFields
            if field not in fields_excluded
        },
        region=region,
        static={
            CommonFields.POPULATION: 100_000,
            CommonFields.STATE: "IL",
            CommonFields.CAN_LOCATION_PAGE_URL: "http://covidactnow.org/foo/bar",
        },
    )
def test_missing_columns_for_all_tests():
    ds = test_helpers.build_default_region_dataset(
        {FieldName("m1"): [1, 2, 3, 4], FieldName("m2"): [10, 20, 30, 40]}
    )
    methods = [
        DivisionMethod(
            DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method3"),
            CommonFields.POSITIVE_TESTS,
            CommonFields.TOTAL_TESTS_PEOPLE_VIRAL,
        ),
    ]
    methods = _replace_methods_attribute(methods, recent_days=4)
    with pytest.raises(test_positivity.NoMethodsWithRelevantColumns):
        AllMethods.run(ds, methods, diff_days=1)
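

# The two method tests above pass the methods list through a _replace_methods_attribute helper
# that is not shown in this excerpt. A minimal sketch of what it plausibly does, assuming the
# method objects are attrs-style classes; the use of attr.evolve here is an assumption.
import attr


def _replace_methods_attribute(methods, **kwargs):
    # Return a copy of each method with the given attributes (e.g. recent_days) replaced.
    return [attr.evolve(method, **kwargs) for method in methods]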