コード例 #1
0
def test_provenance():
    """Check which provenance string ends up on the derived TEST_POSITIVITY series.

    AS supplies only POSITIVE_TESTS while TX supplies both POSITIVE_TESTS and
    POSITIVE_TESTS_VIRAL; both regions share the same TOTAL_TESTS values. Two
    division methods are run with ``diff_days=3`` and the output is compared
    against one expected observation per region, dated 2020-04-04.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    source = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                    [0, 2, 4, 6], provenance="pt_src1"
                ),
                CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
            },
            tx_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                    [1, 2, 3, 4], provenance="pt_src2"
                ),
                CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                    [10, 20, 30, 40], provenance="pos_viral"
                ),
                CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
            },
        }
    )

    # method1 (viral positives) is listed ahead of method2 (all positives).
    viral_over_total = DivisionMethod(
        DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
    )
    positive_over_total = DivisionMethod(
        DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
    )
    result = AllMethods.run(source, [viral_over_total, positive_over_total], diff_days=3)

    # AS is expected to carry the POSITIVE_TESTS provenance, TX the
    # POSITIVE_TESTS_VIRAL one.
    expected = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.02], provenance=["pt_src1"])
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1], provenance="pos_viral")
            },
        },
        start_date="2020-04-04",
    )
    test_helpers.assert_dataset_like(result.test_positivity, expected)
コード例 #2
0
def test_annotation(rt_dataset, icu_dataset):
    """Build an IL dataset whose timeseries mix provenance, source URLs and tags.

    NOTE(review): the visible part of this test only constructs ``ds``; no
    assertions (and no use of ``rt_dataset`` / ``icu_dataset``) are visible
    here, so the remainder of the test is presumably outside this snippet.
    """
    region = Region.from_state("IL")
    tag = test_helpers.make_tag(date="2020-04-01", original_observation=10.0)
    death_url = UrlStr("http://can.com/death_source")
    # CASES gets two source URLs; the other fields below get at most one.
    cases_urls = [UrlStr("http://can.com/one"), UrlStr("http://can.com/two")]
    new_cases_url = UrlStr("http://can.com/new_cases")

    ds = test_helpers.build_default_region_dataset(
        {
            # CASES has both a provenance and a list of source URLs.
            CommonFields.CASES:
            TimeseriesLiteral(
                [100, 200, 300], provenance="NYTimes", source_url=cases_urls),
            # NEW_CASES has only source_url set, to make sure that an annotation is still output.
            CommonFields.NEW_CASES:
            TimeseriesLiteral([100, 100, 100], source_url=new_cases_url),
            # Plain lists carry neither provenance, source URL nor tags.
            CommonFields.CONTACT_TRACERS_COUNT: [10] * 3,
            CommonFields.ICU_BEDS:
            TimeseriesLiteral([20, 20, 20], provenance="NotFound"),
            CommonFields.CURRENT_ICU: [5, 5, 5],
            # DEATHS carries a tag annotation plus a single source URL.
            CommonFields.DEATHS:
            TimeseriesLiteral(
                [2, 3, 2], annotation=[tag], source_url=death_url),
        },
        region=region,
        static={
            CommonFields.POPULATION: 100_000,
            CommonFields.STATE: "IL",
            CommonFields.CAN_LOCATION_PAGE_URL:
            "http://covidactnow.org/foo/bar",
        },
    )
コード例 #3
0
def test_pyseir_end_to_end_dc(tmp_path):
    """Build the pyseir pipelines for DC with the output directory patched to ``tmp_path``.

    NOTE(review): ``region`` is assigned but not used in the visible lines, so
    this snippet presumably stops before the end of the original test.
    """
    # Runs over a single state which tests state filtering + running over more than
    # a single fips.
    with unittest.mock.patch("pyseir.utils.OUTPUT_DIR", str(tmp_path)):
        region = Region.from_state("DC")
        pipelines = cli._build_all_for_states(states=["DC"])
        # Checking to make sure that build all for states properly filters and only
        # returns DC data
        assert len(pipelines) == 2
コード例 #4
0
def il_regional_input(rt_dataset, icu_dataset):
    """Return a ``RegionalInput`` for Illinois built from the combined US timeseries.

    The combined dataset is loaded, narrowed to the IL state region and handed
    to ``RegionalInput.from_region_and_model_output`` together with the given
    ``rt_dataset`` and ``icu_dataset``.
    """
    illinois = Region.from_state("IL")
    us_timeseries = combined_datasets.load_us_timeseries_dataset()
    illinois_data = us_timeseries.get_regions_subset([illinois])
    # TODO(tom): add test positivity back in after PR 728 is merged, i.e. run
    # test_positivity.AllMethods.run on the regional data and join_columns its
    # test_positivity output back into the regional data.
    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        illinois, illinois_data, rt_dataset, icu_dataset
    )
コード例 #5
0
def il_regional_input(rt_dataset, icu_dataset):
    """Return a ``RegionalInput`` for Illinois, with test positivity columns applied.

    The combined US timeseries is narrowed to the IL state region, passed
    through ``test_positivity.run_and_maybe_join_columns`` and then handed to
    ``RegionalInput.from_region_and_model_output`` together with the given
    ``rt_dataset`` and ``icu_dataset``.
    """
    illinois = Region.from_state("IL")
    us_timeseries = combined_datasets.load_us_timeseries_dataset()
    illinois_data = us_timeseries.get_regions_subset([illinois])
    log = structlog.get_logger()
    illinois_data = test_positivity.run_and_maybe_join_columns(illinois_data, log)

    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        illinois, illinois_data, rt_dataset, icu_dataset
    )
コード例 #6
0
def test_preserve_tags():
    """Check which input tags are carried over onto the derived TEST_POSITIVITY series.

    For AS the tags on both POSITIVE_TESTS and TOTAL_TESTS are expected on the
    output. For TX only the TOTAL_TESTS tags are expected; the tag attached to
    the TX POSITIVE_TESTS series is expected to be absent from the output.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    long_tail_tag = test_helpers.make_tag(
        type=TagType.CUMULATIVE_LONG_TAIL_TRUNCATED, date="2020-04-04"
    )
    tail_tag = test_helpers.make_tag(type=TagType.CUMULATIVE_TAIL_TRUNCATED, date="2020-04-04")
    # Attached to TX POSITIVE_TESTS and not part of the expected output below.
    dropped_tag = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-01")
    outlier_apr4_tag = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-04")
    outlier_apr3_tag = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-03")

    source = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                    [1, 2, 3, 4], annotation=[long_tail_tag], provenance="pos"
                ),
                CommonFields.TOTAL_TESTS: TimeseriesLiteral(
                    [100, 200, 300, 400], annotation=[tail_tag]
                ),
            },
            tx_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                    [None, None, 3, 4], annotation=[dropped_tag]
                ),
                CommonFields.POSITIVE_TESTS_VIRAL: [10, 20, 30, 40],
                CommonFields.TOTAL_TESTS: TimeseriesLiteral(
                    [100, 200, 300, 400], annotation=[outlier_apr4_tag, outlier_apr3_tag]
                ),
            },
        }
    )

    positive_over_total = DivisionMethod(
        DatasetName("method1"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
    )
    viral_over_total = DivisionMethod(
        DatasetName("method2"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
    )
    result = AllMethods.run(source, [positive_over_total, viral_over_total], diff_days=3)

    expected = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.01], provenance="pos", annotation=[long_tail_tag, tail_tag]
                )
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.1], annotation=[outlier_apr4_tag, outlier_apr3_tag]
                )
            },
        },
        start_date="2020-04-04",
    )
    test_helpers.assert_dataset_like(result.test_positivity, expected)
コード例 #7
0
def test_top_level_metrics_no_pos_neg_tests_has_positivity_ratio():
    """Set up NY inputs that include TEST_POSITIVITY but no positive/negative test counts.

    NOTE(review): only the fixture data is visible here; the code that consumes
    ``ny_region``, ``metrics`` and ``latest`` and the assertions are presumably
    outside this snippet.
    """
    ny_region = Region.from_state("NY")
    # Timeseries inputs: cases, new cases and an already-computed positivity ratio.
    metrics = {
        CommonFields.CASES: [10, 20, 30, 40],
        CommonFields.NEW_CASES: [10, 10, 10, 10],
        CommonFields.TEST_POSITIVITY: [0.02, 0.03, 0.04, 0.05],
    }
    # Scalar (non-timeseries) values for the region.
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
        CommonFields.ICU_BEDS: 10,
    }
コード例 #8
0
ファイル: data.py プロジェクト: epius/covid-data-model
def update_test_combined_data(truncate_dates: bool, state: List[str]):
    """Write a reduced copy of the combined US dataset to the test CSV paths.

    Args:
        truncate_dates: When true, keep only rows dated from 2021-01-01
            (inclusive) up to 2021-04-01 (exclusive).
        state: State strings to keep, in addition to the fixed regions listed
            below. Surrounding whitespace is stripped from each entry.
    """
    full_dataset = combined_datasets.load_us_timeseries_dataset()
    # Keep only a small subset of the regions so we have enough to exercise our code in tests.
    regions_to_keep = [
        RegionMask(states=[name.strip() for name in state]),
        Region.from_fips("48201"),
        Region.from_fips("48301"),
        Region.from_fips("20161"),
        Region.from_state("TX"),
        Region.from_state("KS"),
    ]
    subset = full_dataset.get_regions_subset(regions_to_keep)

    if truncate_dates:
        bucketed = subset.timeseries_bucketed
        row_dates = bucketed.index.get_level_values(CommonFields.DATE)
        in_range = (row_dates >= "2021-01-01") & (row_dates < "2021-04-01")
        subset = dataclasses.replace(subset, timeseries_bucketed=bucketed.loc[in_range])

    subset.write_to_wide_dates_csv(
        dataset_utils.TEST_COMBINED_WIDE_DATES_CSV_PATH,
        dataset_utils.TEST_COMBINED_STATIC_CSV_PATH,
    )
コード例 #9
0
def test_basic():
    """Check ``drop_all_zero_timeseries`` on VACCINES_DISTRIBUTED for three regions.

    The expected output keeps the SF series (which has a non-zero value) and
    the tagged HI CASES series, and exactly one log event is expected listing
    the HI and TX VACCINES_DISTRIBUTED series as dropped.
    """
    tx = Region.from_state("TX")
    sf = Region.from_fips("06075")
    hi = Region.from_state("HI")
    vaccines = CommonFields.VACCINES_DISTRIBUTED
    # Add a timeseries with a tag to make sure they are preserved.
    tagged_cases = TimeseriesLiteral(
        [0, 0, 0], annotation=[test_helpers.make_tag(date="2020-04-01")]
    )
    dataset_in = test_helpers.build_dataset(
        {
            tx: {vaccines: [0, 0, 0]},
            sf: {vaccines: [0, 0, 1]},
            hi: {vaccines: [0, 0, None], CommonFields.CASES: tagged_cases},
        }
    )

    with structlog.testing.capture_logs() as captured:
        dataset_out = zeros_filter.drop_all_zero_timeseries(dataset_in, [vaccines])

    expected = test_helpers.build_dataset(
        {
            sf: {vaccines: [0, 0, 1]},
            hi: {CommonFields.CASES: tagged_cases},
        }
    )
    # Exactly one log record is expected.
    only_log = more_itertools.one(captured)
    assert only_log["event"] == zeros_filter.DROPPING_TIMESERIES_WITH_ONLY_ZEROS
    expected_dropped = pd.MultiIndex.from_tuples(
        [
            (hi.location_id, vaccines),
            (tx.location_id, vaccines),
        ]
    )
    assert expected_dropped.equals(only_log["dropped"])
    test_helpers.assert_dataset_like(expected, dataset_out)
コード例 #10
0
def test_default_positivity_methods():
    """Run ``AllMethods.run`` with its default methods on two small regions.

    AS supplies POSITIVE_TESTS and NEGATIVE_TESTS, TX supplies
    POSITIVE_TESTS_VIRAL and TOTAL_TESTS_VIRAL. The expected output is a
    constant 0.1 for AS and 0.2 for TX, starting on 2020-04-02.
    """
    # This test intentionally doesn't pass any methods to AllMethods.run to run the methods used
    # in production.
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    source = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
                    [0, 1, 2, 3, 4, 5, 6, 7], provenance="src1"
                ),
                CommonFields.NEGATIVE_TESTS: TimeseriesLiteral(
                    [10, 19, 28, 37, 46, 55, 64, 73], provenance="src1"
                ),
            },
            tx_region: {
                CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                    [2, 4, 6, 8, 10, 12, 14, 16], provenance="pos_tests"
                ),
                CommonFields.TOTAL_TESTS_VIRAL: [10, 20, 30, 40, 50, 60, 70, 80],
            },
        }
    )

    # TODO(tom): Once test positivity code seems stable remove call to datetime.today() in
    #  has_recent_data and remove this freeze_time.
    with freeze_time("2020-04-14"):
        result = AllMethods.run(source, diff_days=1)

    # Seven observations per region: one fewer than the eight input values.
    expected = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1] * 7, provenance="src1")
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.2] * 7, provenance="pos_tests")
            },
        },
        start_date="2020-04-02",
    )
    test_helpers.assert_dataset_like(result.test_positivity, expected)
コード例 #11
0
def il_regional_input_empty_test_positivity_column(rt_dataset, icu_dataset):
    """Return a ``RegionalInput`` for Illinois with an empty TEST_POSITIVITY column joined in.

    The IL subset of the combined US timeseries is joined with a dataset built
    from a DataFrame that has the LOCATION_ID, DATE and TEST_POSITIVITY columns
    but no rows.
    """
    illinois = Region.from_state("IL")
    illinois_data = combined_datasets.load_us_timeseries_dataset().get_regions_subset(
        [illinois]
    )

    # A frame with the right columns and zero rows.
    no_rows = pd.DataFrame(
        [],
        columns=[CommonFields.LOCATION_ID, CommonFields.DATE, CommonFields.TEST_POSITIVITY],
    )
    empty_positivity = timeseries.MultiRegionTimeseriesDataset.from_timeseries_df(no_rows)
    illinois_data = illinois_data.join_columns(empty_positivity)

    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        illinois, illinois_data, rt_dataset, icu_dataset
    )
コード例 #12
0
def test_column_present_with_no_data():
    """``AllMethods.run`` raises when the numerator column exists but holds no data.

    The dataset has TOTAL_TESTS values for TX, and POSITIVE_TESTS is requested
    only through ``timeseries_columns`` without any values supplied for it.
    """
    tx_region = Region.from_state("TX")
    totals_only = {tx_region: {CommonFields.TOTAL_TESTS: [100, 200, 400]}}
    dataset = test_helpers.build_dataset(
        totals_only, timeseries_columns=[CommonFields.POSITIVE_TESTS]
    )
    division = DivisionMethod(
        DatasetName("method2"),
        CommonFields.POSITIVE_TESTS,
        CommonFields.TOTAL_TESTS,
        recent_days=1,
    )
    with pytest.raises(test_positivity.NoColumnsWithDataException):
        AllMethods.run(dataset, [division], diff_days=1)
コード例 #13
0
def test_aggregate_reporting_ratio(reporting_ratio, expected_na):
    """Check the effect of ``reporting_ratio_required_to_aggregate`` on aggregated CASES.

    NY (population 900) reports one CASES value while AZ (population 100)
    reports None; both are mapped onto the same target FIPS through the
    aggregator's ``county_map``.

    Args:
        reporting_ratio: Value passed as ``reporting_ratio_required_to_aggregate``.
        expected_na: When truthy, the aggregated CASES timeseries must be empty;
            otherwise it must contain at least one row.

    NOTE(review): the arguments presumably come from a ``pytest.mark.parametrize``
    decorator that is not visible here.
    """
    ny_region = Region.from_state("NY")
    az_region = Region.from_state("AZ")
    us_region = Region.from_iso1("us")
    metrics = {ny_region: {CommonFields.CASES: [100]}, az_region: {CommonFields.CASES: [None]}}
    static = {ny_region: {CommonFields.POPULATION: 900}, az_region: {CommonFields.POPULATION: 100}}
    dataset = test_helpers.build_dataset(metrics, static_by_region_then_field_name=static)

    # The aggregator is keyed by FIPS, so the region -> region mapping is
    # expressed directly through ``county_map``.
    agg = statistical_areas.CountyToCBSAAggregator(
        county_map={ny_region.fips: us_region.fips, az_region.fips: us_region.fips},
        cbsa_title_map={us_region.fips: "Stat Area 1"},
        aggregations=[],
    )
    aggregation = agg.aggregate(dataset, reporting_ratio_required_to_aggregate=reporting_ratio)
    cases = aggregation.timeseries[CommonFields.CASES]
    if expected_na:
        assert not len(cases), "expected no aggregated CASES rows"
    else:
        assert len(cases), "expected aggregated CASES rows"
コード例 #14
0
def test_top_level_metrics_no_pos_neg_tests_no_positivity_ratio():
    """Set up NY inputs with no positive/negative test counts and no TEST_POSITIVITY.

    NOTE(review): only the fixture data is visible here; the code that consumes
    ``region_ny``, ``metrics`` and ``latest`` and the assertions are presumably
    outside this snippet.
    """
    region_ny = Region.from_state("NY")
    # All of positive_tests, negative_tests are empty and test_positivity is absent. Make sure
    # other metrics are still produced.
    metrics = {
        CommonFields.CASES: [10, 20, 30, 40],
        CommonFields.NEW_CASES: [10, 10, 10, 10],
        CommonFields.CONTACT_TRACERS_COUNT: [1, 2, 3, 4],
    }
    # Scalar (non-timeseries) values for the region.
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
        CommonFields.ICU_BEDS: 10,
    }
コード例 #15
0
def test_top_level_metrics_incomplete_latest():
    """Set up NY inputs whose ``latest`` values omit ICU_BEDS.

    NOTE(review): only the fixture data is visible here; the code that consumes
    ``region_ny``, ``metrics`` and ``latest`` and the assertions are presumably
    outside this snippet.
    """
    region_ny = Region.from_state("NY")
    # This test doesn't have ICU_BEDS set in `latest`. It checks that the metrics are still built.
    metrics = {
        # CASES and TEST_POSITIVITY each contain one missing (None) observation.
        CommonFields.CASES: [10, 20, None, 40],
        CommonFields.NEW_CASES: [10, 10, 10, 10],
        CommonFields.TEST_POSITIVITY: [None, 0.1, 0.1, 0.1],
        CommonFields.CONTACT_TRACERS_COUNT: [1, 2, 3, 4],
        CommonFields.CURRENT_ICU: [10, 10, 10, 10],
        CommonFields.CURRENT_ICU_TOTAL: [20, 20, 20, 20],
        # The ICU_BEDS timeseries is present but entirely None.
        CommonFields.ICU_BEDS: [None, None, None, None],
    }
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.STATE: "NY",
        # ICU_BEDS not set
    }
コード例 #16
0
def test_dataclass_include_exclude():
    """Tests datasource_regions using mock data for speed."""
    fields = {CommonFields.CASES: [100, 200, 300], CommonFields.DEATHS: [0, 1, 2]}
    state_regions = [Region.from_state(abbr) for abbr in ["AZ", "CA", "NY", "IL", "TX"]]
    county_regions = [
        Region.from_fips(code) for code in ["06037", "06045", "17031", "17201"]
    ]
    source_dataset = test_helpers.build_dataset(
        {region: fields for region in state_regions + county_regions}
    )

    # Make a new subclass to keep this test separate from others in the make_dataset lru_cache.
    class DataSourceForTest(data_source.DataSource):
        EXPECTED_FIELDS = [CommonFields.CASES, CommonFields.DEATHS]
        SOURCE_TYPE = "DataSourceForTest"

        @classmethod
        def make_dataset(cls) -> timeseries.MultiRegionDataset:
            return source_dataset

    tx_id = "iso1:us#iso2:us-tx"
    ny_id = "iso1:us#iso2:us-ny"

    # Unfiltered source: both states are present.
    unfiltered_ids = DataSourceForTest.make_dataset().location_ids
    assert tx_id in unfiltered_ids
    assert ny_id in unfiltered_ids

    # Restricting to NY drops TX.
    ny_only_source = combined_datasets.datasource_regions(
        DataSourceForTest, RegionMask(states=["NY"])
    )
    ny_only_ids = ny_only_source.make_dataset().location_ids
    assert tx_id not in ny_only_ids
    assert ny_id in ny_only_ids

    # CA counties only, with FIPS 06037 explicitly excluded.
    ca_counties_source = combined_datasets.datasource_regions(
        DataSourceForTest,
        RegionMask(AggregationLevel.COUNTY, states=["CA"]),
        exclude=Region.from_fips("06037"),
    )
    ca_county_ids = ca_counties_source.make_dataset().location_ids
    assert tx_id not in ca_county_ids
    assert "iso1:us#iso2:us-ca" not in ca_county_ids
    assert "iso1:us#iso2:us-ca#fips:06045" in ca_county_ids
    assert "iso1:us#iso2:us-ca#fips:06037" not in ca_county_ids

    # Just Cook County, IL
    cook_source = combined_datasets.datasource_regions(
        DataSourceForTest, include=Region.from_fips("17031")
    )
    cook_ids = cook_source.make_dataset().location_ids
    assert cook_ids.to_list() == ["iso1:us#iso2:us-il#fips:17031"]
コード例 #17
0
def test_annotation_all_fields_copied(rt_dataset, icu_dataset):
    """Build an IL dataset with the same provenance-tagged series for almost every field.

    NOTE(review): the visible part of this test only constructs ``ds``; no
    assertions (and no use of ``rt_dataset`` / ``icu_dataset``) are visible
    here, so the remainder of the test is presumably outside this snippet.
    """
    region = Region.from_state("IL")
    # Create a dataset with bogus data for every CommonFields, excluding a few that are not
    # expected to have timeseries values.
    fields_excluded = {
        *TIMESERIES_INDEX_FIELDS, *GEO_DATA_COLUMNS, CommonFields.LOCATION_ID
    }
    ds = test_helpers.build_default_region_dataset(
        {
            # Every remaining field gets the same values and the same provenance.
            field: TimeseriesLiteral([100, 200, 300], provenance="NYTimes")
            for field in CommonFields if field not in fields_excluded
        },
        region=region,
        static={
            CommonFields.POPULATION: 100_000,
            CommonFields.STATE: "IL",
            CommonFields.CAN_LOCATION_PAGE_URL:
            "http://covidactnow.org/foo/bar",
        },
    )
コード例 #18
0
def _transform_one_override(
        override: Mapping,
        cbsa_to_counties_map: Mapping[Region, List[Region]]) -> Filter:
    """Convert one override mapping into a ``Filter``.

    Args:
        override: Mapping with the keys "region", "include", "metric",
            "context" and "blocked", plus an optional "disclaimer".
        cbsa_to_counties_map: Counties belonging to each CBSA region; consulted
            only for "region-and-subregions" on a CBSA region.

    Returns:
        A ``Filter`` covering the selected regions and the fields mapped from
        the override's metric.

    Raises:
        ValueError: If "region" is neither two uppercase letters nor five
            digits, if "include" is not a recognised value, or if the
            requested include is not valid for the kind of region.
    """
    region_code = override["region"]
    if re.fullmatch(r"[A-Z][A-Z]", region_code):
        # Two uppercase letters: a state.
        target = Region.from_state(region_code)
    elif re.fullmatch(r"\d{5}", region_code):
        # Five digits: a FIPS code.
        target = Region.from_fips(region_code)
    else:
        raise ValueError(f"Invalid region: {region_code}")

    scope = override["include"]
    if scope == "region":
        selected = [target]
    elif scope == "region-and-subregions":
        if target.is_state():
            selected = [RegionMask(states=[target.state])]
        elif target.level == AggregationLevel.CBSA:
            selected = [target] + cbsa_to_counties_map[target]
        else:
            raise ValueError(
                "region-and-subregions only valid for a state and CBSA")
    elif scope == "subregions":
        if not target.is_state():
            raise ValueError("subregions only valid for a state")
        county_mask = RegionMask(AggregationLevel.COUNTY, states=[target.state])
        selected = [county_mask]
    else:
        raise ValueError(f"Invalid include: {scope}")

    # Read the remaining keys in a fixed order so a missing key surfaces the
    # same KeyError regardless of the other values.
    fields = _METRIC_TO_FIELDS[override["metric"]]
    note = override["context"]
    disclaimer = override.get("disclaimer", "")
    blocked = bool(override["blocked"])
    return Filter(
        regions_included=selected,
        fields_included=fields,
        internal_note=note,
        public_note=disclaimer,
        drop_observations=blocked,
    )
コード例 #19
0
def test_make_latest_from_timeseries_dont_touch_county():
    """Check the ``latest`` values of two counties and one state built from a small CSV.

    Each county is expected to expose its own "county" name and "m1" value;
    the state row is expected to expose only "m1". "m2" is empty in every row
    and is expected to be absent everywhere.
    """
    csv_text = (
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "95123,Smith Countyy,YY,USA,2020-04-01,county,1,\n"
        "97123,Smith Countzz,ZZ,USA,2020-04-01,county,2,\n"
        "56,,WY,USA,2020-04-01,state,3,\n"
    )
    frame = read_csv_and_index_fips_date(csv_text).reset_index()
    dataset = timeseries.MultiRegionDataset.from_fips_timeseries_df(frame)

    def get_latest(region) -> dict:
        """Returns the non-None "county", "m1" and "m2" entries of latest for region."""
        latest = dataset.get_one_region(region).latest
        present = {}
        for key in ("county", "m1", "m2"):
            if latest.get(key) is not None:
                present[key] = latest[key]
        return present

    smith_yy = get_latest(Region.from_fips("95123"))
    assert smith_yy == {"m1": 1, "county": "Smith Countyy"}

    smith_zz = get_latest(Region.from_fips("97123"))
    assert smith_zz == {"m1": 2, "county": "Smith Countzz"}

    wyoming = get_latest(Region.from_state("WY"))
    assert wyoming == {"m1": 3}
コード例 #20
0
import dataclasses
from functools import lru_cache

from libs.datasets import AggregationLevel
from libs.datasets import data_source
import pandas as pd
from covidactnow.datapublic.common_fields import CommonFields
from libs.datasets.sources import can_scraper_helpers as ccd_helpers
from libs.datasets.timeseries import MultiRegionDataset
from libs.pipeline import Region

# Location ids for the region with FIPS 11001 and for the "DC" state region.
# NOTE(review): presumably the county-level and state-level representations of
# the District of Columbia, going by the constant names - confirm.
DC_COUNTY_LOCATION_ID = Region.from_fips("11001").location_id
DC_STATE_LOCATION_ID = Region.from_state("DC").location_id


def _remove_trailing_zeros(series: pd.Series) -> pd.Series:
    """Return a date-indexed copy of ``series`` with its trailing zeros blanked out.

    The result is indexed by the DATE level of the input's index. Every value
    dated more than one day after the last valid non-zero observation is set
    to None. When there is no such observation the whole result is set to None.

    Args:
        series: Series whose index has a level named ``CommonFields.DATE``.

    Returns:
        A new series; the values of the input are copied, not modified.
    """
    dates = series.index.get_level_values(CommonFields.DATE)
    trimmed = pd.Series(series.values.copy(), index=dates)

    last_nonzero_date = trimmed.loc[trimmed != 0].last_valid_index()

    if last_nonzero_date is not None:
        # Blank everything from the day after the last non-zero observation on.
        trimmed[last_nonzero_date + pd.DateOffset(1):] = None
    else:
        # If test positivity is 0% the entire time, the data is considered
        # inaccurate and the whole series is blanked.
        trimmed[:] = None

    return trimmed
コード例 #21
0
def test_recent_days():
    """Check how ``recent_days`` affects which method's provenance ends up on the output.

    AS has POSITIVE_TESTS_VIRAL values only on the first two dates, TX has them
    on every date. With ``recent_days=2`` the expected provenance is "pos" for
    AS and "pos_viral" for TX; with ``recent_days=3`` it is "pos_viral" for both.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")
    dataset = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral([0, 2, 4, 6], provenance="pos"),
                CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                    [0, 20, None, None], provenance="pos_viral"
                ),
                CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400]),
            },
            tx_region: {
                CommonFields.POSITIVE_TESTS: TimeseriesLiteral([1, 2, 3, 4], provenance="pos"),
                CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
                    [10, 20, 30, 40], provenance="pos_viral"
                ),
                CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400]),
            },
        }
    )
    base_methods = [
        DivisionMethod(
            DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
        ),
    ]

    # First pass: recent_days=2.
    two_day_methods = _replace_methods_attribute(base_methods, recent_days=2)
    two_day_result = AllMethods.run(dataset, two_day_methods, diff_days=1)

    expected = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.02] * 3, provenance="pos")
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.1] * 3, provenance="pos_viral"
                )
            },
        },
        start_date="2020-04-02",
    )
    test_helpers.assert_dataset_like(two_day_result.test_positivity, expected)
    for region, source_name in ((as_region, "pos"), (tx_region, "pos_viral")):
        one_region = two_day_result.test_positivity.get_one_region(region)
        assert one_region.provenance == {CommonFields.TEST_POSITIVITY: [source_name]}

    # Second pass: recent_days=3, derived from the methods of the first pass.
    three_day_methods = _replace_methods_attribute(two_day_methods, recent_days=3)
    three_day_result = AllMethods.run(dataset, three_day_methods, diff_days=1)
    provenance_by_location = three_day_result.test_positivity.provenance
    for location_id in ("iso1:us#iso2:us-as", "iso1:us#iso2:us-tx"):
        assert provenance_by_location.loc[location_id].to_dict() == {
            CommonFields.TEST_POSITIVITY: "pos_viral"
        }
コード例 #22
0
def derive_ca_county_vaccine_pct(
        ds_in: MultiRegionDataset) -> MultiRegionDataset:
    """Derives vaccination metrics for CA counties based on State 1st vs 2nd dose reporting.

    The CA state-level ratios of VACCINATIONS_INITIATED and
    VACCINATIONS_COMPLETED to VACCINES_ADMINISTERED are multiplied by each
    county's VACCINES_ADMINISTERED, then divided by the county's static
    POPULATION and scaled by 100. The results are labelled
    VACCINATIONS_INITIATED_PCT and VACCINATIONS_COMPLETED_PCT.

    Args:
        ds_in: Dataset containing the CA state and CA county timeseries.

    Returns:
        The dataset produced by ``ds_in.replace_timeseries_wide_dates`` from
        the derived rows together with the existing wide-dates rows.

    Raises:
        AssertionError: If any of the initiated/completed count or percentage
            fields already has a value for a CA county.
    """
    county_ds = ds_in.get_subset(aggregation_level=AggregationLevel.COUNTY, state="CA")
    # Get county level time-series in distribution bucket "all". Keep the bucket in the index so
    # that the concat at the bottom of this function has the correct labels for each time-series.
    county_wide = county_ds.timeseries_bucketed_wide_dates.xs(
        DemographicBucket.ALL, level=PdFields.DEMOGRAPHIC_BUCKET, drop_level=False
    )

    # Assert that possible fields we want to estimate are all NA - if one of these is
    # not NA, likely do not need to estimate anymore and this methodology can be removed.
    estimated_fields = [
        CommonFields.VACCINATIONS_INITIATED,
        CommonFields.VACCINATIONS_COMPLETED,
        CommonFields.VACCINATIONS_INITIATED_PCT,
        CommonFields.VACCINATIONS_COMPLETED_PCT,
    ]
    existing_values = county_wide.loc[(slice(None), estimated_fields), :]
    assert existing_values.isna().all().all()

    state_ds = ds_in.get_regions_subset([Region.from_state("CA")])
    state_wide = state_ds.timeseries_bucketed_wide_dates.xs(
        DemographicBucket.ALL, level=PdFields.DEMOGRAPHIC_BUCKET, drop_level=False
    )
    # Drop location index because not used to apply to county level data
    state_wide = state_wide.droplevel(CommonFields.LOCATION_ID)

    state_administered = state_wide.loc(axis=0)[CommonFields.VACCINES_ADMINISTERED]
    initiated_share = (
        state_wide.loc(axis=0)[CommonFields.VACCINATIONS_INITIATED] / state_administered
    )
    completed_share = (
        state_wide.loc(axis=0)[CommonFields.VACCINATIONS_COMPLETED] / state_administered
    )

    county_administered = county_wide.loc(axis=0)[:, CommonFields.VACCINES_ADMINISTERED]
    initiated_estimate = county_administered * initiated_share
    completed_estimate = county_administered * completed_share

    def as_pct_of_population(estimate, pct_field):
        """Divide by county population, scale to percent and relabel the variable level."""
        pct = estimate.div(
            county_ds.static.loc[:, CommonFields.POPULATION],
            level=CommonFields.LOCATION_ID,
            axis="index",
        ) * 100
        # The rows are still labelled VACCINES_ADMINISTERED at this point.
        return pct.rename(
            index={CommonFields.VACCINES_ADMINISTERED: pct_field},
            level=PdFields.VARIABLE,
        )

    initiated_pct = as_pct_of_population(
        initiated_estimate, CommonFields.VACCINATIONS_INITIATED_PCT
    )
    completed_pct = as_pct_of_population(
        completed_estimate, CommonFields.VACCINATIONS_COMPLETED_PCT
    )

    existing_wide = ds_in.timeseries_bucketed_wide_dates
    # Because we assert that existing dataset does not have CA county VACCINATIONS_COMPLETED_PCT
    # or VACCINATIONS_INITIATED_PCT we can safely combine the existing rows with new derived rows
    return ds_in.replace_timeseries_wide_dates([completed_pct, initiated_pct, existing_wide])