Beispiel #1
0
def test_multi_region_get_one_region():
    ts = timeseries.MultiRegionTimeseriesDataset.from_csv(
        io.StringIO(
            "location_id,county,aggregate_level,date,m1,m2\n"
            "iso1:us#fips:97111,Bar County,county,2020-04-02,2,\n"
            "iso1:us#fips:97222,Foo County,county,2020-04-01,,10\n"
            "iso1:us#fips:97111,Bar County,county,,3,\n"
            "iso1:us#fips:97222,Foo County,county,,,11\n"
        )
    )
    region_97111_ts = ts.get_one_region(Region.from_fips("97111"))
    assert to_dict(["date"], region_97111_ts.data[["date", "m1", "m2"]]) == {
        pd.to_datetime("2020-04-02"): {"m1": 2}
    }
    assert region_97111_ts.latest["m1"] == 3

    region_97222_ts = ts.get_one_region(Region.from_fips("97222"))
    assert to_dict(["date"], region_97222_ts.data) == {
        pd.to_datetime("2020-04-01"): {
            "m2": 10,
            "county": "Foo County",
            "fips": "97222",
            "location_id": "iso1:us#fips:97222",
            "aggregate_level": "county",
        }
    }
    assert region_97222_ts.latest["m2"] == 11
Beispiel #2
0
def test_dataclass_include_exclude():
    orig_data_source_cls = CANScraperUSAFactsProvider
    orig_ds = orig_data_source_cls.make_dataset()
    assert "iso1:us#iso2:us-tx" in orig_ds.static.index
    assert "iso1:us#iso2:us-ny" in orig_ds.static.index

    ny_source = combined_datasets.datasource_regions(
        orig_data_source_cls, RegionMask(states=["NY"])
    )
    assert ny_source.SOURCE_NAME == orig_data_source_cls.SOURCE_NAME
    assert ny_source.EXPECTED_FIELDS == orig_data_source_cls.EXPECTED_FIELDS
    ny_ds = ny_source.make_dataset()
    assert "iso1:us#iso2:us-tx" not in ny_ds.static.index
    assert "iso1:us#iso2:us-ny" in ny_ds.static.index

    ca_counties_without_la_source = combined_datasets.datasource_regions(
        orig_data_source_cls,
        RegionMask(AggregationLevel.COUNTY, states=["CA"]),
        exclude=Region.from_fips("06037"),
    )
    ds = ca_counties_without_la_source.make_dataset()
    assert "iso1:us#iso2:us-tx" not in ds.static.index
    assert "iso1:us#iso2:us-ca" not in ds.static.index
    assert "iso1:us#iso2:us-ca#fips:06045" in ds.static.index
    assert "iso1:us#iso2:us-ca#fips:06037" not in ds.static.index

    # Just Cook County, IL
    ds = combined_datasets.datasource_regions(
        orig_data_source_cls, include=Region.from_fips("17031")
    ).make_dataset()
    assert ds.static.index.to_list() == ["iso1:us#iso2:us-il#fips:17031"]
Beispiel #3
0
def test_provenance():
    region_as = Region.from_state("AS")
    region_tx = Region.from_state("TX")
    metrics_as = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral([0, 2, 4, 6], provenance="pt_src1"),
        CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
    }
    metrics_tx = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral([1, 2, 3, 4], provenance="pt_src2"),
        CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
            [10, 20, 30, 40], provenance="pos_viral"
        ),
        CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
    }
    dataset_in = test_helpers.build_dataset({region_as: metrics_as, region_tx: metrics_tx})

    methods = [
        DivisionMethod(
            DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
        ),
    ]
    all_methods = AllMethods.run(dataset_in, methods, diff_days=3)

    expected_as = {CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.02], provenance=["pt_src1"])}
    expected_tx = {CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1], provenance="pos_viral")}
    expected_positivity = test_helpers.build_dataset(
        {region_as: expected_as, region_tx: expected_tx}, start_date="2020-04-04"
    )
    test_helpers.assert_dataset_like(all_methods.test_positivity, expected_positivity)
Beispiel #4
0
def test_multi_region_to_from_timeseries_and_latest_values(tmp_path: pathlib.Path):
    ts = timeseries.TimeseriesDataset(
        read_csv_and_index_fips_date(
            "fips,county,aggregate_level,date,m1,m2\n"
            "97111,Bar County,county,2020-04-02,2,\n"
            "97222,Foo County,county,2020-04-01,,10\n"
            "01,,state,2020-04-01,,20\n"
        ).reset_index()
    )
    latest_values = timeseries.LatestValuesDataset(
        read_csv_and_index_fips(
            "fips,county,aggregate_level,c1,c2\n"
            "97111,Bar County,county,3,\n"
            "97222,Foo County,county,4,10.5\n"
            "01,,state,,123.4\n"
        ).reset_index()
    )
    multiregion = timeseries.MultiRegionTimeseriesDataset.from_timeseries_and_latest(
        ts, latest_values
    )
    region_97111 = multiregion.get_one_region(Region.from_fips("97111"))
    assert region_97111.date_indexed.at["2020-04-02", "m1"] == 2
    assert region_97111.latest["c1"] == 3
    assert multiregion.get_one_region(Region.from_fips("01")).latest["c2"] == 123.4

    csv_path = tmp_path / "multiregion.csv"
    multiregion.to_csv(csv_path)
    multiregion_loaded = timeseries.MultiRegionTimeseriesDataset.from_csv(csv_path)
    region_97111 = multiregion_loaded.get_one_region(Region.from_fips("97111"))
    assert region_97111.date_indexed.at["2020-04-02", "m1"] == 2
    assert region_97111.latest["c1"] == 3
    assert multiregion_loaded.get_one_region(Region.from_fips("01")).latest["c2"] == 123.4
Beispiel #5
0
def get_run_artifact_path(region: Region,
                          artifact: RunArtifact,
                          output_dir=None) -> str:
    """
    Get an artifact path for a given locale and artifact type.

    Parameters
    ----------
    fips: str
        State or county fips code. Can also be a 2 character state abbreviation.
        If arbitrary string (e.g. for tests) then passed through
    artifact: RunArtifact
        The artifact type to retrieve the pointer for.
    output_dir: str or NoneType
        Output directory to obtain the path for.

    Returns
    -------
    path: str
        Location of the artifact.
    """

    output_dir = output_dir or OUTPUT_DIR

    if region.level is AggregationLevel.COUNTY:
        state_name = region.get_state_region().state_obj().name
        county = combined_datasets.get_county_name(region)
        readable_name = f"{state_name}__{county}__{region.fips}"
        folder = REPORTS_FOLDER(output_dir, state_name)
    elif region.level is AggregationLevel.STATE:
        state_name = region.state_obj().name
        readable_name = f"{state_name}__{region.fips}"
        folder = os.path.join(STATE_SUMMARY_FOLDER(output_dir), "reports")
    elif region.level is AggregationLevel.CBSA:
        readable_name = f"CBSA__{region.fips}"
        folder = os.path.join(STATE_SUMMARY_FOLDER(output_dir), "reports")
    elif region.level is AggregationLevel.PLACE:
        state_name = region.get_state_region().state_obj().name
        readable_name = f"{state_name}__{region.fips}"
        folder = os.path.join(STATE_SUMMARY_FOLDER(output_dir), "reports")
    elif region.level is AggregationLevel.COUNTRY:
        readable_name = region.country
        folder = os.path.join(output_dir, "pyseir", "reports")
    else:
        raise AssertionError(f"Unsupported aggregation level {region.level}")

    artifact = RunArtifact(artifact)

    if artifact is RunArtifact.RT_INFERENCE_REPORT:
        path = os.path.join(folder, f"Rt_results__{readable_name}.pdf")

    elif artifact is RunArtifact.RT_SMOOTHING_REPORT:
        path = os.path.join(folder, f"Rt_smoothing__{readable_name}.pdf")
    else:
        raise ValueError(f"No paths available for artifact {RunArtifact}")

    os.makedirs(os.path.dirname(path), exist_ok=True)
    return path
Beispiel #6
0
def test_regions_in_states_basic():
    whitelist_df = read_csv_and_index_fips(
        "fips,state,county,inference_ok\n"
        "45111,TX,Bar County,True\n"
        "06222,CA,Foo County,True\n").reset_index()

    regions = regions_in_states(
        [pipeline.Region.from_state(s) for s in ["CA", "TX"]], whitelist_df)
    assert set(regions) == {
        Region.from_fips("45111"),
        Region.from_fips("06222")
    }
Beispiel #7
0
def test_aggregate():
    df_in = read_csv_and_index_fips_date(
        "fips,state,aggregate_level,county,m1,date,foo\n"
        "55005,ZZ,county,North County,1,2020-05-01,11\n"
        "55005,ZZ,county,North County,2,2020-05-02,22\n"
        "55005,ZZ,county,North County,3,2020-05-03,33\n"
        "55005,ZZ,county,North County,0,2020-05-04,0\n"
        "55006,ZZ,county,South County,0,2020-05-01,0\n"
        "55006,ZZ,county,South County,0,2020-05-02,0\n"
        "55006,ZZ,county,South County,3,2020-05-03,44\n"
        "55006,ZZ,county,South County,4,2020-05-04,55\n"
        "55,ZZ,state,Grand State,41,2020-05-01,66\n"
        "55,ZZ,state,Grand State,43,2020-05-03,77\n"
    ).reset_index()
    ts_in = MultiRegionDataset.from_fips_timeseries_df(df_in)
    agg = statistical_areas.CountyToCBSAAggregator(
        county_map={"55005": "10001", "55006": "10001"},
        cbsa_title_map={"10001": "Stat Area 1"},
        aggregations=[],
    )
    ts_out = agg.aggregate(ts_in)

    assert ts_out.groupby_region().ngroups == 1

    ts_cbsa = ts_out.get_one_region(Region.from_cbsa_code("10001"))
    assert ts_cbsa.date_indexed["m1"].to_dict() == {
        pd.to_datetime("2020-05-01"): 1,
        pd.to_datetime("2020-05-02"): 2,
        pd.to_datetime("2020-05-03"): 6,
        pd.to_datetime("2020-05-04"): 4,
    }
Beispiel #8
0
def test_top_level_metrics_with_rt():
    region = Region.from_fips("36")
    data = (
        "date,fips,cases,positive_tests,negative_tests,contact_tracers_count"
        ",current_icu,current_icu_total,icu_beds\n"
        "2020-08-17,36,10,10,90,1,,,\n"
        "2020-08-18,36,20,20,180,2,,,\n"
        "2020-08-19,36,,,,3,,,\n"
        "2020-08-20,36,40,40,360,4,,,\n")
    one_region = _fips_csv_to_one_region(data, region)

    data = ("date,fips,Rt_MAP_composite,Rt_ci95_composite\n"
            "2020-08-17,36,1.1,1.2\n"
            "2020-08-18,36,1.2,1.3\n"
            "2020-08-19,36,1.1,1.3\n"
            "2020-08-20,36,1.1,1.2\n")
    rt_data = _fips_csv_to_one_region(data, region)

    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
        CommonFields.ICU_TYPICAL_OCCUPANCY_RATE: 0.5,
        CommonFields.ICU_BEDS: 25,
    }
Beispiel #9
0
def test_aggregate():
    ts = TimeseriesDataset.load_csv(
        io.StringIO("fips,state,aggregate_level,county,m1,date,foo\n"
                    "55005,ZZ,county,North County,1,2020-05-01,ab\n"
                    "55005,ZZ,county,North County,2,2020-05-02,cd\n"
                    "55005,ZZ,county,North County,3,2020-05-03,ef\n"
                    "55006,ZZ,county,South County,3,2020-05-03,ef\n"
                    "55006,ZZ,county,South County,4,2020-05-04,gh\n"
                    "55,ZZ,state,Grand State,41,2020-05-01,ij\n"
                    "55,ZZ,state,Grand State,43,2020-05-03,kl\n"))
    ts_in = MultiRegionTimeseriesDataset.from_timeseries_and_latest(
        ts, ts.latest_values_object())
    agg = statistical_areas.CountyToCBSAAggregator(
        county_map={
            "55005": "10001",
            "55006": "10001"
        },
        cbsa_title_map={"10001": "Stat Area 1"})
    ts_out = agg.aggregate(ts_in)

    assert ts_out.groupby_region().ngroups == 1

    ts_cbsa = ts_out.get_one_region(Region.from_cbsa_code("10001"))
    assert ts_cbsa.date_indexed["m1"].to_dict() == {
        pd.to_datetime("2020-05-01"): 1,
        pd.to_datetime("2020-05-02"): 2,
        pd.to_datetime("2020-05-03"): 6,
        pd.to_datetime("2020-05-04"): 4,
    }
Beispiel #10
0
def test_combined_county_has_some_data(fips):
    region_data = combined_datasets.load_us_timeseries_dataset().get_one_region(
        Region.from_fips(fips)
    )
    assert region_data.data[CommonFields.POSITIVE_TESTS].all()
    assert region_data.data[CommonFields.NEGATIVE_TESTS].all()
    assert region_data.latest[CommonFields.DEATHS] > 1
def test_calculate_icu_capacity():
    region = Region.from_fips("36")
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
    }
Beispiel #12
0
def test_annotation(rt_dataset, icu_dataset):
    region = Region.from_state("IL")
    tag = test_helpers.make_tag(date="2020-04-01", original_observation=10.0)
    death_url = UrlStr("http://can.com/death_source")
    cases_urls = [UrlStr("http://can.com/one"), UrlStr("http://can.com/two")]
    new_cases_url = UrlStr("http://can.com/new_cases")

    ds = test_helpers.build_default_region_dataset(
        {
            CommonFields.CASES:
            TimeseriesLiteral(
                [100, 200, 300], provenance="NYTimes", source_url=cases_urls),
            # NEW_CASES has only source_url set, to make sure that an annotation is still output.
            CommonFields.NEW_CASES:
            TimeseriesLiteral([100, 100, 100], source_url=new_cases_url),
            CommonFields.CONTACT_TRACERS_COUNT: [10] * 3,
            CommonFields.ICU_BEDS:
            TimeseriesLiteral([20, 20, 20], provenance="NotFound"),
            CommonFields.CURRENT_ICU: [5, 5, 5],
            CommonFields.DEATHS:
            TimeseriesLiteral(
                [2, 3, 2], annotation=[tag], source_url=death_url),
        },
        region=region,
        static={
            CommonFields.POPULATION: 100_000,
            CommonFields.STATE: "IL",
            CommonFields.CAN_LOCATION_PAGE_URL:
            "http://covidactnow.org/foo/bar",
        },
    )
def modify_dataset(ds: MultiRegionDataset) -> MultiRegionDataset:
    ts_copy = ds.timeseries.copy()
    # Test positivity should be a ratio
    ts_copy.loc[:, CommonFields.TEST_POSITIVITY_7D] = (
        ts_copy.loc[:, CommonFields.TEST_POSITIVITY_7D] / 100.0)

    levels = set(
        Region.from_location_id(l).level for l in
        ds.timeseries.index.get_level_values(CommonFields.LOCATION_ID))
    # Should only be picking up county all_df for now.  May need additional logic if states
    # are included as well
    assert levels == {AggregationLevel.COUNTY}

    # Duplicating DC County results as state results because of a downstream
    # use of how dc state data is used to override DC county data.
    dc_results = ts_copy.xs(DC_COUNTY_LOCATION_ID,
                            axis=0,
                            level=CommonFields.LOCATION_ID,
                            drop_level=False)
    dc_results = dc_results.rename(
        index={DC_COUNTY_LOCATION_ID: DC_STATE_LOCATION_ID},
        level=CommonFields.LOCATION_ID)

    ts_copy = ts_copy.append(dc_results, verify_integrity=True).sort_index()

    return dataclasses.replace(ds,
                               timeseries=remove_trailing_zeros(ts_copy),
                               timeseries_bucketed=None)
def test_dataclass_include_exclude():
    """Tests datasource_regions using mock data for speed."""
    region_data = {CommonFields.CASES: [100, 200, 300], CommonFields.DEATHS: [0, 1, 2]}
    regions_orig = [Region.from_state(state) for state in "AZ CA NY IL TX".split()] + [
        Region.from_fips(fips) for fips in "06037 06045 17031 17201".split()
    ]
    dataset_orig = test_helpers.build_dataset({region: region_data for region in regions_orig})

    # Make a new subclass to keep this test separate from others in the make_dataset lru_cache.
    class DataSourceForTest(data_source.DataSource):
        EXPECTED_FIELDS = [CommonFields.CASES, CommonFields.DEATHS]
        SOURCE_TYPE = "DataSourceForTest"

        @classmethod
        def make_dataset(cls) -> timeseries.MultiRegionDataset:
            return dataset_orig

    orig_data_source_cls = DataSourceForTest
    orig_ds = orig_data_source_cls.make_dataset()
    assert "iso1:us#iso2:us-tx" in orig_ds.location_ids
    assert "iso1:us#iso2:us-ny" in orig_ds.location_ids

    ny_source = combined_datasets.datasource_regions(
        orig_data_source_cls, RegionMask(states=["NY"])
    )
    ny_ds = ny_source.make_dataset()
    assert "iso1:us#iso2:us-tx" not in ny_ds.location_ids
    assert "iso1:us#iso2:us-ny" in ny_ds.location_ids

    ca_counties_without_la_source = combined_datasets.datasource_regions(
        orig_data_source_cls,
        RegionMask(AggregationLevel.COUNTY, states=["CA"]),
        exclude=Region.from_fips("06037"),
    )
    ds = ca_counties_without_la_source.make_dataset()
    assert "iso1:us#iso2:us-tx" not in ds.location_ids
    assert "iso1:us#iso2:us-ca" not in ds.location_ids
    assert "iso1:us#iso2:us-ca#fips:06045" in ds.location_ids
    assert "iso1:us#iso2:us-ca#fips:06037" not in ds.location_ids

    # Just Cook County, IL
    ds = combined_datasets.datasource_regions(
        orig_data_source_cls, include=Region.from_fips("17031")
    ).make_dataset()
    assert ds.location_ids.to_list() == ["iso1:us#iso2:us-il#fips:17031"]
def il_regional_input(rt_dataset, icu_dataset):
    region = Region.from_state("IL")
    regional_data = combined_datasets.load_us_timeseries_dataset(
    ).get_regions_subset([region])
    # TODO(tom): add test positivity back in after PR 728 is merged.
    # test_positivity_results = test_positivity.AllMethods.run(regional_data)
    # regional_data = regional_data.join_columns(test_positivity_results.test_positivity)
    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        region, regional_data, rt_dataset, icu_dataset)
 def iter_one_regions(
         self) -> Iterable[Tuple[Region, OneRegionTimeseriesDataset]]:
     """Iterates through all the regions in this object"""
     for location_id, data_group in self.data_with_fips.groupby(
             CommonFields.LOCATION_ID):
         latest_dict = self._location_id_latest_dict(location_id)
         yield Region(location_id=location_id,
                      fips=None), OneRegionTimeseriesDataset(
                          data_group, latest_dict)
Beispiel #17
0
def il_regional_input(rt_dataset, icu_dataset):
    region = Region.from_state("IL")
    regional_data = combined_datasets.load_us_timeseries_dataset(
    ).get_regions_subset([region])
    regional_data = test_positivity.run_and_maybe_join_columns(
        regional_data, structlog.get_logger())

    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        region, regional_data, rt_dataset, icu_dataset)
Beispiel #18
0
def test_pyseir_end_to_end_dc(tmp_path):
    # Runs over a single state which tests state filtering + running over more than
    # a single fips.
    with unittest.mock.patch("pyseir.utils.OUTPUT_DIR", str(tmp_path)):
        region = Region.from_state("DC")
        pipelines = cli._build_all_for_states(states=["DC"])
        # Checking to make sure that build all for states properly filters and only
        # returns DC data
        assert len(pipelines) == 2
def test_top_level_metrics_with_rt():
    region = Region.from_fips("36")
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
        CommonFields.ICU_TYPICAL_OCCUPANCY_RATE: 0.5,
        CommonFields.ICU_BEDS: 25,
    }
def test_load_hospitalization_data():
    t0 = datetime(year=2020, month=1, day=1)
    region = Region.from_fips("33")
    hospitalization_df = load_data.get_hospitalization_data_for_region(region)

    _, _, hosp_type = load_data.calculate_hospitalization_data(
        hospitalization_df, t0, category=HospitalizationCategory.ICU
    )
    # Double check that data loads and it went throughh the cumulative hosps
    assert hosp_type is HospitalizationDataType.CUMULATIVE_HOSPITALIZATIONS
 def from_state_region(region: pipeline.Region) -> "RegionalInput":
     """Creates a RegionalInput for given state region."""
     assert region.is_state()
     hospitalization_df = load_data.get_hospitalization_data_for_region(
         region)
     return RegionalInput(
         region=region,
         _combined_data=combined_datasets.RegionalData.from_region(region),
         _hospitalization_df=hospitalization_df,
     )
Beispiel #22
0
def test_preserve_tags():
    region_as = Region.from_state("AS")
    region_tx = Region.from_state("TX")
    tag1 = test_helpers.make_tag(type=TagType.CUMULATIVE_LONG_TAIL_TRUNCATED, date="2020-04-04")
    tag2 = test_helpers.make_tag(type=TagType.CUMULATIVE_TAIL_TRUNCATED, date="2020-04-04")
    tag_drop = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-01")
    tag3 = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-04")
    tag4 = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-03")
    metrics_as = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
            [1, 2, 3, 4], annotation=[tag1], provenance="pos"
        ),
        CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400], annotation=[tag2]),
    }
    metrics_tx = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral([None, None, 3, 4], annotation=[tag_drop]),
        CommonFields.POSITIVE_TESTS_VIRAL: [10, 20, 30, 40],
        CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400], annotation=[tag3, tag4]),
    }
    dataset_in = test_helpers.build_dataset({region_as: metrics_as, region_tx: metrics_tx})

    methods = [
        DivisionMethod(
            DatasetName("method1"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method2"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
        ),
    ]
    all_methods = AllMethods.run(dataset_in, methods, diff_days=3)

    expected_as = {
        CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
            [0.01], provenance="pos", annotation=[tag1, tag2]
        )
    }
    expected_tx = {CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1], annotation=[tag3, tag4])}
    expected_positivity = test_helpers.build_dataset(
        {region_as: expected_as, region_tx: expected_tx}, start_date="2020-04-04"
    )
    test_helpers.assert_dataset_like(all_methods.test_positivity, expected_positivity)
Beispiel #23
0
def test_make_latest_from_timeseries_simple():
    data = read_csv_and_index_fips_date(
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "97123,Smith County,ZZ,USA,2020-04-01,county,1,\n"
        "97123,Smith County,ZZ,USA,2020-04-02,county,,2\n"
    ).reset_index()
    ds = timeseries.MultiRegionDataset.from_fips_timeseries_df(data)
    region = ds.get_one_region(Region.from_fips("97123"))
    # Compare 2 values in region.latest
    expected = {"m1": 1, "m2": 2}
    actual = {key: region.latest[key] for key in expected.keys()}
    assert actual == expected
def test_combined_county_has_some_timeseries_data(fips):
    region = Region.from_fips(fips)
    latest = combined_datasets.load_us_timeseries_dataset().get_one_region(
        region)
    df = latest.data.set_index(CommonFields.DATE)
    assert df.loc["2020-05-01", CommonFields.CASES] > 0
    assert df.loc["2020-05-01", CommonFields.DEATHS] > 0
    if fips.startswith(
            "06"
    ):  # TODO(tom): Remove this condition when we have county data in TX too.
        assert df.loc["2020-05-01", CommonFields.POSITIVE_TESTS] > 0
        assert df.loc["2020-05-01", CommonFields.NEGATIVE_TESTS] > 0
        assert df.loc["2020-05-01", CommonFields.CURRENT_ICU] > 0
def test_persist_and_load_dataset(tmp_path, nyc_fips):
    region = Region.from_fips(nyc_fips)
    dataset = combined_datasets.load_us_timeseries_dataset()
    timeseries_nyc = dataset.get_regions_subset([region])

    pointer = combined_dataset_utils.persist_dataset(timeseries_nyc, tmp_path)

    downloaded_dataset = MultiRegionDataset.read_from_pointer(pointer)
    differ_l = DatasetDiff.make(downloaded_dataset.timeseries)
    differ_r = DatasetDiff.make(timeseries_nyc.timeseries)
    differ_l.compare(differ_r)

    assert not len(differ_l.my_ts)
Beispiel #26
0
def update_test_combined_data(truncate_dates: bool, state: List[str]):
    us_dataset = combined_datasets.load_us_timeseries_dataset()
    # Keep only a small subset of the regions so we have enough to exercise our code in tests.
    test_subset = us_dataset.get_regions_subset([
        RegionMask(states=[s.strip() for s in state]),
        Region.from_fips("48201"),
        Region.from_fips("48301"),
        Region.from_fips("20161"),
        Region.from_state("TX"),
        Region.from_state("KS"),
    ])
    if truncate_dates:
        dates = test_subset.timeseries_bucketed.index.get_level_values(
            CommonFields.DATE)
        date_range_mask = (dates >= "2021-01-01") & (dates < "2021-04-01")
        test_subset = dataclasses.replace(
            test_subset,
            timeseries_bucketed=test_subset.timeseries_bucketed.
            loc[date_range_mask])
    test_subset.write_to_wide_dates_csv(
        dataset_utils.TEST_COMBINED_WIDE_DATES_CSV_PATH,
        dataset_utils.TEST_COMBINED_STATIC_CSV_PATH)
def test_top_level_metrics_no_pos_neg_tests_has_positivity_ratio():
    ny_region = Region.from_state("NY")
    metrics = {
        CommonFields.CASES: [10, 20, 30, 40],
        CommonFields.NEW_CASES: [10, 10, 10, 10],
        CommonFields.TEST_POSITIVITY: [0.02, 0.03, 0.04, 0.05],
    }
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
        CommonFields.ICU_BEDS: 10,
    }
def test_persist_and_load_dataset(tmp_path, nyc_fips):
    region = Region.from_fips(nyc_fips)
    dataset = combined_datasets.load_us_timeseries_dataset()
    timeseries_nyc = TimeseriesDataset(dataset.get_one_region(region).data)

    pointer = combined_dataset_utils.persist_dataset(timeseries_nyc, tmp_path)

    downloaded_dataset = pointer.load_dataset()
    differ_l = DatasetDiff.make(downloaded_dataset.data)
    differ_r = DatasetDiff.make(timeseries_nyc.data)
    differ_l.compare(differ_r)

    assert not len(differ_l.my_ts)
Beispiel #29
0
def _transform_one_override(
        override: Mapping,
        cbsa_to_counties_map: Mapping[Region, List[Region]]) -> Filter:
    region_str = override["region"]
    if re.fullmatch(r"[A-Z][A-Z]", region_str):
        region = Region.from_state(region_str)
    elif re.fullmatch(r"\d{5}", region_str):
        region = Region.from_fips(region_str)
    else:
        raise ValueError(f"Invalid region: {region_str}")

    include_str = override["include"]
    if include_str == "region":
        regions_included = [region]
    elif include_str == "region-and-subregions":
        if region.is_state():
            regions_included = [RegionMask(states=[region.state])]
        elif region.level == AggregationLevel.CBSA:
            regions_included = [region] + cbsa_to_counties_map[region]
        else:
            raise ValueError(
                "region-and-subregions only valid for a state and CBSA")
    elif include_str == "subregions":
        if not region.is_state():
            raise ValueError("subregions only valid for a state")
        regions_included = [
            RegionMask(AggregationLevel.COUNTY, states=[region.state])
        ]
    else:
        raise ValueError(f"Invalid include: {include_str}")

    return Filter(
        regions_included=regions_included,
        fields_included=_METRIC_TO_FIELDS[override["metric"]],
        internal_note=override["context"],
        public_note=override.get("disclaimer", ""),
        drop_observations=bool(override["blocked"]),
    )
def test_basic():
    region_tx = Region.from_state("TX")
    region_sf = Region.from_fips("06075")
    region_hi = Region.from_state("HI")
    # Add a timeseries with a tag to make sure they are preserved.
    ts_with_tag = TimeseriesLiteral(
        [0, 0, 0], annotation=[test_helpers.make_tag(date="2020-04-01")])
    ds_in = test_helpers.build_dataset({
        region_tx: {
            CommonFields.VACCINES_DISTRIBUTED: [0, 0, 0]
        },
        region_sf: {
            CommonFields.VACCINES_DISTRIBUTED: [0, 0, 1]
        },
        region_hi: {
            CommonFields.VACCINES_DISTRIBUTED: [0, 0, None],
            CommonFields.CASES: ts_with_tag,
        },
    })

    with structlog.testing.capture_logs() as logs:
        ds_out = zeros_filter.drop_all_zero_timeseries(
            ds_in, [CommonFields.VACCINES_DISTRIBUTED])
    ds_expected = test_helpers.build_dataset({
        region_sf: {
            CommonFields.VACCINES_DISTRIBUTED: [0, 0, 1]
        },
        region_hi: {
            CommonFields.CASES: ts_with_tag
        },
    })
    log = more_itertools.one(logs)
    assert log["event"] == zeros_filter.DROPPING_TIMESERIES_WITH_ONLY_ZEROS
    assert pd.MultiIndex.from_tuples([
        (region_hi.location_id, CommonFields.VACCINES_DISTRIBUTED),
        (region_tx.location_id, CommonFields.VACCINES_DISTRIBUTED),
    ]).equals(log["dropped"])
    test_helpers.assert_dataset_like(ds_expected, ds_out)