def test_multi_region_get_one_region():
    ts = timeseries.MultiRegionTimeseriesDataset.from_csv(
        io.StringIO(
            "location_id,county,aggregate_level,date,m1,m2\n"
            "iso1:us#fips:97111,Bar County,county,2020-04-02,2,\n"
            "iso1:us#fips:97222,Foo County,county,2020-04-01,,10\n"
            "iso1:us#fips:97111,Bar County,county,,3,\n"
            "iso1:us#fips:97222,Foo County,county,,,11\n"
        )
    )
    region_97111_ts = ts.get_one_region(Region.from_fips("97111"))
    assert to_dict(["date"], region_97111_ts.data[["date", "m1", "m2"]]) == {
        pd.to_datetime("2020-04-02"): {"m1": 2}
    }
    assert region_97111_ts.latest["m1"] == 3

    region_97222_ts = ts.get_one_region(Region.from_fips("97222"))
    assert to_dict(["date"], region_97222_ts.data) == {
        pd.to_datetime("2020-04-01"): {
            "m2": 10,
            "county": "Foo County",
            "fips": "97222",
            "location_id": "iso1:us#fips:97222",
            "aggregate_level": "county",
        }
    }
    assert region_97222_ts.latest["m2"] == 11
def test_dataclass_include_exclude():
    orig_data_source_cls = CANScraperUSAFactsProvider
    orig_ds = orig_data_source_cls.make_dataset()
    assert "iso1:us#iso2:us-tx" in orig_ds.static.index
    assert "iso1:us#iso2:us-ny" in orig_ds.static.index

    ny_source = combined_datasets.datasource_regions(
        orig_data_source_cls, RegionMask(states=["NY"])
    )
    assert ny_source.SOURCE_NAME == orig_data_source_cls.SOURCE_NAME
    assert ny_source.EXPECTED_FIELDS == orig_data_source_cls.EXPECTED_FIELDS
    ny_ds = ny_source.make_dataset()
    assert "iso1:us#iso2:us-tx" not in ny_ds.static.index
    assert "iso1:us#iso2:us-ny" in ny_ds.static.index

    ca_counties_without_la_source = combined_datasets.datasource_regions(
        orig_data_source_cls,
        RegionMask(AggregationLevel.COUNTY, states=["CA"]),
        exclude=Region.from_fips("06037"),
    )
    ds = ca_counties_without_la_source.make_dataset()
    assert "iso1:us#iso2:us-tx" not in ds.static.index
    assert "iso1:us#iso2:us-ca" not in ds.static.index
    assert "iso1:us#iso2:us-ca#fips:06045" in ds.static.index
    assert "iso1:us#iso2:us-ca#fips:06037" not in ds.static.index

    # Just Cook County, IL
    ds = combined_datasets.datasource_regions(
        orig_data_source_cls, include=Region.from_fips("17031")
    ).make_dataset()
    assert ds.static.index.to_list() == ["iso1:us#iso2:us-il#fips:17031"]
def test_provenance():
    region_as = Region.from_state("AS")
    region_tx = Region.from_state("TX")
    metrics_as = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral([0, 2, 4, 6], provenance="pt_src1"),
        CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
    }
    metrics_tx = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral([1, 2, 3, 4], provenance="pt_src2"),
        CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
            [10, 20, 30, 40], provenance="pos_viral"
        ),
        CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
    }
    dataset_in = test_helpers.build_dataset({region_as: metrics_as, region_tx: metrics_tx})

    methods = [
        DivisionMethod(
            DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
        ),
    ]
    all_methods = AllMethods.run(dataset_in, methods, diff_days=3)

    expected_as = {CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.02], provenance=["pt_src1"])}
    expected_tx = {CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1], provenance="pos_viral")}
    expected_positivity = test_helpers.build_dataset(
        {region_as: expected_as, region_tx: expected_tx}, start_date="2020-04-04"
    )
    test_helpers.assert_dataset_like(all_methods.test_positivity, expected_positivity)
def test_multi_region_to_from_timeseries_and_latest_values(tmp_path: pathlib.Path):
    ts = timeseries.TimeseriesDataset(
        read_csv_and_index_fips_date(
            "fips,county,aggregate_level,date,m1,m2\n"
            "97111,Bar County,county,2020-04-02,2,\n"
            "97222,Foo County,county,2020-04-01,,10\n"
            "01,,state,2020-04-01,,20\n"
        ).reset_index()
    )
    latest_values = timeseries.LatestValuesDataset(
        read_csv_and_index_fips(
            "fips,county,aggregate_level,c1,c2\n"
            "97111,Bar County,county,3,\n"
            "97222,Foo County,county,4,10.5\n"
            "01,,state,,123.4\n"
        ).reset_index()
    )
    multiregion = timeseries.MultiRegionTimeseriesDataset.from_timeseries_and_latest(
        ts, latest_values
    )
    region_97111 = multiregion.get_one_region(Region.from_fips("97111"))
    assert region_97111.date_indexed.at["2020-04-02", "m1"] == 2
    assert region_97111.latest["c1"] == 3
    assert multiregion.get_one_region(Region.from_fips("01")).latest["c2"] == 123.4

    csv_path = tmp_path / "multiregion.csv"
    multiregion.to_csv(csv_path)

    multiregion_loaded = timeseries.MultiRegionTimeseriesDataset.from_csv(csv_path)
    region_97111 = multiregion_loaded.get_one_region(Region.from_fips("97111"))
    assert region_97111.date_indexed.at["2020-04-02", "m1"] == 2
    assert region_97111.latest["c1"] == 3
    assert multiregion_loaded.get_one_region(Region.from_fips("01")).latest["c2"] == 123.4
def get_run_artifact_path(region: Region, artifact: RunArtifact, output_dir=None) -> str:
    """
    Get an artifact path for a given locale and artifact type.

    Parameters
    ----------
    region: Region
        Region to get the artifact path for.
    artifact: RunArtifact
        The artifact type to retrieve the pointer for.
    output_dir: str or NoneType
        Output directory to obtain the path for.

    Returns
    -------
    path: str
        Location of the artifact.
    """
    output_dir = output_dir or OUTPUT_DIR

    if region.level is AggregationLevel.COUNTY:
        state_name = region.get_state_region().state_obj().name
        county = combined_datasets.get_county_name(region)
        readable_name = f"{state_name}__{county}__{region.fips}"
        folder = REPORTS_FOLDER(output_dir, state_name)
    elif region.level is AggregationLevel.STATE:
        state_name = region.state_obj().name
        readable_name = f"{state_name}__{region.fips}"
        folder = os.path.join(STATE_SUMMARY_FOLDER(output_dir), "reports")
    elif region.level is AggregationLevel.CBSA:
        readable_name = f"CBSA__{region.fips}"
        folder = os.path.join(STATE_SUMMARY_FOLDER(output_dir), "reports")
    elif region.level is AggregationLevel.PLACE:
        state_name = region.get_state_region().state_obj().name
        readable_name = f"{state_name}__{region.fips}"
        folder = os.path.join(STATE_SUMMARY_FOLDER(output_dir), "reports")
    elif region.level is AggregationLevel.COUNTRY:
        readable_name = region.country
        folder = os.path.join(output_dir, "pyseir", "reports")
    else:
        raise AssertionError(f"Unsupported aggregation level {region.level}")

    artifact = RunArtifact(artifact)

    if artifact is RunArtifact.RT_INFERENCE_REPORT:
        path = os.path.join(folder, f"Rt_results__{readable_name}.pdf")
    elif artifact is RunArtifact.RT_SMOOTHING_REPORT:
        path = os.path.join(folder, f"Rt_smoothing__{readable_name}.pdf")
    else:
        raise ValueError(f"No paths available for artifact {artifact}")

    os.makedirs(os.path.dirname(path), exist_ok=True)
    return path
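
# Illustrative usage sketch (not from the original source): resolving the Rt
# report path for a state region. `tmp_path` stands in for a real output
# directory; RunArtifact.RT_INFERENCE_REPORT is one of the artifact types
# handled above, and the parent directory is created as a side effect.
def example_rt_report_path(tmp_path) -> str:
    region = Region.from_state("TX")
    return get_run_artifact_path(region, RunArtifact.RT_INFERENCE_REPORT, output_dir=str(tmp_path))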
def test_regions_in_states_basic():
    whitelist_df = read_csv_and_index_fips(
        "fips,state,county,inference_ok\n"
        "45111,TX,Bar County,True\n"
        "06222,CA,Foo County,True\n"
    ).reset_index()

    regions = regions_in_states(
        [pipeline.Region.from_state(s) for s in ["CA", "TX"]], whitelist_df
    )
    assert set(regions) == {Region.from_fips("45111"), Region.from_fips("06222")}
def test_aggregate():
    df_in = read_csv_and_index_fips_date(
        "fips,state,aggregate_level,county,m1,date,foo\n"
        "55005,ZZ,county,North County,1,2020-05-01,11\n"
        "55005,ZZ,county,North County,2,2020-05-02,22\n"
        "55005,ZZ,county,North County,3,2020-05-03,33\n"
        "55005,ZZ,county,North County,0,2020-05-04,0\n"
        "55006,ZZ,county,South County,0,2020-05-01,0\n"
        "55006,ZZ,county,South County,0,2020-05-02,0\n"
        "55006,ZZ,county,South County,3,2020-05-03,44\n"
        "55006,ZZ,county,South County,4,2020-05-04,55\n"
        "55,ZZ,state,Grand State,41,2020-05-01,66\n"
        "55,ZZ,state,Grand State,43,2020-05-03,77\n"
    ).reset_index()
    ts_in = MultiRegionDataset.from_fips_timeseries_df(df_in)

    agg = statistical_areas.CountyToCBSAAggregator(
        county_map={"55005": "10001", "55006": "10001"},
        cbsa_title_map={"10001": "Stat Area 1"},
        aggregations=[],
    )
    ts_out = agg.aggregate(ts_in)

    assert ts_out.groupby_region().ngroups == 1

    ts_cbsa = ts_out.get_one_region(Region.from_cbsa_code("10001"))
    assert ts_cbsa.date_indexed["m1"].to_dict() == {
        pd.to_datetime("2020-05-01"): 1,
        pd.to_datetime("2020-05-02"): 2,
        pd.to_datetime("2020-05-03"): 6,
        pd.to_datetime("2020-05-04"): 4,
    }
def test_top_level_metrics_with_rt():
    region = Region.from_fips("36")
    data = (
        "date,fips,cases,positive_tests,negative_tests,contact_tracers_count"
        ",current_icu,current_icu_total,icu_beds\n"
        "2020-08-17,36,10,10,90,1,,,\n"
        "2020-08-18,36,20,20,180,2,,,\n"
        "2020-08-19,36,,,,3,,,\n"
        "2020-08-20,36,40,40,360,4,,,\n"
    )
    one_region = _fips_csv_to_one_region(data, region)

    data = (
        "date,fips,Rt_MAP_composite,Rt_ci95_composite\n"
        "2020-08-17,36,1.1,1.2\n"
        "2020-08-18,36,1.2,1.3\n"
        "2020-08-19,36,1.1,1.3\n"
        "2020-08-20,36,1.1,1.2\n"
    )
    rt_data = _fips_csv_to_one_region(data, region)

    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
        CommonFields.ICU_TYPICAL_OCCUPANCY_RATE: 0.5,
        CommonFields.ICU_BEDS: 25,
    }
def test_aggregate():
    ts = TimeseriesDataset.load_csv(
        io.StringIO(
            "fips,state,aggregate_level,county,m1,date,foo\n"
            "55005,ZZ,county,North County,1,2020-05-01,ab\n"
            "55005,ZZ,county,North County,2,2020-05-02,cd\n"
            "55005,ZZ,county,North County,3,2020-05-03,ef\n"
            "55006,ZZ,county,South County,3,2020-05-03,ef\n"
            "55006,ZZ,county,South County,4,2020-05-04,gh\n"
            "55,ZZ,state,Grand State,41,2020-05-01,ij\n"
            "55,ZZ,state,Grand State,43,2020-05-03,kl\n"
        )
    )
    ts_in = MultiRegionTimeseriesDataset.from_timeseries_and_latest(
        ts, ts.latest_values_object()
    )
    agg = statistical_areas.CountyToCBSAAggregator(
        county_map={"55005": "10001", "55006": "10001"},
        cbsa_title_map={"10001": "Stat Area 1"},
    )
    ts_out = agg.aggregate(ts_in)

    assert ts_out.groupby_region().ngroups == 1

    ts_cbsa = ts_out.get_one_region(Region.from_cbsa_code("10001"))
    assert ts_cbsa.date_indexed["m1"].to_dict() == {
        pd.to_datetime("2020-05-01"): 1,
        pd.to_datetime("2020-05-02"): 2,
        pd.to_datetime("2020-05-03"): 6,
        pd.to_datetime("2020-05-04"): 4,
    }
def test_combined_county_has_some_data(fips):
    region_data = combined_datasets.load_us_timeseries_dataset().get_one_region(
        Region.from_fips(fips)
    )
    assert region_data.data[CommonFields.POSITIVE_TESTS].all()
    assert region_data.data[CommonFields.NEGATIVE_TESTS].all()
    assert region_data.latest[CommonFields.DEATHS] > 1
def test_calculate_icu_capacity():
    region = Region.from_fips("36")
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
    }
def test_annotation(rt_dataset, icu_dataset):
    region = Region.from_state("IL")
    tag = test_helpers.make_tag(date="2020-04-01", original_observation=10.0)
    death_url = UrlStr("http://can.com/death_source")
    cases_urls = [UrlStr("http://can.com/one"), UrlStr("http://can.com/two")]
    new_cases_url = UrlStr("http://can.com/new_cases")
    ds = test_helpers.build_default_region_dataset(
        {
            CommonFields.CASES: TimeseriesLiteral(
                [100, 200, 300], provenance="NYTimes", source_url=cases_urls
            ),
            # NEW_CASES has only source_url set, to make sure that an annotation is still output.
            CommonFields.NEW_CASES: TimeseriesLiteral([100, 100, 100], source_url=new_cases_url),
            CommonFields.CONTACT_TRACERS_COUNT: [10] * 3,
            CommonFields.ICU_BEDS: TimeseriesLiteral([20, 20, 20], provenance="NotFound"),
            CommonFields.CURRENT_ICU: [5, 5, 5],
            CommonFields.DEATHS: TimeseriesLiteral(
                [2, 3, 2], annotation=[tag], source_url=death_url
            ),
        },
        region=region,
        static={
            CommonFields.POPULATION: 100_000,
            CommonFields.STATE: "IL",
            CommonFields.CAN_LOCATION_PAGE_URL: "http://covidactnow.org/foo/bar",
        },
    )
def modify_dataset(ds: MultiRegionDataset) -> MultiRegionDataset:
    ts_copy = ds.timeseries.copy()
    # Test positivity should be a ratio
    ts_copy.loc[:, CommonFields.TEST_POSITIVITY_7D] = (
        ts_copy.loc[:, CommonFields.TEST_POSITIVITY_7D] / 100.0
    )
    levels = set(
        Region.from_location_id(l).level
        for l in ds.timeseries.index.get_level_values(CommonFields.LOCATION_ID)
    )
    # Should only be picking up county all_df for now. May need additional logic if states
    # are included as well.
    assert levels == {AggregationLevel.COUNTY}

    # Duplicate DC County results as state results because downstream code uses
    # DC state data to override DC county data.
    dc_results = ts_copy.xs(
        DC_COUNTY_LOCATION_ID, axis=0, level=CommonFields.LOCATION_ID, drop_level=False
    )
    dc_results = dc_results.rename(
        index={DC_COUNTY_LOCATION_ID: DC_STATE_LOCATION_ID}, level=CommonFields.LOCATION_ID
    )
    ts_copy = ts_copy.append(dc_results, verify_integrity=True).sort_index()

    return dataclasses.replace(
        ds, timeseries=remove_trailing_zeros(ts_copy), timeseries_bucketed=None
    )
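
# Standalone pandas sketch (illustrative, not part of the pipeline) of the
# xs/rename pattern used above: copy the rows stored under one label of a
# MultiIndex level to a second label. pd.concat is used here instead of the
# now-deprecated DataFrame.append.
def example_duplicate_location(df: pd.DataFrame, src: str, dst: str) -> pd.DataFrame:
    # Select all rows for `src` without dropping the location_id level.
    copied = df.xs(src, axis=0, level="location_id", drop_level=False)
    # Relabel the copied rows so they appear under `dst`.
    copied = copied.rename(index={src: dst}, level="location_id")
    # verify_integrity raises if `dst` already has overlapping rows.
    return pd.concat([df, copied], verify_integrity=True).sort_index()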
def test_dataclass_include_exclude():
    """Tests datasource_regions using mock data for speed."""
    region_data = {CommonFields.CASES: [100, 200, 300], CommonFields.DEATHS: [0, 1, 2]}
    regions_orig = [Region.from_state(state) for state in "AZ CA NY IL TX".split()] + [
        Region.from_fips(fips) for fips in "06037 06045 17031 17201".split()
    ]
    dataset_orig = test_helpers.build_dataset({region: region_data for region in regions_orig})

    # Make a new subclass to keep this test separate from others in the make_dataset lru_cache.
    class DataSourceForTest(data_source.DataSource):
        EXPECTED_FIELDS = [CommonFields.CASES, CommonFields.DEATHS]
        SOURCE_TYPE = "DataSourceForTest"

        @classmethod
        def make_dataset(cls) -> timeseries.MultiRegionDataset:
            return dataset_orig

    orig_data_source_cls = DataSourceForTest
    orig_ds = orig_data_source_cls.make_dataset()
    assert "iso1:us#iso2:us-tx" in orig_ds.location_ids
    assert "iso1:us#iso2:us-ny" in orig_ds.location_ids

    ny_source = combined_datasets.datasource_regions(
        orig_data_source_cls, RegionMask(states=["NY"])
    )
    ny_ds = ny_source.make_dataset()
    assert "iso1:us#iso2:us-tx" not in ny_ds.location_ids
    assert "iso1:us#iso2:us-ny" in ny_ds.location_ids

    ca_counties_without_la_source = combined_datasets.datasource_regions(
        orig_data_source_cls,
        RegionMask(AggregationLevel.COUNTY, states=["CA"]),
        exclude=Region.from_fips("06037"),
    )
    ds = ca_counties_without_la_source.make_dataset()
    assert "iso1:us#iso2:us-tx" not in ds.location_ids
    assert "iso1:us#iso2:us-ca" not in ds.location_ids
    assert "iso1:us#iso2:us-ca#fips:06045" in ds.location_ids
    assert "iso1:us#iso2:us-ca#fips:06037" not in ds.location_ids

    # Just Cook County, IL
    ds = combined_datasets.datasource_regions(
        orig_data_source_cls, include=Region.from_fips("17031")
    ).make_dataset()
    assert ds.location_ids.to_list() == ["iso1:us#iso2:us-il#fips:17031"]
def il_regional_input(rt_dataset, icu_dataset):
    region = Region.from_state("IL")
    regional_data = combined_datasets.load_us_timeseries_dataset().get_regions_subset([region])
    # TODO(tom): add test positivity back in after PR 728 is merged.
    # test_positivity_results = test_positivity.AllMethods.run(regional_data)
    # regional_data = regional_data.join_columns(test_positivity_results.test_positivity)
    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        region, regional_data, rt_dataset, icu_dataset
    )
def iter_one_regions(self) -> Iterable[Tuple[Region, OneRegionTimeseriesDataset]]:
    """Iterates through all the regions in this object."""
    for location_id, data_group in self.data_with_fips.groupby(CommonFields.LOCATION_ID):
        latest_dict = self._location_id_latest_dict(location_id)
        yield Region(location_id=location_id, fips=None), OneRegionTimeseriesDataset(
            data_group, latest_dict
        )
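
# Hedged usage sketch: iter_one_regions yields (Region, OneRegionTimeseriesDataset)
# pairs, so a caller can build per-region summaries in one pass. `dataset` is
# assumed to be an instance of the class defining iter_one_regions above, and
# `latest` is assumed to be a plain dict as elsewhere in this module.
def example_latest_by_location(dataset) -> dict:
    return {
        region.location_id: one_region.latest
        for region, one_region in dataset.iter_one_regions()
    }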
def il_regional_input(rt_dataset, icu_dataset):
    region = Region.from_state("IL")
    regional_data = combined_datasets.load_us_timeseries_dataset().get_regions_subset([region])
    regional_data = test_positivity.run_and_maybe_join_columns(
        regional_data, structlog.get_logger()
    )
    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        region, regional_data, rt_dataset, icu_dataset
    )
def test_pyseir_end_to_end_dc(tmp_path):
    # Runs over a single state, which tests state filtering + running over more than
    # a single fips.
    with unittest.mock.patch("pyseir.utils.OUTPUT_DIR", str(tmp_path)):
        region = Region.from_state("DC")
        pipelines = cli._build_all_for_states(states=["DC"])
        # Checking to make sure that build all for states properly filters and only
        # returns DC data.
        assert len(pipelines) == 2
def test_top_level_metrics_with_rt():
    region = Region.from_fips("36")
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
        CommonFields.ICU_TYPICAL_OCCUPANCY_RATE: 0.5,
        CommonFields.ICU_BEDS: 25,
    }
def test_load_hospitalization_data():
    t0 = datetime(year=2020, month=1, day=1)
    region = Region.from_fips("33")
    hospitalization_df = load_data.get_hospitalization_data_for_region(region)
    _, _, hosp_type = load_data.calculate_hospitalization_data(
        hospitalization_df, t0, category=HospitalizationCategory.ICU
    )
    # Double check that data loads and that it went through the cumulative hosps path.
    assert hosp_type is HospitalizationDataType.CUMULATIVE_HOSPITALIZATIONS
def from_state_region(region: pipeline.Region) -> "RegionalInput":
    """Creates a RegionalInput for a given state region."""
    assert region.is_state()
    hospitalization_df = load_data.get_hospitalization_data_for_region(region)
    return RegionalInput(
        region=region,
        _combined_data=combined_datasets.RegionalData.from_region(region),
        _hospitalization_df=hospitalization_df,
    )
def test_preserve_tags():
    region_as = Region.from_state("AS")
    region_tx = Region.from_state("TX")
    tag1 = test_helpers.make_tag(type=TagType.CUMULATIVE_LONG_TAIL_TRUNCATED, date="2020-04-04")
    tag2 = test_helpers.make_tag(type=TagType.CUMULATIVE_TAIL_TRUNCATED, date="2020-04-04")
    tag_drop = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-01")
    tag3 = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-04")
    tag4 = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-03")
    metrics_as = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
            [1, 2, 3, 4], annotation=[tag1], provenance="pos"
        ),
        CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400], annotation=[tag2]),
    }
    metrics_tx = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral([None, None, 3, 4], annotation=[tag_drop]),
        CommonFields.POSITIVE_TESTS_VIRAL: [10, 20, 30, 40],
        CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400], annotation=[tag3, tag4]),
    }
    dataset_in = test_helpers.build_dataset({region_as: metrics_as, region_tx: metrics_tx})

    methods = [
        DivisionMethod(
            DatasetName("method1"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method2"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
        ),
    ]
    all_methods = AllMethods.run(dataset_in, methods, diff_days=3)

    expected_as = {
        CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
            [0.01], provenance="pos", annotation=[tag1, tag2]
        )
    }
    expected_tx = {CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1], annotation=[tag3, tag4])}
    expected_positivity = test_helpers.build_dataset(
        {region_as: expected_as, region_tx: expected_tx}, start_date="2020-04-04"
    )
    test_helpers.assert_dataset_like(all_methods.test_positivity, expected_positivity)
def test_make_latest_from_timeseries_simple():
    data = read_csv_and_index_fips_date(
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "97123,Smith County,ZZ,USA,2020-04-01,county,1,\n"
        "97123,Smith County,ZZ,USA,2020-04-02,county,,2\n"
    ).reset_index()
    ds = timeseries.MultiRegionDataset.from_fips_timeseries_df(data)
    region = ds.get_one_region(Region.from_fips("97123"))
    # Compare 2 values in region.latest
    expected = {"m1": 1, "m2": 2}
    actual = {key: region.latest[key] for key in expected.keys()}
    assert actual == expected
def test_combined_county_has_some_timeseries_data(fips):
    region = Region.from_fips(fips)
    latest = combined_datasets.load_us_timeseries_dataset().get_one_region(region)
    df = latest.data.set_index(CommonFields.DATE)
    assert df.loc["2020-05-01", CommonFields.CASES] > 0
    assert df.loc["2020-05-01", CommonFields.DEATHS] > 0
    # TODO(tom): Remove this condition when we have county data in TX too.
    if fips.startswith("06"):
        assert df.loc["2020-05-01", CommonFields.POSITIVE_TESTS] > 0
        assert df.loc["2020-05-01", CommonFields.NEGATIVE_TESTS] > 0
        assert df.loc["2020-05-01", CommonFields.CURRENT_ICU] > 0
def test_persist_and_load_dataset(tmp_path, nyc_fips):
    region = Region.from_fips(nyc_fips)
    dataset = combined_datasets.load_us_timeseries_dataset()
    timeseries_nyc = dataset.get_regions_subset([region])

    pointer = combined_dataset_utils.persist_dataset(timeseries_nyc, tmp_path)

    downloaded_dataset = MultiRegionDataset.read_from_pointer(pointer)
    differ_l = DatasetDiff.make(downloaded_dataset.timeseries)
    differ_r = DatasetDiff.make(timeseries_nyc.timeseries)
    differ_l.compare(differ_r)

    assert not len(differ_l.my_ts)
def update_test_combined_data(truncate_dates: bool, state: List[str]):
    us_dataset = combined_datasets.load_us_timeseries_dataset()
    # Keep only a small subset of the regions so we have enough to exercise our code in tests.
    test_subset = us_dataset.get_regions_subset(
        [
            RegionMask(states=[s.strip() for s in state]),
            Region.from_fips("48201"),
            Region.from_fips("48301"),
            Region.from_fips("20161"),
            Region.from_state("TX"),
            Region.from_state("KS"),
        ]
    )
    if truncate_dates:
        dates = test_subset.timeseries_bucketed.index.get_level_values(CommonFields.DATE)
        date_range_mask = (dates >= "2021-01-01") & (dates < "2021-04-01")
        test_subset = dataclasses.replace(
            test_subset, timeseries_bucketed=test_subset.timeseries_bucketed.loc[date_range_mask]
        )
    test_subset.write_to_wide_dates_csv(
        dataset_utils.TEST_COMBINED_WIDE_DATES_CSV_PATH,
        dataset_utils.TEST_COMBINED_STATIC_CSV_PATH,
    )
def test_top_level_metrics_no_pos_neg_tests_has_positivity_ratio():
    ny_region = Region.from_state("NY")
    metrics = {
        CommonFields.CASES: [10, 20, 30, 40],
        CommonFields.NEW_CASES: [10, 10, 10, 10],
        CommonFields.TEST_POSITIVITY: [0.02, 0.03, 0.04, 0.05],
    }
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
        CommonFields.ICU_BEDS: 10,
    }
def test_persist_and_load_dataset(tmp_path, nyc_fips):
    region = Region.from_fips(nyc_fips)
    dataset = combined_datasets.load_us_timeseries_dataset()
    timeseries_nyc = TimeseriesDataset(dataset.get_one_region(region).data)

    pointer = combined_dataset_utils.persist_dataset(timeseries_nyc, tmp_path)

    downloaded_dataset = pointer.load_dataset()
    differ_l = DatasetDiff.make(downloaded_dataset.data)
    differ_r = DatasetDiff.make(timeseries_nyc.data)
    differ_l.compare(differ_r)

    assert not len(differ_l.my_ts)
def _transform_one_override(
    override: Mapping, cbsa_to_counties_map: Mapping[Region, List[Region]]
) -> Filter:
    region_str = override["region"]
    if re.fullmatch(r"[A-Z][A-Z]", region_str):
        region = Region.from_state(region_str)
    elif re.fullmatch(r"\d{5}", region_str):
        region = Region.from_fips(region_str)
    else:
        raise ValueError(f"Invalid region: {region_str}")

    include_str = override["include"]
    if include_str == "region":
        regions_included = [region]
    elif include_str == "region-and-subregions":
        if region.is_state():
            regions_included = [RegionMask(states=[region.state])]
        elif region.level == AggregationLevel.CBSA:
            regions_included = [region] + cbsa_to_counties_map[region]
        else:
            raise ValueError("region-and-subregions only valid for a state or CBSA")
    elif include_str == "subregions":
        if not region.is_state():
            raise ValueError("subregions only valid for a state")
        regions_included = [RegionMask(AggregationLevel.COUNTY, states=[region.state])]
    else:
        raise ValueError(f"Invalid include: {include_str}")

    return Filter(
        regions_included=regions_included,
        fields_included=_METRIC_TO_FIELDS[override["metric"]],
        internal_note=override["context"],
        public_note=override.get("disclaimer", ""),
        drop_observations=bool(override["blocked"]),
    )
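
# Illustrative only: the shape of an override mapping that
# _transform_one_override accepts. The metric value is a hypothetical key of
# _METRIC_TO_FIELDS; the remaining keys ("region", "include", "context",
# "blocked", optional "disclaimer") are read directly by the function above.
_EXAMPLE_OVERRIDE = {
    "region": "TX",  # a two-letter state or a 5-digit county FIPS
    "include": "subregions",  # "region", "region-and-subregions" or "subregions"
    "metric": "cases",  # hypothetical _METRIC_TO_FIELDS key
    "context": "internal note explaining why the data is blocked",
    "disclaimer": "public note shown with the data",
    "blocked": True,  # True drops the observations
}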
def test_basic():
    region_tx = Region.from_state("TX")
    region_sf = Region.from_fips("06075")
    region_hi = Region.from_state("HI")
    # Add a timeseries with a tag to make sure they are preserved.
    ts_with_tag = TimeseriesLiteral(
        [0, 0, 0], annotation=[test_helpers.make_tag(date="2020-04-01")]
    )
    ds_in = test_helpers.build_dataset(
        {
            region_tx: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 0]},
            region_sf: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 1]},
            region_hi: {
                CommonFields.VACCINES_DISTRIBUTED: [0, 0, None],
                CommonFields.CASES: ts_with_tag,
            },
        }
    )

    with structlog.testing.capture_logs() as logs:
        ds_out = zeros_filter.drop_all_zero_timeseries(ds_in, [CommonFields.VACCINES_DISTRIBUTED])

    ds_expected = test_helpers.build_dataset(
        {
            region_sf: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 1]},
            region_hi: {CommonFields.CASES: ts_with_tag},
        }
    )
    log = more_itertools.one(logs)
    assert log["event"] == zeros_filter.DROPPING_TIMESERIES_WITH_ONLY_ZEROS
    assert pd.MultiIndex.from_tuples(
        [
            (region_hi.location_id, CommonFields.VACCINES_DISTRIBUTED),
            (region_tx.location_id, CommonFields.VACCINES_DISTRIBUTED),
        ]
    ).equals(log["dropped"])
    test_helpers.assert_dataset_like(ds_expected, ds_out)