Example #1
def test_aggregate():
    df_in = read_csv_and_index_fips_date(
        "fips,state,aggregate_level,county,m1,date,foo\n"
        "55005,ZZ,county,North County,1,2020-05-01,11\n"
        "55005,ZZ,county,North County,2,2020-05-02,22\n"
        "55005,ZZ,county,North County,3,2020-05-03,33\n"
        "55005,ZZ,county,North County,0,2020-05-04,0\n"
        "55006,ZZ,county,South County,0,2020-05-01,0\n"
        "55006,ZZ,county,South County,0,2020-05-02,0\n"
        "55006,ZZ,county,South County,3,2020-05-03,44\n"
        "55006,ZZ,county,South County,4,2020-05-04,55\n"
        "55,ZZ,state,Grand State,41,2020-05-01,66\n"
        "55,ZZ,state,Grand State,43,2020-05-03,77\n"
    ).reset_index()
    ts_in = MultiRegionDataset.from_fips_timeseries_df(df_in)
    agg = statistical_areas.CountyToCBSAAggregator(
        county_map={"55005": "10001", "55006": "10001"},
        cbsa_title_map={"10001": "Stat Area 1"},
        aggregations=[],
    )
    ts_out = agg.aggregate(ts_in)

    assert ts_out.groupby_region().ngroups == 1

    ts_cbsa = ts_out.get_one_region(Region.from_cbsa_code("10001"))
    assert ts_cbsa.date_indexed["m1"].to_dict() == {
        pd.to_datetime("2020-05-01"): 1,
        pd.to_datetime("2020-05-02"): 2,
        pd.to_datetime("2020-05-03"): 6,
        pd.to_datetime("2020-05-04"): 4,
    }
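
The expected CBSA series is just the per-date sum of the two member counties (e.g. 2020-05-03: 3 + 3 = 6; South County's zeros leave the 05-01 and 05-02 values at 1 and 2). A minimal pandas sketch of that summation, using an ad-hoc frame rather than the project's API:

import pandas as pd

frame = pd.DataFrame(
    {
        "fips": ["55005", "55005", "55006", "55006"],
        "date": ["2020-05-03", "2020-05-04"] * 2,
        "m1": [3, 0, 3, 4],
    }
)
county_map = {"55005": "10001", "55006": "10001"}
frame["cbsa"] = frame["fips"].map(county_map)
summed = frame.groupby(["cbsa", "date"])["m1"].sum()
# summed yields 6 for 2020-05-03 and 4 for 2020-05-04, matching the assertions.
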
Example #2
def _fips_csv_to_one_region(csv_str: str,
                            region: Region,
                            latest=None) -> OneRegionTimeseriesDataset:
    df = read_csv_and_index_fips_date(csv_str).reset_index()
    # from_fips_timeseries_df adds the location_id column needed by get_one_region
    dataset = MultiRegionDataset.from_fips_timeseries_df(df).get_one_region(
        region)
    if latest:
        return dataclasses.replace(dataset, latest=latest)
    else:
        return dataset
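
A hypothetical call to the helper above (the FIPS code, county name, and metric are made up for illustration):

one_region = _fips_csv_to_one_region(
    "fips,county,aggregate_level,date,m1\n"
    "97111,Any County,county,2020-04-01,1\n",
    Region.from_fips("97111"),
)
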
Example #3
def test_make_latest_from_timeseries_simple():
    data = read_csv_and_index_fips_date(
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "97123,Smith County,ZZ,USA,2020-04-01,county,1,\n"
        "97123,Smith County,ZZ,USA,2020-04-02,county,,2\n"
    ).reset_index()
    ds = timeseries.MultiRegionDataset.from_fips_timeseries_df(data)
    region = ds.get_one_region(Region.from_fips("97123"))
    # Compare 2 values in region.latest
    expected = {"m1": 1, "m2": 2}
    actual = {key: region.latest[key] for key in expected.keys()}
    assert actual == expected
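
The test implies that "latest" takes, per column, the most recent non-null value: m1 comes from 2020-04-01 and m2 from 2020-04-02. A self-contained pandas sketch of that rule (an assumption about the semantics, not the project's code):

import pandas as pd

ts = pd.DataFrame(
    {"m1": [1, None], "m2": [None, 2]},
    index=pd.to_datetime(["2020-04-01", "2020-04-02"]),
)
latest = ts.sort_index().ffill().iloc[-1]
# latest["m1"] == 1.0 and latest["m2"] == 2.0
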
Example #4
def test_melt_provenance_multiple_sources():
    wide = read_csv_and_index_fips_date(
        "fips,date,cases,recovered\n"
        "97111,2020-04-01,source_a,source_b\n"
        "97111,2020-04-02,source_x,\n"
        "97222,2020-04-01,source_c,\n"
    )
    with structlog.testing.capture_logs() as logs:
        long = provenance_wide_metrics_to_series(wide, structlog.get_logger())

    assert [l["event"] for l in logs] == ["Multiple rows for a timeseries"]

    assert long.to_dict() == {
        ("97111", "cases"): "source_a;source_x",
        ("97111", "recovered"): "source_b",
        ("97222", "cases"): "source_c",
    }
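
The assertions describe a melt from wide form (one column per metric) to a series keyed by (fips, variable), where distinct sources for one timeseries are joined with ";". A rough pandas illustration of that behavior, not the project's implementation:

import pandas as pd

wide = pd.DataFrame(
    {
        "fips": ["97111", "97111"],
        "date": ["2020-04-01", "2020-04-02"],
        "cases": ["source_a", "source_x"],
    }
)
long = wide.melt(id_vars=["fips", "date"], var_name="variable").dropna()
joined = long.groupby(["fips", "variable"])["value"].apply(
    lambda s: ";".join(s.drop_duplicates())
)
# joined[("97111", "cases")] == "source_a;source_x"
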
Example #5
def test_melt_provenance():
    wide = read_csv_and_index_fips_date(
        "fips,date,cases,recovered\n"
        "97111,2020-04-01,source_a,source_b\n"
        "97111,2020-04-02,source_a,\n"
        "97222,2020-04-01,source_c,\n"
    )
    with structlog.testing.capture_logs() as logs:
        long = provenance_wide_metrics_to_series(wide, structlog.get_logger())

    assert logs == []

    assert long.to_dict() == {
        ("97111", "cases"): "source_a",
        ("97111", "recovered"): "source_b",
        ("97222", "cases"): "source_c",
    }
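
Example #5 is the quiet counterpart of Example #4: each timeseries here has a single consistent source, so nothing is logged. Both tests rely on structlog's capture_logs, which collects log entries as plain dicts instead of emitting them; a minimal standalone example:

import structlog

with structlog.testing.capture_logs() as logs:
    structlog.get_logger().warning("Multiple rows for a timeseries")

assert logs[0]["event"] == "Multiple rows for a timeseries"
assert logs[0]["log_level"] == "warning"
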
Example #6
def test_load_from_local_public_data():
    agg = statistical_areas.CountyToCBSAAggregator.from_local_public_data()
    agg = dataclasses.replace(agg, aggregations=[])  # Disable scaled aggregations

    assert agg.cbsa_title_map["43580"] == "Sioux City, IA-NE-SD"
    assert agg.county_map["48187"] == "41700"

    df_in = read_csv_and_index_fips_date(
        "fips,state,aggregate_level,county,m1,date,foo\n"
        "48059,ZZ,county,North County,3,2020-05-03,33\n"
        "48253,ZZ,county,South County,4,2020-05-03,77\n"
        "48441,ZZ,county,Other County,2,2020-05-03,41\n"
    ).reset_index()
    ts_in = MultiRegionDataset.from_fips_timeseries_df(df_in)
    ts_out = agg.aggregate(ts_in)
    ts_cbsa = ts_out.get_one_region(Region.from_cbsa_code("10180"))
    assert ts_cbsa.date_indexed["m1"].to_dict() == {
        pd.to_datetime("2020-05-03"): 9,
    }
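
The expected value 9 is the sum of the three county rows on 2020-05-03 (3 + 4 + 2). The dataclasses.replace call above copies a dataclass instance with selected fields overridden, which is how the test disables scaled aggregations without mutating the aggregator; a standalone illustration with a made-up dataclass:

import dataclasses

@dataclasses.dataclass(frozen=True)
class Aggregator:
    county_map: dict
    aggregations: list

base = Aggregator(county_map={"48059": "10180"}, aggregations=["scaled"])
plain = dataclasses.replace(base, aggregations=[])
assert plain.aggregations == [] and plain.county_map == base.county_map
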
Example #7
def test_make_latest_from_timeseries_dont_touch_county():
    data = read_csv_and_index_fips_date(
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "95123,Smith Countyy,YY,USA,2020-04-01,county,1,\n"
        "97123,Smith Countzz,ZZ,USA,2020-04-01,county,2,\n"
        "56,,WY,USA,2020-04-01,state,3,\n"
    ).reset_index()
    ds = timeseries.MultiRegionDataset.from_fips_timeseries_df(data)

    def get_latest(region) -> dict:
        """Returns an interesting subset of latest for given region"""
        latest = ds.get_one_region(region).latest
        return {key: latest[key] for key in ["county", "m1", "m2"] if latest.get(key) is not None}

    assert get_latest(Region.from_fips("95123")) == {
        "m1": 1,
        "county": "Smith Countyy",
    }
    assert get_latest(Region.from_fips("97123")) == {
        "m1": 2,
        "county": "Smith Countzz",
    }
    assert get_latest(Region.from_state("WY")) == {"m1": 3}
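
The key filtering in get_latest, shown on a plain dict: None-valued keys drop out, which is why the state row (no county, no m2) reduces to {"m1": 3}:

latest = {"county": None, "m1": 3, "m2": None}
subset = {k: latest[k] for k in ["county", "m1", "m2"] if latest.get(k) is not None}
assert subset == {"m1": 3}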