def test_wide_dates():
    """Check the per-date view returned by ``TimeseriesDataset.get_date_columns``.

    Two wide CSV fixtures (metric values and matching provenance tags) are
    loaded for two counties. The expected "value" mapping lists only the dates
    on which a metric has a value in the fixture, and the expected "provenance"
    mapping holds one source tag per (fips, variable) pair.
    """
    values_csv = (
        "fips,county,aggregate_level,date,m1,m2\n"
        "97111,Bar County,county,2020-04-01,1,\n"
        "97111,Bar County,county,2020-04-02,2,\n"
        "97222,Foo County,county,2020-04-01,,10\n"
        "97222,Foo County,county,2020-04-03,3,30\n"
    )
    provenance_csv = (
        "fips,date,m1,m2\n"
        "97111,2020-04-01,src11,\n"
        "97111,2020-04-02,src11,\n"
        "97222,2020-04-01,,src22\n"
        "97222,2020-04-03,src21,src22\n"
    )

    wide_values = read_csv_and_index_fips_date(values_csv)
    wide_provenance = read_csv_and_index_fips_date(provenance_csv)
    provenance_series = provenance_wide_metrics_to_series(
        wide_provenance,
        structlog.get_logger(),
    )

    dataset = TimeseriesDataset(wide_values.reset_index(), provenance=provenance_series)
    by_date = dataset.get_date_columns()
    key_columns = ["fips", "variable"]

    # Blank cells in the values fixture have no entry in the expected mapping.
    actual_values = to_dict(key_columns, by_date["value"])
    apr_1 = pd.to_datetime("2020-04-01")
    apr_2 = pd.to_datetime("2020-04-02")
    apr_3 = pd.to_datetime("2020-04-03")
    expected_values = {
        ("97111", "m1"): {apr_1: 1.0, apr_2: 2.0},
        ("97222", "m1"): {apr_3: 3.0},
        ("97222", "m2"): {apr_1: 10.0, apr_3: 30.0},
    }
    assert actual_values == expected_values

    actual_provenance = to_dict(key_columns, by_date["provenance"])
    expected_provenance = {
        ("97111", "m1"): {"value": "src11"},
        ("97222", "m1"): {"value": "src21"},
        ("97222", "m2"): {"value": "src22"},
    }
    assert actual_provenance == expected_provenance


def test_make_latest_from_timeseries_simple():
    """Check ``latest_values`` for one county whose metrics land on different dates.

    In the fixture ``m1`` is only filled on 2020-04-01 and ``m2`` only on
    2020-04-02; the expected latest row carries both values.
    """
    csv_text = (
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "97123,Smith County,ZZ,USA,2020-04-01,county,1,\n"
        "97123,Smith County,ZZ,USA,2020-04-02,county,,2\n"
    )
    frame = read_csv_and_index_fips_date(csv_text).reset_index()
    dataset = TimeseriesDataset(frame)

    latest = dataset.latest_values()
    observed = to_dict(["fips"], latest[["fips", "m1", "m2"]])

    expected = {"97123": {"m1": 1, "m2": 2}}
    assert observed == expected


def test_remove_duplicate_city_data():
    """Check ``CDSDataset._remove_duplicate_city_data`` against a hand-built frame.

    Per the expected frame: of the two Smithville rows only the 2020-03-22 one
    remains, the city rows get a ``county`` value filled in, and the rows that
    already have a county are returned unchanged.
    """
    raw_csv = (
        "city,county,state,fips,date,metric_a\n"
        "Smithville,,ZZ,97123,2020-03-23,march23-removed\n"
        "Smithville,,ZZ,97123,2020-03-22,march22-kept\n"
        "New York City,,ZZ,97324,2020-03-22,march22-ny-patched\n"
        ",North County,ZZ,97001,2020-03-22,county-not-touched\n"
        ",North County,ZZ,97001,2020-03-23,county-not-touched\n"
    )
    raw_frame = pd.read_csv(StringIO(raw_csv))

    cleaned_frame = cds_dataset.CDSDataset._remove_duplicate_city_data(raw_frame)

    wanted_csv = (
        "city,county,state,fips,date,metric_a\n"
        "Smithville,Smithville,ZZ,97123,2020-03-22,march22-kept\n"
        "New York City,New York,ZZ,97324,2020-03-22,march22-ny-patched\n"
        ",North County,ZZ,97001,2020-03-22,county-not-touched\n"
        ",North County,ZZ,97001,2020-03-23,county-not-touched\n"
    )
    wanted_frame = pd.read_csv(StringIO(wanted_csv))

    # Compare as dicts keyed by (fips, date) rather than comparing frames directly.
    actual = to_dict(["fips", "date"], cleaned_frame)
    wanted = to_dict(["fips", "date"], wanted_frame)
    assert actual == wanted


def test_make_latest_from_timeseries_dont_touch_county():
    """Check that ``latest_values`` returns each row's ``county`` as given.

    The fixture has two county rows with distinct county names and one state
    row with no county. The expected mapping keeps each county name as written
    and has no ``county`` entry for the state row; ``m2`` is empty in every
    row and absent from the expected mapping.
    """
    csv_text = (
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "95123,Smith Countyy,YY,USA,2020-04-01,county,1,\n"
        "97123,Smith Countzz,ZZ,USA,2020-04-01,county,2,\n"
        "97,,ZZ,USA,2020-04-01,state,3,\n"
    )
    frame = read_csv_and_index_fips_date(csv_text).reset_index()
    dataset = TimeseriesDataset(frame)

    latest = dataset.latest_values()
    observed = to_dict(["fips"], latest[["fips", "county", "m1", "m2"]])

    expected = {
        "95123": {"m1": 1, "county": "Smith Countyy"},
        "97123": {"m1": 2, "county": "Smith Countzz"},
        "97": {"m1": 3},
    }
    assert observed == expected