def test_get_subset_and_get_data():
    # Each row has a unique value in the `metric` column so the assertions can
    # identify exactly which rows a filter returned.
    input_df = pd.read_csv(
        StringIO(
            "city,county,state,fips,country,aggregate_level,date,metric\n"
            "Smithville,,ZZ,97123,USA,city,2020-03-23,smithville-march23\n"
            "New York City,,ZZ,97324,USA,city,2020-03-22,march22-nyc\n"
            "New York City,,ZZ,97324,USA,city,2020-03-24,march24-nyc\n"
            ",North County,ZZ,97001,USA,county,2020-03-23,county-metric\n"
            ",,ZZ,97001,USA,state,2020-03-23,mystate\n"
            ",,XY,96001,USA,state,2020-03-23,other-state\n"
            ",,,,UK,country,2020-03-23,you-kee\n"
            ",,,,US,country,2020-03-23,you-ess-hey\n"
        )
    )
    ts = TimeseriesDataset(input_df)

    assert set(ts.get_subset(AggregationLevel.COUNTRY).data["metric"]) == {
        "you-kee",
        "you-ess-hey",
    }
    assert set(
        ts.get_subset(AggregationLevel.COUNTRY, country="UK").data["country"]
    ) == {"UK"}
    assert set(ts.get_subset(AggregationLevel.STATE).data["metric"]) == {
        "mystate",
        "other-state",
    }
    assert set(ts.get_data(None, state="ZZ", after="2020-03-23")["metric"]) == {
        "march24-nyc"
    }
    assert set(ts.get_data(None, state="ZZ", after="2020-03-22")["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
        "march24-nyc",
    }
    assert set(
        ts.get_data(AggregationLevel.STATE, states=["ZZ", "XY"])["metric"]
    ) == {
        "mystate",
        "other-state",
    }
    assert set(ts.get_data(None, states=["ZZ"], on="2020-03-23")["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
    }
    assert set(
        ts.get_data(None, states=["ZZ"], before="2020-03-23")["metric"]
    ) == {"march22-nyc"}
def test_get_subset():
    # CSV with a unique FIPS value for every region, even countries. In
    # production countries are removed before TimeseriesDataset is created. A
    # future change may replace FIPS with a more general identifier.
    input_df = pd.read_csv(
        StringIO(
            "city,county,state,fips,country,aggregate_level,date,metric\n"
            "Smithville,,ZZ,97123,USA,city,2020-03-23,smithville-march23\n"
            "New York City,,ZZ,97324,USA,city,2020-03-22,march22-nyc\n"
            "New York City,,ZZ,97324,USA,city,2020-03-24,march24-nyc\n"
            ",North County,ZZ,97001,USA,county,2020-03-23,county-metric\n"
            ",,ZZ,97,USA,state,2020-03-23,mystate\n"
            ",,XY,96,USA,state,2020-03-23,other-state\n"
            ",,,iso2:uk,UK,country,2020-03-23,you-kee\n"
            ",,,iso2:us,US,country,2020-03-23,you-ess-hey\n"
        )
    )
    ts = TimeseriesDataset(input_df)

    assert set(ts.get_subset(AggregationLevel.COUNTRY).data["metric"]) == {
        "you-kee",
        "you-ess-hey",
    }
    assert set(
        ts.get_subset(AggregationLevel.COUNTRY, country="UK").data["country"]
    ) == {"UK"}
    assert set(ts.get_subset(AggregationLevel.STATE).data["metric"]) == {
        "mystate",
        "other-state",
    }
    assert set(ts.get_subset(state="ZZ", after="2020-03-23").data["metric"]) == {
        "march24-nyc"
    }
    assert set(ts.get_subset(state="ZZ", after="2020-03-22").data["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
        "march24-nyc",
    }
    assert set(
        ts.get_subset(AggregationLevel.STATE, states=["ZZ", "XY"]).data["metric"]
    ) == {
        "mystate",
        "other-state",
    }
    assert set(ts.get_subset(states=["ZZ"], on="2020-03-23").data["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
    }
    assert set(
        ts.get_subset(states=["ZZ"], before="2020-03-23").data["metric"]
    ) == {"march22-nyc"}
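# Hedged reference sketch, not the library's implementation: the filtering
# semantics the assertions above rely on, written as a standalone pandas helper
# (assumes `pandas as pd` is imported, as in the tests). Column names come from
# the test CSVs; `aggregation_level.value` is assumed to match the lowercase
# strings in the aggregate_level column, and ISO-format date strings compare
# correctly as plain strings.
def _filter_timeseries_sketch(
    df: pd.DataFrame,
    aggregation_level=None,
    country=None,
    state=None,
    states=None,
    on=None,
    after=None,
    before=None,
) -> pd.DataFrame:
    # Start with an all-True mask and narrow it for each provided filter.
    mask = pd.Series(True, index=df.index)
    if aggregation_level is not None:
        mask &= df["aggregate_level"] == aggregation_level.value
    if country is not None:
        mask &= df["country"] == country
    if state is not None:
        mask &= df["state"] == state
    if states is not None:
        mask &= df["state"].isin(states)
    if on is not None:
        mask &= df["date"] == on
    if after is not None:
        mask &= df["date"] > after
    if before is not None:
        mask &= df["date"] < before
    return df.loc[mask]
    # Example: _filter_timeseries_sketch(input_df, state="ZZ", after="2020-03-22")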
def latest_case_summaries_by_state(
    dataset: TimeseriesDataset,
) -> Iterator[StateCaseSummary]:
    """Builds a summary of the latest case data by state and county.

    Data is generated for the embeds, which expect a list of records in this
    format:

        {
            "state": <state>,
            "date": "YYYY-MM-DD",
            "cases": <cases>,
            "deaths": <deaths>,
            "counties": [
                {"fips": <fips code>, "cases": <cases>, "deaths": <deaths>, "date": <date>}
            ]
        }

    Args:
        dataset: Timeseries dataset.

    Yields:
        StateCaseSummary for each state.
    """
    dataset = dataset.get_subset(None, country="USA")
    latest_state = dataset.latest_values(AggregationLevel.STATE)
    latest_county = dataset.latest_values(AggregationLevel.COUNTY)
    latest_state["date"] = latest_state["date"].dt.strftime("%Y-%m-%d")
    latest_county["date"] = latest_county["date"].dt.strftime("%Y-%m-%d")

    states = latest_state[STATE_EXPORT_FIELDS].to_dict(orient="records")

    for state_data in states:
        state = state_data["state"]
        # Skip rows whose state is not a two-letter abbreviation.
        if len(state) != 2:
            _logger.info(f"Skipping state {state}")
            continue

        county_data = latest_county[latest_county.state == state]
        counties = county_data[COUNTY_EXPORT_FIELDS].to_dict(orient="records")
        state_data.update({"counties": counties})
        yield StateCaseSummary(**state_data)
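# Hedged usage sketch, not part of the original module: one way the summaries
# yielded above might be written out as JSON for the embeds. The helper name
# `write_case_summaries` and the use of dataclasses.asdict are illustrative
# assumptions; StateCaseSummary may expose a different serialization method.
import dataclasses
import json


def write_case_summaries(dataset: TimeseriesDataset, output_path: str) -> None:
    """Illustrative helper: dump the per-state summaries to a JSON file."""
    records = [
        dataclasses.asdict(summary)
        for summary in latest_case_summaries_by_state(dataset)
    ]
    with open(output_path, "w") as output_file:
        json.dump(records, output_file, indent=2)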