예제 #1
0
def latest_case_summaries_by_state(
        dataset: TimeseriesDataset) -> Iterator[StateCaseSummary]:
    """Yield a summary of the latest case data for each state, with its counties.

    The dataset is first restricted to country "USA". The latest state-level
    and county-level values are then fetched, and their "date" columns are
    rendered as "YYYY-MM-DD" strings.

    The embeds consuming this data expect records shaped like:
    {
        "state": <state>,
        "date": "YYYY-MM-DD",
        "cases": <cases>,
        "deaths": <deaths>,
        "counties": [
            {"fips": <fips code>, "cases": <cases>, "deaths": <deaths>, "date": <date>}
        ]
    }
    The exact fields emitted are whatever STATE_EXPORT_FIELDS and
    COUNTY_EXPORT_FIELDS select, plus the "counties" list added here.

    Args:
        dataset: Timeseries dataset to summarize.

    Yields:
        One StateCaseSummary per state row whose "state" value is exactly two
        characters long. Other rows are logged and skipped.
    """
    us_data = dataset.get_subset(None, country="USA")
    state_rows = us_data.latest_values(AggregationLevel.STATE)
    county_rows = us_data.latest_values(AggregationLevel.COUNTY)

    # Dates are exported as plain ISO strings rather than timestamp objects.
    for frame in (state_rows, county_rows):
        frame["date"] = frame["date"].dt.strftime("%Y-%m-%d")

    for state_data in state_rows[STATE_EXPORT_FIELDS].to_dict(orient="records"):
        state = state_data["state"]
        if len(state) != 2:
            _logger.info(f"Skipping state {state}")
            continue

        # Attach every county row that belongs to this state.
        in_state = county_rows[county_rows["state"] == state]
        state_data["counties"] = in_state[COUNTY_EXPORT_FIELDS].to_dict(
            orient="records")

        yield StateCaseSummary(**state_data)
def test_make_latest_from_timeseries_simple():
    """Latest values for one county combine metrics reported on different dates.

    The fixture has m1 only on 2020-04-01 and m2 only on 2020-04-02; the
    expected latest record carries both.
    """
    csv_text = (
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "97123,Smith County,ZZ,USA,2020-04-01,county,1,\n"
        "97123,Smith County,ZZ,USA,2020-04-02,county,,2\n"
    )
    frame = read_csv_and_index_fips_date(csv_text).reset_index()
    latest = TimeseriesDataset(frame).latest_values()

    expected = {"97123": {"m1": 1, "m2": 2}}
    assert to_dict(["fips"], latest[["fips", "m1", "m2"]]) == expected
def test_make_latest_from_timeseries_dont_touch_county():
    """Latest values keep each row's own county name and add none to the state row.

    The fixture holds two county rows and one state row (empty county) on the
    same date. m2 is empty everywhere, so it is expected to be absent from
    every record.
    """
    csv_text = (
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "95123,Smith Countyy,YY,USA,2020-04-01,county,1,\n"
        "97123,Smith Countzz,ZZ,USA,2020-04-01,county,2,\n"
        "97,,ZZ,USA,2020-04-01,state,3,\n"
    )
    frame = read_csv_and_index_fips_date(csv_text).reset_index()
    latest = TimeseriesDataset(frame).latest_values()

    expected = {
        "95123": {"m1": 1, "county": "Smith Countyy"},
        "97123": {"m1": 2, "county": "Smith Countzz"},
        "97": {"m1": 3},
    }
    assert to_dict(["fips"], latest[["fips", "county", "m1", "m2"]]) == expected