예제 #1
0
def test_get_subset_and_get_data():
    """Check ``get_subset`` and ``get_data`` filtering on a small CSV fixture.

    The fixture mixes city, county, state and country rows. Each assertion
    applies one combination of aggregation level, location and date filters
    and compares the surviving ``metric`` (or ``country``) values with the
    expected set.
    """
    csv_text = (
        "city,county,state,fips,country,aggregate_level,date,metric\n"
        "Smithville,,ZZ,97123,USA,city,2020-03-23,smithville-march23\n"
        "New York City,,ZZ,97324,USA,city,2020-03-22,march22-nyc\n"
        "New York City,,ZZ,97324,USA,city,2020-03-24,march24-nyc\n"
        ",North County,ZZ,97001,USA,county,2020-03-23,county-metric\n"
        ",,ZZ,97001,USA,state,2020-03-23,mystate\n"
        ",,XY,96001,USA,state,2020-03-23,other-state\n"
        ",,,,UK,country,2020-03-23,you-kee\n"
        ",,,,US,country,2020-03-23,you-ess-hey\n"
    )
    ts = TimeseriesDataset(pd.read_csv(StringIO(csv_text)))

    # --- get_subset: selection by aggregation level (and country) ---
    all_countries = ts.get_subset(AggregationLevel.COUNTRY)
    assert set(all_countries.data["metric"]) == {"you-kee", "you-ess-hey"}

    uk_only = ts.get_subset(AggregationLevel.COUNTRY, country="UK")
    assert set(uk_only.data["country"]) == {"UK"}

    all_states = ts.get_subset(AggregationLevel.STATE)
    assert set(all_states.data["metric"]) == {"mystate", "other-state"}

    # --- get_data: date filters within one state, across all levels ---
    # The expected sets show that `after` is expected to exclude the
    # boundary date itself.
    after_23rd = ts.get_data(None, state="ZZ", after="2020-03-23")
    assert set(after_23rd["metric"]) == {"march24-nyc"}

    after_22nd = ts.get_data(None, state="ZZ", after="2020-03-22")
    assert set(after_22nd["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
        "march24-nyc",
    }

    # --- get_data: a list of states restricted to state-level rows ---
    both_states = ts.get_data(AggregationLevel.STATE, states=["ZZ", "XY"])
    assert set(both_states["metric"]) == {"mystate", "other-state"}

    # --- get_data: exact-date and strictly-earlier filters ---
    on_23rd = ts.get_data(None, states=["ZZ"], on="2020-03-23")
    assert set(on_23rd["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
    }

    before_23rd = ts.get_data(None, states=["ZZ"], before="2020-03-23")
    assert set(before_23rd["metric"]) == {"march22-nyc"}
예제 #2
0
def test_get_subset():
    """Check ``get_subset`` filtering by level, location and date.

    Every assertion builds a subset from the same fixture and compares the
    ``metric`` (or ``country``) values left in its ``data`` frame with the
    expected set.
    """
    # CSV with a unique FIPS value for every region, even countries. In
    # production countries are removed before TimeseriesDataset is created.
    # A future change may replace FIPS with a more general identifier.
    csv_text = (
        "city,county,state,fips,country,aggregate_level,date,metric\n"
        "Smithville,,ZZ,97123,USA,city,2020-03-23,smithville-march23\n"
        "New York City,,ZZ,97324,USA,city,2020-03-22,march22-nyc\n"
        "New York City,,ZZ,97324,USA,city,2020-03-24,march24-nyc\n"
        ",North County,ZZ,97001,USA,county,2020-03-23,county-metric\n"
        ",,ZZ,97,USA,state,2020-03-23,mystate\n"
        ",,XY,96,USA,state,2020-03-23,other-state\n"
        ",,,iso2:uk,UK,country,2020-03-23,you-kee\n"
        ",,,iso2:us,US,country,2020-03-23,you-ess-hey\n"
    )
    ts = TimeseriesDataset(pd.read_csv(StringIO(csv_text)))

    # --- selection by aggregation level (and country) ---
    all_countries = ts.get_subset(AggregationLevel.COUNTRY)
    assert set(all_countries.data["metric"]) == {"you-kee", "you-ess-hey"}

    uk_only = ts.get_subset(AggregationLevel.COUNTRY, country="UK")
    assert set(uk_only.data["country"]) == {"UK"}

    all_states = ts.get_subset(AggregationLevel.STATE)
    assert set(all_states.data["metric"]) == {"mystate", "other-state"}

    # --- date filters within one state, across all levels ---
    # The expected sets show that `after` is expected to exclude the
    # boundary date itself.
    after_23rd = ts.get_subset(state="ZZ", after="2020-03-23")
    assert set(after_23rd.data["metric"]) == {"march24-nyc"}

    after_22nd = ts.get_subset(state="ZZ", after="2020-03-22")
    assert set(after_22nd.data["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
        "march24-nyc",
    }

    # --- a list of states restricted to state-level rows ---
    both_states = ts.get_subset(AggregationLevel.STATE, states=["ZZ", "XY"])
    assert set(both_states.data["metric"]) == {"mystate", "other-state"}

    # --- exact-date and strictly-earlier filters ---
    on_23rd = ts.get_subset(states=["ZZ"], on="2020-03-23")
    assert set(on_23rd.data["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
    }

    before_23rd = ts.get_subset(states=["ZZ"], before="2020-03-23")
    assert set(before_23rd.data["metric"]) == {"march22-nyc"}
예제 #3
0
def latest_case_summaries_by_state(
        dataset: TimeseriesDataset) -> Iterator[StateCaseSummary]:
    """Yield the latest case summary for each state, with its counties.

    Data is generated for the embeds, which expect records in this format:
    {
        "state": <state>,
        "date": "YYYY-MM-DD",
        "cases": <cases>,
        "deaths": <deaths>,
        "counties": [
            {"fips": <fips code>, "cases": <cases>, "deaths": <deaths>, "date": <date>}
        ]
    }

    Only rows with country "USA" are considered. State records whose
    ``state`` value is not exactly two characters long are logged and
    skipped.

    Args:
        dataset: Timeseries dataset to summarise.

    Yields:
        One StateCaseSummary per remaining state record, built from the
        state's exported fields plus a "counties" list holding the exported
        fields of that state's county rows.
    """
    usa = dataset.get_subset(None, country="USA")
    state_frame = usa.latest_values(AggregationLevel.STATE)
    county_frame = usa.latest_values(AggregationLevel.COUNTY)

    # Dates are exported as plain "YYYY-MM-DD" strings (state frame first,
    # then county frame).
    for frame in (state_frame, county_frame):
        frame["date"] = frame["date"].dt.strftime("%Y-%m-%d")

    for record in state_frame[STATE_EXPORT_FIELDS].to_dict(orient="records"):
        state = record["state"]
        if len(state) == 2:
            # Attach the exported county rows that belong to this state.
            in_state = county_frame[county_frame.state == state]
            record["counties"] = in_state[COUNTY_EXPORT_FIELDS].to_dict(
                orient="records")
            yield StateCaseSummary(**record)
        else:
            _logger.info(f"Skipping state {state}")