예제 #1
0
    def timeseries(self) -> TimeseriesDataset:
        """Return a TimeseriesDataset created from this data source.

        The keys of ``INDEX_FIELD_MAP`` are compared (as a set) against
        ``TimeseriesDataset.INDEX_FIELDS`` before the dataset is built.

        Raises:
            ValueError: if the two sets of index fields differ.
        """
        mapped_index_fields = set(self.INDEX_FIELD_MAP.keys())
        required_index_fields = set(TimeseriesDataset.INDEX_FIELDS)
        if mapped_index_fields != required_index_fields:
            raise ValueError("Index fields must match")

        fill_missing = self.FILL_MISSING_STATE_LEVEL_DATA
        return TimeseriesDataset.from_source(self, fill_missing_state=fill_missing)
예제 #2
0
def test_summarize_timeseries_fields_with_some_real_data():
    """Check the field summary computed from the local real data set.

    Builds a TimeseriesDataset from the local CovidCountyData source, keeps
    only rows whose FIPS starts with "06", summarizes them, and asserts on
    the summary row indexed by ("06025", "cases").
    """
    data_source = CovidCountyDataDataSource.local()
    ts = TimeseriesDataset.from_source(data_source)
    summary = summarize_timeseries_fields(
        ts.data.loc[lambda df: df[CommonFields.FIPS].str.startswith("06")])
    assert not summary.empty

    # Fetch the row once and reuse it for every assertion instead of
    # repeating the same .loc lookup for each column.
    cases_summary = summary.loc[("06025", "cases"), :]
    assert cases_summary["max_value"] > 7000
    assert cases_summary["max_date"] > pd.to_datetime("2020-08-01")
    assert cases_summary["largest_delta_date"] > pd.to_datetime("2020-04-01")
    # Truthiness check rather than `== True` (PEP 8).
    assert cases_summary["has_value"]
    assert cases_summary["num_observations"] > 100
예제 #3
0
def test_expected_field_in_sources(data_source_cls):
    """Check which US states appear in the raw data of a data source.

    The source's columns are renamed to their common names, rows with
    country "USA" are selected, and the set of values in the "state"
    column is checked: exactly {"NV"} for the "NHA" source, otherwise at
    least 48 values that match two upper-case letters.
    """
    data_source = data_source_cls.local()
    # The result is not used below; NOTE(review): presumably this only
    # verifies that the source converts without raising - confirm.
    TimeseriesDataset.from_source(data_source)

    # Extract the USA data from the raw DF. Replace this with cleaner access
    # when the DataSource makes it easy.
    source_to_common = {
        source: common for common, source in data_source.all_fields_map().items()
    }
    renamed_data = data_source.data.rename(columns=source_to_common)
    is_usa_row = renamed_data["country"] == "USA"
    usa_data = renamed_data.loc[is_usa_row]
    assert not usa_data.empty

    states = set(usa_data["state"])

    if data_source.SOURCE_NAME == "NHA":
        assert states == {"NV"}
        return

    good_state = set()
    for state in states:
        if not re.fullmatch(r"[A-Z]{2}", state):
            # Anything that is not exactly two capital letters is skipped.
            logging.info(f"Ignoring {state} in {data_source.SOURCE_NAME}")
            continue
        good_state.add(state)
    assert len(good_state) >= 48
예제 #4
0
 def timeseries(self) -> "TimeseriesDataset":
     """Return a TimeseriesDataset constructed from this data source."""
     dataset = TimeseriesDataset.from_source(self)
     return dataset