def test_make_latest_from_timeseries_simple():
    data = read_csv_and_index_fips_date(
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "97123,Smith County,ZZ,USA,2020-04-01,county,1,\n"
        "97123,Smith County,ZZ,USA,2020-04-02,county,,2\n"
    ).reset_index()
    ts = TimeseriesDataset(data)
    assert to_dict(["fips"], ts.latest_values()[["fips", "m1", "m2"]]) == {
        "97123": {"m1": 1, "m2": 2}
    }

def test_get_subset():
    # CSV with a unique FIPS value for every region, even countries. In production
    # countries are removed before TimeseriesDataset is created. A future change may
    # replace FIPS with a more general identifier.
    input_df = pd.read_csv(
        StringIO(
            "city,county,state,fips,country,aggregate_level,date,metric\n"
            "Smithville,,ZZ,97123,USA,city,2020-03-23,smithville-march23\n"
            "New York City,,ZZ,97324,USA,city,2020-03-22,march22-nyc\n"
            "New York City,,ZZ,97324,USA,city,2020-03-24,march24-nyc\n"
            ",North County,ZZ,97001,USA,county,2020-03-23,county-metric\n"
            ",,ZZ,97,USA,state,2020-03-23,mystate\n"
            ",,XY,96,USA,state,2020-03-23,other-state\n"
            ",,,iso2:uk,UK,country,2020-03-23,you-kee\n"
            ",,,iso2:us,US,country,2020-03-23,you-ess-hey\n"
        )
    )
    ts = TimeseriesDataset(input_df)

    assert set(ts.get_subset(AggregationLevel.COUNTRY).data["metric"]) == {
        "you-kee",
        "you-ess-hey",
    }
    assert set(ts.get_subset(AggregationLevel.COUNTRY, country="UK").data["country"]) == {"UK"}
    assert set(ts.get_subset(AggregationLevel.STATE).data["metric"]) == {
        "mystate",
        "other-state",
    }
    assert set(ts.get_subset(state="ZZ", after="2020-03-23").data["metric"]) == {"march24-nyc"}
    assert set(ts.get_subset(state="ZZ", after="2020-03-22").data["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
        "march24-nyc",
    }
    assert set(ts.get_subset(AggregationLevel.STATE, states=["ZZ", "XY"]).data["metric"]) == {
        "mystate",
        "other-state",
    }
    assert set(ts.get_subset(states=["ZZ"], on="2020-03-23").data["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
    }
    assert set(ts.get_subset(states=["ZZ"], before="2020-03-23").data["metric"]) == {"march22-nyc"}

def generate_region_timeseries(
    region_summary: RegionSummary,
    timeseries: TimeseriesDataset,
    model_output: Optional[CANPyseirLocationOutput],
) -> RegionSummaryWithTimeseries:
    if not region_summary.intervention:
        # All region summaries here are expected to have actuals values. It's a bit
        # unclear why the actuals value is optional in the first place, but at this
        # point we expect actuals to have been included.
        raise AssertionError("Region summary missing actuals")

    actuals_timeseries = []
    for row in timeseries.yield_records():
        # Timeseries records don't have population.
        row[CommonFields.POPULATION] = region_summary.population
        actual = _generate_actuals(row, region_summary.intervention)
        timeseries_row = ActualsTimeseriesRow(**actual.dict(), date=row[CommonFields.DATE])
        actuals_timeseries.append(timeseries_row)

    model_timeseries = []
    if model_output:
        model_timeseries = [
            _generate_prediction_timeseries_row(row)
            for row in model_output.data.to_dict(orient="records")
        ]

    # Iterating a pydantic model yields (field_name, value) pairs; copy every field
    # of the summary into the combined model.
    region_summary_data = {key: getattr(region_summary, key) for (key, _) in region_summary}
    return RegionSummaryWithTimeseries(
        **region_summary_data,
        timeseries=model_timeseries,
        actualsTimeseries=actuals_timeseries,
    )

def _fips_csv_to_one_region(csv_str: str, region: Region) -> OneRegionTimeseriesDataset:
    # Make a TimeseriesDataset first because it can have a FIPS column without location_id.
    ts = TimeseriesDataset.load_csv(io.StringIO(csv_str))
    # from_timeseries_and_latest adds the location_id column needed by get_one_region.
    return MultiRegionTimeseriesDataset.from_timeseries_and_latest(
        ts, ts.latest_values_object()
    ).get_one_region(region)

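# Illustrative use of the helper above (a sketch, not one of the original tests; the
# FIPS value and "m1" metric column are made up in the style of the other fixtures here):
def _example_one_region() -> OneRegionTimeseriesDataset:
    csv_str = (
        "fips,county,aggregate_level,date,m1\n"
        "97111,Bar County,county,2020-04-01,1\n"
    )
    return _fips_csv_to_one_region(csv_str, Region.from_fips("97111"))
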
def test_aggregate():
    ts = TimeseriesDataset.load_csv(
        io.StringIO(
            "fips,state,aggregate_level,county,m1,date,foo\n"
            "55005,ZZ,county,North County,1,2020-05-01,ab\n"
            "55005,ZZ,county,North County,2,2020-05-02,cd\n"
            "55005,ZZ,county,North County,3,2020-05-03,ef\n"
            "55006,ZZ,county,South County,3,2020-05-03,ef\n"
            "55006,ZZ,county,South County,4,2020-05-04,gh\n"
            "55,ZZ,state,Grand State,41,2020-05-01,ij\n"
            "55,ZZ,state,Grand State,43,2020-05-03,kl\n"
        )
    )
    ts_in = MultiRegionTimeseriesDataset.from_timeseries_and_latest(
        ts, ts.latest_values_object()
    )
    agg = statistical_areas.CountyToCBSAAggregator(
        county_map={"55005": "10001", "55006": "10001"},
        cbsa_title_map={"10001": "Stat Area 1"},
    )
    ts_out = agg.aggregate(ts_in)

    assert ts_out.groupby_region().ngroups == 1

    ts_cbsa = ts_out.get_one_region(Region.from_cbsa_code("10001"))
    # County values are summed per date; dates present in only one county pass through.
    assert ts_cbsa.date_indexed["m1"].to_dict() == {
        pd.to_datetime("2020-05-01"): 1,
        pd.to_datetime("2020-05-02"): 2,
        pd.to_datetime("2020-05-03"): 6,
        pd.to_datetime("2020-05-04"): 4,
    }

def get_timeseries_for_state(
    state: str, columns: List = None, min_range_with_some_value: bool = False
) -> TimeseriesDataset:
    """Gets timeseries for a specific state abbreviation.

    Args:
        state: 2-letter state code.
        columns: List of columns, apart from `TimeseriesDataset.INDEX_FIELDS`, to include.
        min_range_with_some_value: If True, removes NaNs that pad values at beginning
            and end of timeseries. Only applicable when columns are specified.

    Returns: Timeseries for state.
    """
    state_ts = load_us_timeseries_dataset().get_subset(AggregationLevel.STATE, state=state)
    if columns:
        subset = state_ts.data.loc[:, TimeseriesDataset.INDEX_FIELDS + columns].reset_index(
            drop=True
        )
        if min_range_with_some_value:
            subset = _remove_padded_nans(subset, columns)
        state_ts = TimeseriesDataset(subset)
    return state_ts

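# A minimal usage sketch for get_timeseries_for_state (illustrative, not part of the
# original module; assumes CommonFields.CASES is present in the loaded dataset):
def _example_state_cases():
    # Trim the leading/trailing all-NaN padding so the series starts and ends with data.
    state_ts = get_timeseries_for_state(
        "NY", columns=[CommonFields.CASES], min_range_with_some_value=True
    )
    return state_ts.data
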
def get_timeseries_for_fips(
    fips: str, columns: List = None, min_range_with_some_value: bool = False
) -> TimeseriesDataset:
    """Gets timeseries for a specific FIPS code.

    Args:
        fips: FIPS code. Can be county (5 character) or state (2 character) code.
        columns: List of columns, apart from `TimeseriesDataset.INDEX_FIELDS`, to include.
        min_range_with_some_value: If True, removes NaNs that pad values at beginning
            and end of timeseries. Only applicable when columns are specified.

    Returns: Timeseries for fips.
    """
    fips_ts = load_us_timeseries_dataset().get_subset(None, fips=fips)
    if columns:
        subset = fips_ts.data.loc[:, TimeseriesDataset.INDEX_FIELDS + columns].reset_index(
            drop=True
        )
        if min_range_with_some_value:
            subset = _remove_padded_nans(subset, columns)
        fips_ts = TimeseriesDataset(subset)
    return fips_ts

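# Companion sketch for the FIPS variant (illustrative; the FIPS code is one used in
# the tests above): a 5-character code selects a county, a 2-character code selects a
# state. The aggregation level is left as None so the FIPS match alone decides.
def _example_county_cases():
    return get_timeseries_for_fips("06045", columns=[CommonFields.CASES]).data
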
def test_unique_timeseries(data_source_cls):
    data_source = data_source_cls.local()
    timeseries = TimeseriesDataset.build_from_data_source(data_source)
    timeseries = combined_datasets.US_STATES_FILTER.apply(timeseries)
    timeseries_data = timeseries.data.set_index(timeseries.INDEX_FIELDS)
    duplicates = timeseries_data.index.duplicated()
    assert not sum(duplicates)

def timeseries(self) -> TimeseriesDataset:
    """Build TimeseriesDataset from this data source."""
    if set(self.INDEX_FIELD_MAP.keys()) != set(TimeseriesDataset.INDEX_FIELDS):
        raise ValueError("Index fields must match")
    return TimeseriesDataset.from_source(
        self, fill_missing_state=self.FILL_MISSING_STATE_LEVEL_DATA
    )

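# Sketch of the index contract checked above (illustrative helper, not in the original
# code): a source can be turned into a TimeseriesDataset only when its INDEX_FIELD_MAP
# keys cover exactly TimeseriesDataset.INDEX_FIELDS.
def _index_fields_match(source) -> bool:
    return set(source.INDEX_FIELD_MAP.keys()) == set(TimeseriesDataset.INDEX_FIELDS)
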
def test_get_subset_and_get_data():
    input_df = pd.read_csv(
        StringIO(
            "city,county,state,fips,country,aggregate_level,date,metric\n"
            "Smithville,,ZZ,97123,USA,city,2020-03-23,smithville-march23\n"
            "New York City,,ZZ,97324,USA,city,2020-03-22,march22-nyc\n"
            "New York City,,ZZ,97324,USA,city,2020-03-24,march24-nyc\n"
            ",North County,ZZ,97001,USA,county,2020-03-23,county-metric\n"
            ",,ZZ,97001,USA,state,2020-03-23,mystate\n"
            ",,XY,96001,USA,state,2020-03-23,other-state\n"
            ",,,,UK,country,2020-03-23,you-kee\n"
            ",,,,US,country,2020-03-23,you-ess-hey\n"
        )
    )
    ts = TimeseriesDataset(input_df)

    assert set(ts.get_subset(AggregationLevel.COUNTRY).data["metric"]) == {
        "you-kee",
        "you-ess-hey",
    }
    assert set(ts.get_subset(AggregationLevel.COUNTRY, country="UK").data["country"]) == {"UK"}
    assert set(ts.get_subset(AggregationLevel.STATE).data["metric"]) == {
        "mystate",
        "other-state",
    }
    assert set(ts.get_data(None, state="ZZ", after="2020-03-23")["metric"]) == {"march24-nyc"}
    assert set(ts.get_data(None, state="ZZ", after="2020-03-22")["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
        "march24-nyc",
    }
    assert set(ts.get_data(AggregationLevel.STATE, states=["ZZ", "XY"])["metric"]) == {
        "mystate",
        "other-state",
    }
    assert set(ts.get_data(None, states=["ZZ"], on="2020-03-23")["metric"]) == {
        "smithville-march23",
        "county-metric",
        "mystate",
    }
    assert set(ts.get_data(None, states=["ZZ"], before="2020-03-23")["metric"]) == {"march22-nyc"}

def test_unique_timeseries(data_source_cls):
    data_source = data_source_cls.local()
    timeseries = TimeseriesDataset.build_from_data_source(data_source)
    timeseries = combined_datasets.US_STATES_FILTER.apply(timeseries)
    # Check for duplicate rows with the same INDEX_FIELDS. Sort by index so duplicates
    # are next to each other in the message if the assert fails.
    timeseries_data = timeseries.data.set_index(timeseries.INDEX_FIELDS).sort_index()
    duplicates = timeseries_data.index.duplicated(keep=False)
    assert not sum(duplicates), str(timeseries_data.loc[duplicates])

def load_combined_timeseries(
    sources: Dict[str, TimeseriesDataset], timeseries: TimeseriesDataset
) -> TimeseriesDataset:
    timeseries_data = timeseries.data.copy()
    timeseries_data["source"] = "Combined Data"
    combined_timeseries = TimeseriesDataset(
        pd.concat([timeseries_data] + [source.data for source in sources.values()])
    )
    return combined_timeseries

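# Illustrative sketch of what load_combined_timeseries produces (the helper name is
# made up): rows from the base timeseries are tagged source == "Combined Data", while
# rows from each entry in `sources` keep whatever "source" value they already had.
def _example_source_counts(base: TimeseriesDataset, extra: TimeseriesDataset):
    combined = load_combined_timeseries({"extra": extra}, base)
    return combined.data["source"].value_counts(dropna=False)
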
def test_write_csv():
    df = pd.DataFrame(
        {
            CommonFields.DATE: pd.to_datetime(["2020-04-01", "2020-04-02"]),
            CommonFields.FIPS: ["06045", "45123"],
            CommonFields.CASES: [234, 456],
        }
    )
    ts = TimeseriesDataset(df)

    expected_csv = """,,summary,summary,summary,summary,summary,summary,summary,summary,summary,value,value
date,,has_value,min_date,max_date,max_value,min_value,latest_value,num_observations,largest_delta,largest_delta_date,2020-04-01 00:00:00,2020-04-02 00:00:00
fips,variable,,,,,,,,,,,
06045,cases,True,2020-04-01,2020-04-01,234,234,234,1,,,234,
45123,cases,True,2020-04-02,2020-04-02,456,456,456,1,,,,456
"""
    # Call wide_dates_df.write_csv, which writes rows indexed by ["fips", "variable"],
    # the expected normal index.
    with temppathlib.NamedTemporaryFile("w+") as tmp:
        wide_dates_df.write_csv(ts.get_date_columns(), tmp.path)
        assert expected_csv == tmp.file.read()

def latest_case_summaries_by_state(dataset: TimeseriesDataset) -> Iterator[StateCaseSummary]:
    """Builds summary of latest case data by state and county.

    Data is generated for the embeds, which expect a list of records in this format:

        {
            "state": <state>,
            "date": "YYYY-MM-DD",
            "cases": <cases>,
            "deaths": <deaths>,
            "counties": [
                {"fips": <fips code>, "cases": <cases>, "deaths": <deaths>, "date": <date>}
            ]
        }

    Args:
        dataset: Timeseries dataset.

    Returns: Iterator of StateCaseSummary records.
    """
    dataset = dataset.get_subset(None, country="USA")
    latest_state = dataset.latest_values(AggregationLevel.STATE)
    latest_county = dataset.latest_values(AggregationLevel.COUNTY)
    latest_state["date"] = latest_state["date"].dt.strftime("%Y-%m-%d")
    latest_county["date"] = latest_county["date"].dt.strftime("%Y-%m-%d")

    states = latest_state[STATE_EXPORT_FIELDS].to_dict(orient="records")
    for state_data in states:
        state = state_data["state"]
        if len(state) != 2:
            _logger.info(f"Skipping state {state}")
            continue
        county_data = latest_county[latest_county.state == state]
        counties = county_data[COUNTY_EXPORT_FIELDS].to_dict(orient="records")
        state_data.update({"counties": counties})
        yield StateCaseSummary(**state_data)

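# Illustrative consumer sketch (the function name is an assumption; StateCaseSummary is
# assumed to be a pydantic model with .dict(), like the other models in this codebase):
import json

def _write_state_summaries(dataset: TimeseriesDataset, path: str) -> None:
    records = [summary.dict() for summary in latest_case_summaries_by_state(dataset)]
    with open(path, "w") as f:
        # default=str covers any values that are not directly JSON-serializable.
        json.dump(records, f, default=str)
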
def test_wide_dates():
    input_df = read_csv_and_index_fips_date(
        "fips,county,aggregate_level,date,m1,m2\n"
        "97111,Bar County,county,2020-04-01,1,\n"
        "97111,Bar County,county,2020-04-02,2,\n"
        "97222,Foo County,county,2020-04-01,,10\n"
        "97222,Foo County,county,2020-04-03,3,30\n"
    )
    provenance = provenance_wide_metrics_to_series(
        read_csv_and_index_fips_date(
            "fips,date,m1,m2\n"
            "97111,2020-04-01,src11,\n"
            "97111,2020-04-02,src11,\n"
            "97222,2020-04-01,,src22\n"
            "97222,2020-04-03,src21,src22\n"
        ),
        structlog.get_logger(),
    )

    ts = TimeseriesDataset(input_df.reset_index(), provenance=provenance)
    date_columns = ts.get_date_columns()

    assert to_dict(["fips", "variable"], date_columns["value"]) == {
        ("97111", "m1"): {
            pd.to_datetime("2020-04-01"): 1.0,
            pd.to_datetime("2020-04-02"): 2.0,
        },
        ("97222", "m1"): {pd.to_datetime("2020-04-03"): 3.0},
        ("97222", "m2"): {
            pd.to_datetime("2020-04-01"): 10.0,
            pd.to_datetime("2020-04-03"): 30.0,
        },
    }
    assert to_dict(["fips", "variable"], date_columns["provenance"]) == {
        ("97111", "m1"): {"value": "src11"},
        ("97222", "m1"): {"value": "src21"},
        ("97222", "m2"): {"value": "src22"},
    }

def build_from_data_source(cls, source):
    from libs.datasets.timeseries import TimeseriesDataset

    if set(source.INDEX_FIELD_MAP.keys()) == set(TimeseriesDataset.INDEX_FIELDS):
        timeseries = TimeseriesDataset.build_from_data_source(source)
        return timeseries.to_latest_values_dataset()

    if set(source.INDEX_FIELD_MAP.keys()) != set(cls.INDEX_FIELDS):
        raise ValueError("Index fields must match")
    return cls.from_source(source)

def test_make_latest_from_timeseries_dont_touch_county():
    data = read_csv_and_index_fips_date(
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "95123,Smith Countyy,YY,USA,2020-04-01,county,1,\n"
        "97123,Smith Countzz,ZZ,USA,2020-04-01,county,2,\n"
        "97,,ZZ,USA,2020-04-01,state,3,\n"
    ).reset_index()
    ts = TimeseriesDataset(data)
    assert to_dict(["fips"], ts.latest_values()[["fips", "county", "m1", "m2"]]) == {
        "95123": {"m1": 1, "county": "Smith Countyy"},
        "97123": {"m1": 2, "county": "Smith Countzz"},
        "97": {"m1": 3},
    }

def get_hospitalization_data():
    data = combined_datasets.build_us_timeseries_with_all_fields().data
    # Since we're using this data for hospitalization metrics only, return only rows
    # with hospitalization data. As the use cases of this data source expand, we may
    # not want to drop rows. For context, as of 4/8, 607/1821 rows contained
    # hospitalization data.
    has_current_hospital = data[TimeseriesDataset.Fields.CURRENT_HOSPITALIZED].notnull()
    has_cumulative_hospital = data[TimeseriesDataset.Fields.CUMULATIVE_HOSPITALIZED].notnull()
    return TimeseriesDataset(data[has_current_hospital | has_cumulative_hospital])

def test_persist_and_load_dataset(tmp_path, nyc_fips):
    region = Region.from_fips(nyc_fips)
    dataset = combined_datasets.load_us_timeseries_dataset()
    timeseries_nyc = TimeseriesDataset(dataset.get_one_region(region).data)

    pointer = combined_dataset_utils.persist_dataset(timeseries_nyc, tmp_path)

    downloaded_dataset = pointer.load_dataset()
    differ_l = DatasetDiff.make(downloaded_dataset.data)
    differ_r = DatasetDiff.make(timeseries_nyc.data)
    differ_l.compare(differ_r)
    assert not len(differ_l.my_ts)

def build_from_data_source(cls, source):
    from libs.datasets.timeseries import TimeseriesDataset

    # Timeseries-shaped sources are first built as a TimeseriesDataset and then
    # collapsed to their latest values.
    if set(source.INDEX_FIELD_MAP.keys()) == set(TimeseriesDataset.INDEX_FIELDS):
        timeseries = TimeseriesDataset.build_from_data_source(source)
        return timeseries.to_latest_values_dataset()

    if set(source.INDEX_FIELD_MAP.keys()) != set(cls.INDEX_FIELDS):
        raise ValueError("Index fields must match")
    return cls.from_source(source, fill_missing_state=source.FILL_MISSING_STATE_LEVEL_DATA)

def test_summarize_timeseries_fields_with_some_real_data():
    data_source = CovidCountyDataDataSource.local()
    ts = TimeseriesDataset.from_source(data_source)
    summary = summarize_timeseries_fields(
        ts.data.loc[lambda df: df[CommonFields.FIPS].str.startswith("06")]
    )
    assert not summary.empty

    cases_summary = summary.loc[("06025", "cases"), :]
    assert cases_summary["max_value"] > 7000
    assert cases_summary["max_date"] > pd.to_datetime("2020-08-01")
    assert cases_summary["largest_delta_date"] > pd.to_datetime("2020-04-01")
    assert cases_summary["has_value"] == True
    assert cases_summary["num_observations"] > 100

def get_hospitalization_data():
    """Returns a timeseries of only the rows that have hospitalization data.

    Since we're using this data for hospitalization metrics only, rows without
    hospitalization data are dropped. As the use cases of this data source expand,
    we may not want to drop them. For context, as of 4/8, 607/1821 rows contained
    hospitalization data.

    Returns
    -------
    TimeseriesDataset
    """
    data = combined_datasets.load_us_timeseries_dataset().data
    has_current_hospital = data[CommonFields.CURRENT_HOSPITALIZED].notnull()
    has_cumulative_hospital = data[CommonFields.CUMULATIVE_HOSPITALIZED].notnull()
    return TimeseriesDataset(data[has_current_hospital | has_cumulative_hospital])

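# Illustrative sanity check for get_hospitalization_data (a sketch, not an original
# test): every returned row has at least one of the two hospitalization columns set,
# which is exactly the filter applied above.
def _check_hospitalization_rows():
    ts = get_hospitalization_data()
    assert (
        ts.data[CommonFields.CURRENT_HOSPITALIZED].notnull()
        | ts.data[CommonFields.CUMULATIVE_HOSPITALIZED].notnull()
    ).all()
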
def test_load_from_local_public_data():
    agg = statistical_areas.CountyToCBSAAggregator.from_local_public_data()

    assert agg.cbsa_title_map["43580"] == "Sioux City, IA-NE-SD"
    assert agg.county_map["48187"] == "41700"

    ts = TimeseriesDataset.load_csv(
        io.StringIO(
            "fips,state,aggregate_level,county,m1,date,foo\n"
            "48059,ZZ,county,North County,3,2020-05-03,ef\n"
            "48253,ZZ,county,South County,4,2020-05-03,ef\n"
        )
    )
    ts_in = MultiRegionTimeseriesDataset.from_timeseries_and_latest(
        ts, ts.latest_values_object()
    )
    ts_out = agg.aggregate(ts_in)

    ts_cbsa = ts_out.get_one_region(Region.from_cbsa_code("10180"))
    assert ts_cbsa.date_indexed["m1"].to_dict() == {
        pd.to_datetime("2020-05-03"): 7,
    }

def _write_pipeline_output(
    pipelines: List[Union[SubStatePipeline, StatePipeline]],
    output_dir: str,
    output_interval_days: int = 4,
    write_webui_output: bool = False,
):
    infection_rate_metric_df = pd.concat((p.infer_df for p in pipelines), ignore_index=True)
    # TODO: Use constructors in MultiRegionTimeseriesDataset
    timeseries_dataset = TimeseriesDataset(infection_rate_metric_df)
    latest = timeseries_dataset.latest_values_object()
    multiregion_rt = MultiRegionTimeseriesDataset.from_timeseries_and_latest(
        timeseries_dataset, latest
    )
    output_path = pathlib.Path(output_dir) / pyseir.utils.SummaryArtifact.RT_METRIC_COMBINED.value
    multiregion_rt.to_csv(output_path)
    root.info(f"Saving Rt results to {output_path}")

    icu_df = pd.concat((p.icu_data.data for p in pipelines if p.icu_data), ignore_index=True)
    timeseries_dataset = TimeseriesDataset(icu_df)
    latest = timeseries_dataset.latest_values_object().data.set_index(CommonFields.LOCATION_ID)
    multiregion_icu = MultiRegionTimeseriesDataset(icu_df, latest)
    output_path = pathlib.Path(output_dir) / pyseir.utils.SummaryArtifact.ICU_METRIC_COMBINED.value
    multiregion_icu.to_csv(output_path)
    root.info(f"Saving ICU results to {output_path}")

    if write_webui_output:
        # This does not parallelize well because the web UI mapper doesn't serialize
        # efficiently.
        # TODO: Remove intermediate artifacts and parallelize artifact creation better.
        # Approximately 40% of the processing time is taken on this step.
        web_ui_mapper = WebUIDataAdaptorV1(
            output_interval_days=output_interval_days,
            output_dir=output_dir,
        )
        webui_inputs = [
            webui_data_adaptor_v1.RegionalInput.from_results(p.fitter, p.ensemble, p.infer_df)
            for p in pipelines
            if p.fitter
        ]
        with Pool(maxtasksperchild=1) as p:
            p.map(web_ui_mapper.write_region_safely, webui_inputs)

def test_expected_field_in_sources(data_source_cls):
    data_source = data_source_cls.local()
    ts = TimeseriesDataset.from_source(data_source)

    # Extract the USA data from the raw DF. Replace this with cleaner access when the
    # DataSource makes it easy.
    rename_columns = {source: common for common, source in data_source.all_fields_map().items()}
    renamed_data = data_source.data.rename(columns=rename_columns)
    usa_data = renamed_data.loc[renamed_data["country"] == "USA"]

    assert not usa_data.empty

    states = set(usa_data["state"])
    if data_source.SOURCE_NAME == "NHA":
        assert states == {"NV"}
    else:
        good_states = set()
        for state in states:
            if re.fullmatch(r"[A-Z]{2}", state):
                good_states.add(state)
            else:
                logging.info(f"Ignoring {state} in {data_source.SOURCE_NAME}")
        assert len(good_states) >= 48

def timeseries(self) -> TimeseriesDataset:
    """Builds generic beds dataset"""
    return TimeseriesDataset.build_from_data_source(self)

def timeseries(self) -> "TimeseriesDataset":
    """Builds generic beds dataset"""
    return TimeseriesDataset.from_source(self)