def test_update_and_load(tmp_path: pathlib.Path, nyc_fips, nyc_region):
    """Round-trip a one-county dataset through update_data_public_head and reload it."""
    combined_df = combined_datasets.load_us_timeseries_dataset().combined_df

    # Persist only the NYC county rows; a single county keeps the test fast.
    nyc_rows = combined_df.loc[combined_df[CommonFields.FIPS] == nyc_fips, :]
    nyc_timeseries = MultiRegionTimeseriesDataset.from_combined_dataframe(nyc_rows)
    nyc_latest = LatestValuesDataset(
        nyc_timeseries.latest_data_with_fips.reset_index())
    nyc_record = nyc_latest.get_record_for_fips(nyc_fips)
    assert nyc_record[CommonFields.POPULATION] > 1_000_000
    assert nyc_record[CommonFields.LOCATION_ID]

    combined_dataset_utils.update_data_public_head(
        tmp_path,
        latest_dataset=nyc_latest,
        timeseries_dataset=nyc_timeseries,
    )

    # Reload from the pointer directory and check both datasets survived unchanged.
    reloaded_timeseries = combined_datasets.load_us_timeseries_dataset(
        pointer_directory=tmp_path)
    reloaded_latest = combined_datasets.load_us_latest_dataset(
        pointer_directory=tmp_path)
    assert reloaded_latest.get_record_for_fips(nyc_fips) == nyc_record
    assert_combined_like(reloaded_timeseries, nyc_timeseries)
Example #2
0
def _cache_global_datasets():
    """Warm the global dataset caches before any fork.

    Populating the caches in the parent process means forked subprocesses
    inherit them; the return values are intentionally discarded.
    """
    for warm in (
            combined_datasets.load_us_latest_dataset,
            combined_datasets.load_us_timeseries_dataset,
            infer_icu.get_region_weight_map,
    ):
        warm()
Example #3
0
def test_fips_metadata(nyc_fips):
    """InitialConditionsFitter exposes the expected metadata for the NYC FIPS."""
    # Warm the combined timeseries cache used internally by the fitter.
    combined_datasets.load_us_timeseries_dataset()
    ic_fitter = initial_conditions_fitter.InitialConditionsFitter(nyc_fips)
    assert ic_fitter.state == "NY"
    assert ic_fitter.county == "New York County"
    assert ic_fitter.data_start_date == pd.Timestamp("2020-03-01")
    # y must be a numpy array rather than a pandas DataFrame/Series.
    assert isinstance(ic_fitter.y, numpy.ndarray)
Example #4
0
def generate_api_v2(model_output_dir, output, aggregation_level, state, fips):
    """The entry function for invocation.

    Loads the combined timeseries plus the ICU/Rt model artifacts, builds a
    RegionalInput per selected region, runs the v2 API pipeline and deploys
    county- and state-level output.
    """

    # Caching load of us timeseries dataset so the subset call below is cheap.
    combined_datasets.load_us_timeseries_dataset()

    # All state abbreviations plus the PR/MP territories.
    # (Comprehension variable renamed so it no longer shadows the `state` parameter.)
    active_states = [s.abbr for s in us.STATES]
    active_states = active_states + ["PR", "MP"]

    # Load all API Regions
    regions = combined_datasets.get_subset_regions(
        aggregation_level=aggregation_level,
        exclude_county_999=True,
        state=state,
        fips=fips,
        states=active_states,
    )
    # Plain strings below: the f-prefixes were unnecessary (no placeholders).
    _logger.info("Loading all regional inputs.")

    icu_data_path = model_output_dir / SummaryArtifact.ICU_METRIC_COMBINED.value
    icu_data = MultiRegionTimeseriesDataset.from_csv(icu_data_path)
    icu_data_map = dict(icu_data.iter_one_regions())

    rt_data_path = model_output_dir / SummaryArtifact.RT_METRIC_COMBINED.value
    rt_data = MultiRegionTimeseriesDataset.from_csv(rt_data_path)
    rt_data_map = dict(rt_data.iter_one_regions())

    regions_data = combined_datasets.load_us_timeseries_dataset(
    ).get_regions_subset(regions)

    regional_inputs = [
        api_v2_pipeline.RegionalInput.from_one_regions(
            region,
            regional_data,
            icu_data=icu_data_map.get(region),
            rt_data=rt_data_map.get(region),
        ) for region, regional_data in regions_data.iter_one_regions()
    ]

    _logger.info("Finished loading all regional inputs.")

    # Build all region timeseries API Output objects.
    _logger.info("Generating all API Timeseries")
    all_timeseries = api_v2_pipeline.run_on_regions(regional_inputs)

    api_v2_pipeline.deploy_single_level(all_timeseries,
                                        AggregationLevel.COUNTY, output)
    api_v2_pipeline.deploy_single_level(all_timeseries, AggregationLevel.STATE,
                                        output)

    _logger.info("Finished API generation.")
Example #5
0
def generate_api(input_dir, output, summary_output, aggregation_level, state,
                 fips):
    """The entry function for invocation"""

    # Caching load of us timeseries dataset
    combined_datasets.load_us_timeseries_dataset()

    # All state abbreviations plus the PR/MP territories.
    active_states = [state.abbr for state in us.STATES]
    active_states = active_states + ["PR", "MP"]
    regions = combined_datasets.get_subset_regions(
        aggregation_level=aggregation_level,
        exclude_county_999=True,
        state=state,
        fips=fips,
        states=active_states,
    )

    # Model artifacts (ICU and Rt metric CSVs) are read from input_dir.
    icu_data_path = input_dir / SummaryArtifact.ICU_METRIC_COMBINED.value
    icu_data = MultiRegionTimeseriesDataset.from_csv(icu_data_path)
    rt_data_path = input_dir / SummaryArtifact.RT_METRIC_COMBINED.value
    rt_data = MultiRegionTimeseriesDataset.from_csv(rt_data_path)

    for intervention in list(Intervention):
        _logger.info(f"Running intervention {intervention.name}")

        # Bind the per-intervention constants so pool.map only varies the region.
        _load_input = functools.partial(
            api_pipeline.RegionalInput.from_region_and_intervention,
            intervention=intervention,
            rt_data=rt_data,
            icu_data=icu_data,
        )
        # maxtasksperchild=1 recycles each worker after one task,
        # presumably to bound per-task memory growth — TODO confirm.
        with multiprocessing.Pool(maxtasksperchild=1) as pool:
            regional_inputs = pool.map(_load_input, regions)

        _logger.info(f"Loaded {len(regional_inputs)} regions.")
        all_timeseries = api_pipeline.run_on_all_regional_inputs_for_intervention(
            regional_inputs)
        # NOTE(review): the comprehension variable `output` shadows the
        # `output` parameter, but only inside the comprehensions themselves.
        county_timeseries = [
            output for output in all_timeseries
            if output.aggregate_level is AggregationLevel.COUNTY
        ]
        api_pipeline.deploy_single_level(intervention, county_timeseries,
                                         summary_output, output)
        state_timeseries = [
            output for output in all_timeseries
            if output.aggregate_level is AggregationLevel.STATE
        ]
        api_pipeline.deploy_single_level(intervention, state_timeseries,
                                         summary_output, output)
Example #6
0
def test_generate_timeseries_for_fips(nyc_model_output_path, nyc_region,
                                      nyc_rt_dataset, nyc_icu_dataset):
    """The region timeseries must embed exactly the standalone region summary."""
    latest_dataset = combined_datasets.load_us_latest_dataset()
    timeseries_dataset = combined_datasets.load_us_timeseries_dataset()

    latest_record = latest_dataset.get_record_for_fips(nyc_region.fips)
    one_region = timeseries_dataset.get_one_region(nyc_region)
    metrics_ts, metrics_latest = api_v2_pipeline.generate_metrics_and_latest(
        one_region, nyc_rt_dataset, nyc_icu_dataset)
    risk = top_level_metric_risk_levels.calculate_risk_level_from_metrics(
        metrics_latest)

    summary_for_ts = build_api_v2.build_region_summary(
        latest_record, metrics_latest, risk)
    region_ts = build_api_v2.build_region_timeseries(
        summary_for_ts, one_region, metrics_ts)

    # Building the summary a second time must match the embedded one.
    standalone_summary = build_api_v2.build_region_summary(
        latest_record, metrics_latest, risk)

    assert standalone_summary.dict() == region_ts.region_summary.dict()
    # Serialized JSON must not contain NaN; values go through the simplejson wrapper.
    assert "NaN" not in region_ts.json()
Example #7
0
def generate_top_counties(disable_validation, input_dir, output, state, fips):
    """The entry function for invocation.

    Builds the top-100 counties bulk timeseries (sorted by projected hospital
    bed shortfall, largest first) for the selected intervention and deploys it
    as a single JSON file.

    Note: ``disable_validation`` is accepted for caller compatibility but is
    not used inside this function.
    """
    intervention = Intervention.SELECTED_INTERVENTION
    active_states = [s.abbr for s in us.STATES]
    us_latest = combined_datasets.load_us_latest_dataset().get_subset(
        AggregationLevel.COUNTY, states=active_states, state=state, fips=fips)
    us_timeseries = combined_datasets.load_us_timeseries_dataset().get_subset(
        AggregationLevel.COUNTY, states=active_states, state=state, fips=fips)

    def sort_func(output: RegionSummaryWithTimeseries):
        # Negated so ascending sort puts the largest shortfall first.
        return -output.projections.totalHospitalBeds.peakShortfall

    all_timeseries = api_pipeline.run_on_all_fips_for_intervention(
        us_latest,
        us_timeseries,
        intervention,  # use the local instead of repeating the enum literal
        input_dir,
        sort_func=sort_func,
        limit=100,
    )
    bulk_timeseries = AggregateRegionSummaryWithTimeseries(
        __root__=all_timeseries)

    api_pipeline.deploy_json_api_output(
        intervention,
        bulk_timeseries,
        output,
        filename_override="counties_top_100.json")
Example #8
0
def run_bad_tails_filter(output_path: pathlib.Path):
    """Apply TailFilter to cumulative fields of the US dataset and write rows to CSV."""
    logger = structlog.get_logger()
    us_dataset = combined_datasets.load_us_timeseries_dataset()
    logger.info("Starting filter")
    # Only the filtered dataset is kept; the first element of the pair is discarded.
    _, filtered = TailFilter.run(us_dataset, CUMULATIVE_FIELDS_TO_FILTER)
    logger.info("Writing output")
    filtered.timeseries_rows().to_csv(
        output_path, index=True, float_format="%.05g")
Example #9
0
def run_population_filter(output_path: pathlib.Path):
    """Drop regions lacking population data and write the result to CSV."""
    logger = structlog.get_logger()
    us_timeseries = combined_datasets.load_us_timeseries_dataset()
    logger.info("starting filter")
    filtered = timeseries.drop_regions_without_population(
        us_timeseries, KNOWN_LOCATION_ID_WITHOUT_POPULATION, logger)
    filtered.to_csv(output_path)
Example #10
0
def test_nyc_aggregation(nyc_region):
    """Aggregated NYC bed occupancy rates must stay below 1.0."""
    latest = combined_datasets.load_us_timeseries_dataset().get_one_region(
        nyc_region).latest
    # A rate >= 1 would indicate occupancy rates were summed instead of combined.
    assert latest["all_beds_occupancy_rate"] < 1
    assert latest["icu_occupancy_rate"] < 1
Example #11
0
def generate_api(input_dir, output, summary_output, aggregation_level, state,
                 fips):
    """The entry function for invocation"""
    active_states = [s.abbr for s in us.STATES]
    us_latest = combined_datasets.load_us_latest_dataset().get_subset(
        aggregation_level, state=state, fips=fips, states=active_states)
    us_timeseries = combined_datasets.load_us_timeseries_dataset().get_subset(
        aggregation_level, state=state, fips=fips, states=active_states)

    for intervention in list(Intervention):
        _logger.info(f"Running intervention {intervention.name}")
        all_timeseries = api_pipeline.run_on_all_fips_for_intervention(
            us_latest, us_timeseries, intervention, input_dir)
        # Deploy county-level output first, then state-level.
        for level in (AggregationLevel.COUNTY, AggregationLevel.STATE):
            level_timeseries = [
                ts for ts in all_timeseries if ts.aggregate_level is level
            ]
            api_pipeline.deploy_single_level(intervention, level_timeseries,
                                             summary_output, output)
Example #12
0
def test_build_api_output_for_intervention(nyc_fips, nyc_model_output_path, tmp_path):
    """deploy_single_level writes exactly the expected file set for one intervention."""
    county_output = tmp_path / "county"
    nyc_latest = combined_datasets.load_us_latest_dataset().get_subset(
        None, fips=nyc_fips)
    nyc_timeseries = combined_datasets.load_us_timeseries_dataset().get_subset(
        None, fips=nyc_fips)
    all_timeseries_api = api_pipeline.run_on_all_fips_for_intervention(
        nyc_latest, nyc_timeseries, Intervention.STRONG_INTERVENTION,
        nyc_model_output_path.parent)

    api_pipeline.deploy_single_level(
        Intervention.STRONG_INTERVENTION, all_timeseries_api, tmp_path,
        county_output)

    expected_outputs = [
        "counties.STRONG_INTERVENTION.timeseries.json",
        "counties.STRONG_INTERVENTION.csv",
        "counties.STRONG_INTERVENTION.timeseries.csv",
        "counties.STRONG_INTERVENTION.json",
        "county/36061.STRONG_INTERVENTION.json",
        "county/36061.STRONG_INTERVENTION.timeseries.json",
    ]

    # Collect every file (not directory) under tmp_path, relative to it.
    actual_outputs = [
        str(p.relative_to(tmp_path)) for p in tmp_path.glob("**/*")
        if not p.is_dir()
    ]
    assert sorted(actual_outputs) == sorted(expected_outputs)
Example #13
0
def generate_test_positivity(
    test_positivity_all_methods: pathlib.Path,
    final_result: pathlib.Path,
    output_dir: pathlib.Path,
    state: Optional[str],
    fips: Optional[str],
):
    """Run all test-positivity methods on the selected subset and write results."""
    # A single requested state wins; otherwise every state plus PR/MP territories.
    if state:
        active_states = [state]
    else:
        active_states = [s.abbr for s in us.STATES]
        active_states = active_states + ["PR", "MP"]

    selected_dataset = combined_datasets.load_us_timeseries_dataset().get_subset(
        exclude_county_999=True,
        states=active_states,
        fips=fips,
    )
    results = test_positivity.AllMethods.run(selected_dataset)
    _write_dataset_map(output_dir / test_positivity_all_methods,
                       results.all_methods_datasets)

    results.test_positivity.timeseries_rows().to_csv(
        output_dir / final_result, index=True, float_format="%.05g")
Example #14
0
def test_generate_timeseries_for_fips(
    include_projections,
    nyc_model_output_path,
    nyc_region,
    nyc_rt_dataset,
    nyc_icu_dataset,
):
    """Region timeseries output must embed exactly the standalone region summary."""
    latest_record = combined_datasets.load_us_latest_dataset().get_record_for_fips(
        nyc_region.fips)
    one_region = combined_datasets.load_us_timeseries_dataset().get_one_region(
        nyc_region)
    intervention = Intervention.OBSERVED_INTERVENTION
    model_output = CANPyseirLocationOutput.load_from_path(
        nyc_model_output_path)
    metrics_ts, metrics_latest = api_pipeline.generate_metrics_and_latest(
        one_region, nyc_rt_dataset, nyc_icu_dataset)

    summary_for_ts = generate_api.generate_region_summary(
        latest_record, metrics_latest, model_output)
    region_ts = generate_api.generate_region_timeseries(
        summary_for_ts, one_region, metrics_ts, model_output)

    # A second, independently built summary must match the embedded one.
    standalone_summary = generate_api.generate_region_summary(
        latest_record, metrics_latest, model_output)

    assert standalone_summary.dict() == region_ts.region_summary.dict()
    # Serialized JSON must not contain NaN; values go through the simplejson wrapper.
    assert "NaN" not in region_ts.json()
Example #15
0
def build_all(states, output_dir, level, fips, location_id_matches: str,
              generate_api_v2: bool):
    """Run the per-region pipeline for the requested states and write model output."""
    # Normalize user-supplied state tokens to known state abbreviations.
    states = [us.states.lookup(token.strip()).abbr for token in states]
    states = [abbr for abbr in states if abbr in ALL_STATES]

    # prepare data
    _cache_global_datasets()

    regions_dataset = combined_datasets.load_us_timeseries_dataset().get_subset(
        fips=fips,
        aggregation_level=level,
        exclude_county_999=True,
        states=states,
        location_id_matches=location_id_matches,
    )
    regions = [
        one_region for _, one_region in regions_dataset.iter_one_regions()
    ]
    root.info(f"Executing pipeline for {len(regions)} regions")
    region_pipelines: List[OneRegionPipeline] = parallel_utils.parallel_map(
        OneRegionPipeline.run, regions)
    # Presumably patches the New Orleans infection rate post-run — see helper.
    region_pipelines = _patch_nola_infection_rate_in_pipelines(
        region_pipelines)

    model_output = pyseir.run.PyseirOutputDatasets.from_pipeline_output(
        region_pipelines)
    model_output.write(output_dir, root)

    if generate_api_v2:
        api_v2_pipeline.generate_from_loaded_data(
            model_output, output_dir, regions_dataset, root)
Example #16
0
def test_combined_county_has_some_data(fips):
    """Sanity-check that a combined county region carries test and death data."""
    one_region = combined_datasets.load_us_timeseries_dataset().get_one_region(
        Region.from_fips(fips))
    assert one_region.data[CommonFields.POSITIVE_TESTS].all()
    assert one_region.data[CommonFields.NEGATIVE_TESTS].all()
    assert one_region.latest[CommonFields.DEATHS] > 1
Example #17
0
def save_combined_csv(csv_path_format, output_dir):
    """Save the combined datasets DataFrame, cleaned up for easier comparisons."""
    destination = form_path_name(csv_path_format, output_dir)
    # Local renamed from `timeseries` to avoid shadowing the module of that name.
    combined = combined_datasets.load_us_timeseries_dataset()
    common_df.write_csv(combined.data, destination, structlog.get_logger())
Example #18
0
def test_build_summary_for_fips(include_model_output, rt_null, nyc_region,
                                nyc_icu_dataset, nyc_rt_dataset):
    """build_region_summary for NYC must equal a hand-assembled RegionSummary.

    Parametrized (via fixtures) on whether model output is included and
    whether the Rt dataset is nulled; the rt/icu inputs are dropped
    accordingly before computing metrics.
    """
    us_latest = combined_datasets.load_us_latest_dataset()
    us_timeseries = combined_datasets.load_us_timeseries_dataset()
    nyc_latest = us_latest.get_record_for_fips(nyc_region.fips)
    # NOTE(review): model_output and expected_projections are never
    # reassigned below — likely vestigial from an earlier version.
    model_output = None
    expected_projections = None

    # Select which optional datasets feed the metrics computation.
    if include_model_output:
        if rt_null:
            nyc_rt_dataset = None
    else:
        nyc_icu_dataset = None
        nyc_rt_dataset = None

    fips_timeseries = us_timeseries.get_one_region(nyc_region)

    metrics_series, latest_metric = api_v2_pipeline.generate_metrics_and_latest(
        fips_timeseries, nyc_rt_dataset, nyc_icu_dataset)
    risk_levels = top_level_metric_risk_levels.calculate_risk_level_from_metrics(
        latest_metric)
    assert latest_metric
    summary = build_api_v2.build_region_summary(nyc_latest, latest_metric,
                                                risk_levels)
    # Expected summary assembled directly from the latest-values record.
    # NOTE(review): lastUpdatedDate uses utcnow(); equality presumably relies
    # on RegionSummary normalizing it to a date — confirm.
    expected = RegionSummary(
        population=nyc_latest["population"],
        state="NY",
        country="USA",
        level="county",
        county="New York County",
        fips="36061",
        lat=None,
        long=None,
        metrics=latest_metric,
        riskLevels=risk_levels,
        actuals=Actuals(
            cases=nyc_latest["cases"],
            deaths=nyc_latest["deaths"],
            positiveTests=nyc_latest["positive_tests"],
            negativeTests=nyc_latest["negative_tests"],
            hospitalBeds={
                "capacity": nyc_latest["max_bed_count"],
                "currentUsageCovid": None,
                "currentUsageTotal": None,
                "typicalUsageRate": nyc_latest["all_beds_occupancy_rate"],
            },
            icuBeds={
                "capacity": nyc_latest["icu_beds"],
                "totalCapacity": nyc_latest["icu_beds"],
                "currentUsageCovid": None,
                "currentUsageTotal": None,
                "typicalUsageRate": nyc_latest["icu_occupancy_rate"],
            },
            contactTracers=nyc_latest["contact_tracers_count"],
        ),
        lastUpdatedDate=datetime.datetime.utcnow(),
    )
    assert expected.dict() == summary.dict()
Example #19
0
def il_regional_input(rt_dataset, icu_dataset):
    """Build an api_v2 RegionalInput for Illinois with test positivity joined in."""
    il_region = Region.from_state("IL")
    il_data = combined_datasets.load_us_timeseries_dataset().get_regions_subset(
        [il_region])
    il_data = test_positivity.run_and_maybe_join_columns(
        il_data, structlog.get_logger())

    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        il_region, il_data, rt_dataset, icu_dataset)
def il_regional_input(rt_dataset, icu_dataset):
    """Build an api_v2 RegionalInput for Illinois (no test positivity yet)."""
    il_region = Region.from_state("IL")
    il_data = combined_datasets.load_us_timeseries_dataset().get_regions_subset(
        [il_region])
    # TODO(tom): add test positivity back in after PR 728 is merged.
    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        il_region, il_data, rt_dataset, icu_dataset)
def test_update_and_load(tmp_path: pathlib.Path, nyc_fips, nyc_region):
    """Persist a one-region dataset and verify it loads back equivalently."""
    # Persist only NYC to keep the test fast.
    nyc_dataset = combined_datasets.load_us_timeseries_dataset().get_regions_subset(
        [nyc_region])
    nyc_one_region = nyc_dataset.get_one_region(nyc_region)
    assert nyc_one_region.latest[CommonFields.POPULATION] > 1_000_000
    assert nyc_one_region.region.location_id

    combined_dataset_utils.persist_dataset(nyc_dataset, tmp_path)

    loaded = combined_datasets.load_us_timeseries_dataset(
        pointer_directory=tmp_path)
    loaded_one_region = loaded.get_one_region(nyc_region)
    # Latest values survive the round trip up to float tolerance.
    assert nyc_one_region.latest == pytest.approx(loaded_one_region.latest)
    test_helpers.assert_dataset_like(
        loaded, nyc_dataset, drop_na_timeseries=True)
Example #22
0
def generate_test_positivity(test_positivity_all_methods: pathlib.Path):
    """Run all test-positivity methods over every state and write the results."""
    # Every state plus the PR/MP territories.
    active_states = [s.abbr for s in us.STATES]
    active_states = active_states + ["PR", "MP"]
    regions = combined_datasets.get_subset_regions(
        exclude_county_999=True,
        states=active_states,
    )

    regions_data = combined_datasets.load_us_timeseries_dataset().get_regions_subset(
        regions)
    results = test_positivity.AllMethods.run(regions_data)
    results.write(test_positivity_all_methods)
Example #23
0
def test_persist_and_load_dataset(tmp_path, nyc_fips):
    """Round-trip a subset dataset through persist_dataset and its pointer."""
    nyc_subset = combined_datasets.load_us_timeseries_dataset().get_subset(
        None, fips=nyc_fips)

    pointer = combined_dataset_utils.persist_dataset(nyc_subset, tmp_path)
    reloaded = pointer.load_dataset()

    # After the round trip no timeseries should differ between the two.
    diff_left = DatasetDiff.make(reloaded.data)
    diff_right = DatasetDiff.make(nyc_subset.data)
    diff_left.compare(diff_right)

    assert not len(diff_left.my_ts)
def test_combined_county_has_some_timeseries_data(fips):
    """Spot-check that the county timeseries has data on 2020-05-01."""
    one_region = combined_datasets.load_us_timeseries_dataset().get_one_region(
        Region.from_fips(fips))
    by_date = one_region.data.set_index(CommonFields.DATE)
    assert by_date.loc["2020-05-01", CommonFields.CASES] > 0
    assert by_date.loc["2020-05-01", CommonFields.DEATHS] > 0
    # TODO(tom): Remove this condition when we have county data in TX too.
    if fips.startswith("06"):
        assert by_date.loc["2020-05-01", CommonFields.POSITIVE_TESTS] > 0
        assert by_date.loc["2020-05-01", CommonFields.NEGATIVE_TESTS] > 0
        assert by_date.loc["2020-05-01", CommonFields.CURRENT_ICU] > 0
def test_persist_and_load_dataset(tmp_path, nyc_fips):
    """Round-trip a one-region TimeseriesDataset through persist_dataset."""
    nyc_region = Region.from_fips(nyc_fips)
    source = combined_datasets.load_us_timeseries_dataset()
    nyc_timeseries = TimeseriesDataset(source.get_one_region(nyc_region).data)

    pointer = combined_dataset_utils.persist_dataset(nyc_timeseries, tmp_path)
    reloaded = pointer.load_dataset()

    # After the round trip no timeseries should differ between the two.
    diff_left = DatasetDiff.make(reloaded.data)
    diff_right = DatasetDiff.make(nyc_timeseries.data)
    diff_left.compare(diff_right)

    assert not len(diff_left.my_ts)
Example #26
0
def test_update_and_load(tmp_path: pathlib.Path, nyc_fips):
    """update_data_public_head output must reload with the same latest record."""
    # Persist only one county to speed up the test.
    latest_nyc = combined_datasets.load_us_latest_dataset().get_subset(
        None, fips=nyc_fips)
    timeseries_nyc = combined_datasets.load_us_timeseries_dataset().get_subset(
        None, fips=nyc_fips)

    combined_dataset_utils.update_data_public_head(
        tmp_path,
        latest_dataset=latest_nyc,
        timeseries_dataset=timeseries_nyc,
    )

    # Load both datasets back from the pointer directory.
    combined_datasets.load_us_timeseries_dataset(pointer_directory=tmp_path)
    loaded_latest = combined_datasets.load_us_latest_dataset(
        pointer_directory=tmp_path)

    assert loaded_latest.get_record_for_fips(
        nyc_fips) == latest_nyc.get_record_for_fips(nyc_fips)
def test_persist_and_load_dataset(tmp_path, nyc_fips):
    """Round-trip a MultiRegionDataset through persist_dataset and its pointer."""
    nyc_region = Region.from_fips(nyc_fips)
    nyc_dataset = combined_datasets.load_us_timeseries_dataset().get_regions_subset(
        [nyc_region])

    pointer = combined_dataset_utils.persist_dataset(nyc_dataset, tmp_path)
    reloaded = MultiRegionDataset.read_from_pointer(pointer)

    # After the round trip no timeseries should differ between the two.
    diff_left = DatasetDiff.make(reloaded.timeseries)
    diff_right = DatasetDiff.make(nyc_dataset.timeseries)
    diff_left.compare(diff_right)

    assert not len(diff_left.my_ts)
Example #28
0
def test_output_no_timeseries_rows(nyc_regional_input, tmp_path):
    """The v2 pipeline still produces API output when rt/icu data are absent."""
    # Rebuild a RegionalInput for the same region with rt/icu set to None.
    # (The original also bound nyc_regional_input.timeseries to an unused
    # local, and its comment wrongly claimed the timeseries was empty —
    # the assert below checks it is NOT empty.)
    one_region = combined_datasets.load_us_timeseries_dataset().get_one_region(
        nyc_regional_input.region)
    regional_input = api_v2_pipeline.RegionalInput(nyc_regional_input.region,
                                                   one_region, None, None)
    assert not regional_input.timeseries.empty

    all_timeseries_api = api_v2_pipeline.run_on_regions([regional_input])

    assert all_timeseries_api
def il_regional_input_empty_test_positivity_column(rt_dataset, icu_dataset):
    """Illinois RegionalInput whose test-positivity column is present but empty."""
    il_region = Region.from_state("IL")
    il_data = combined_datasets.load_us_timeseries_dataset().get_regions_subset(
        [il_region])
    # Zero-row frame carrying only the columns needed for the join.
    empty_frame = pd.DataFrame(
        [],
        columns=[
            CommonFields.LOCATION_ID,
            CommonFields.DATE,
            CommonFields.TEST_POSITIVITY,
        ],
    )
    empty_positivity = timeseries.MultiRegionTimeseriesDataset.from_timeseries_df(
        empty_frame)

    il_data = il_data.join_columns(empty_positivity)
    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        il_region, il_data, rt_dataset, icu_dataset)
Example #30
0
def calculate_case_based_weights() -> dict:
    """Return a dict mapping county FIPS -> within-state case weight.

    Weights are derived from each county's cases over the last 31 days and
    normalized so the weights of all counties in a state sum to 1; counties
    with no cases in the window get weight 0.
    """
    LOOKBACK_DAYS = 31
    SUMMED_CASES_LABEL = "summed_cases"
    cutoff_date = pd.Timestamp.now() - pd.Timedelta(days=LOOKBACK_DAYS)
    us_dataset = combined_datasets.load_us_timeseries_dataset()
    region_groupby = us_dataset.get_counties_and_places(
        after=cutoff_date).groupby_region()

    # _quantile_range summarizes each region's cases over the window —
    # presumably a robust cumulative-case estimate; see the helper.
    last_month_cum_cases = region_groupby[CommonFields.CASES].apply(
        _quantile_range)
    last_month_cum_cases.name = SUMMED_CASES_LABEL

    df = last_month_cum_cases.reset_index().dropna()
    timeseries._add_fips_if_missing(df)
    # Example location_id value = 'iso1:us#iso2:us-ak#fips:02013'
    # The middle '#'-separated token is the state-level location id.
    df["state_location_id"] = df[CommonFields.LOCATION_ID.value].str.split(
        "#").str[1]
    # Normalize the cases based on the groupby total
    df["weight"] = df.groupby("state_location_id")[
        SUMMED_CASES_LABEL].transform(lambda x: x / x.sum())
    df["weight"] = df["weight"].round(4)
    # Convert to dict mapping
    output = df.set_index(CommonFields.FIPS.value)["weight"].to_dict()

    # Set the default weight to 0 for the few counties with no cases in the window of interest
    all_county_fips = {
        region.fips
        for region, _ in combined_datasets.load_us_timeseries_dataset().
        get_subset(aggregation_level=AggregationLevel.COUNTY,
                   exclude_county_999=True).iter_one_regions()
    }

    for fips in all_county_fips:
        if fips not in output:
            output[fips] = 0

    return output