Esempio n. 1
0
def _cache_global_datasets():
    """Warm the combined-dataset cache and the module-level source datasets.

    Meant to run pre-fork so that subprocesses start with a populated cache.
    The builder return values are discarded on purpose: the calls are made
    only to populate the cache.
    """
    global nyt_dataset, cds_dataset

    combined_datasets.build_us_latest_with_all_fields()
    combined_datasets.build_us_timeseries_with_all_fields()

    # Load each local dataset only while its module-level slot is still unset.
    if cds_dataset is None:
        cds_dataset = CDSDataset.local()
    if nyt_dataset is None:
        nyt_dataset = NYTimesDataset.local()
Esempio n. 2
0
def update_data_public_head(
    data_directory: pathlib.Path,
    latest_dataset: latest_values_dataset.LatestValuesDataset = None,
    timeseries_dataset: timeseries.TimeseriesDataset = None,
) -> Tuple[DatasetPointer, DatasetPointer]:
    """Persists the US latest and timeseries datasets and returns their pointers.

    Args:
        data_directory: Directory handed to persist_dataset for both datasets.
        latest_dataset: Optionally specify a LatestValuesDataset to persist instead of building
            from head.  Generally used in testing to sidestep building entire dataset.
        timeseries_dataset: Optionally specify a TimeseriesDataset to persist instead of building
            from head.  Generally used in testing to sidestep building entire dataset.

    Returns: Tuple of DatasetPointers to latest and timeseries datasets.
    """
    # Compare against None explicitly: a caller-supplied dataset that happens
    # to evaluate falsy (for example an empty one) must still be persisted
    # as given rather than silently replaced by a fresh build from head.
    if latest_dataset is None:
        latest_dataset = combined_datasets.build_us_latest_with_all_fields(skip_cache=True)
    latest_pointer = persist_dataset(latest_dataset, data_directory)

    if timeseries_dataset is None:
        timeseries_dataset = combined_datasets.build_us_timeseries_with_all_fields(skip_cache=True)
    timeseries_pointer = persist_dataset(timeseries_dataset, data_directory)

    return latest_pointer, timeseries_pointer
Esempio n. 3
0
def test_build_api_output_for_intervention(nyc_fips, nyc_model_output_path,
                                           tmp_path):
    """Run the API pipeline for one FIPS and check exactly which files are written."""
    expected_files = [
        "counties.STRONG_INTERVENTION.timeseries.json",
        "counties.STRONG_INTERVENTION.csv",
        "counties.STRONG_INTERVENTION.timeseries.csv",
        "counties.STRONG_INTERVENTION.json",
        "county/36061.STRONG_INTERVENTION.json",
        "county/36061.STRONG_INTERVENTION.timeseries.json",
    ]
    county_dir = tmp_path / "county"

    full_latest = combined_datasets.build_us_latest_with_all_fields()
    full_timeseries = combined_datasets.build_us_timeseries_with_all_fields()
    nyc_latest = full_latest.get_subset(None, fips=nyc_fips)
    nyc_timeseries = full_timeseries.get_subset(None, fips=nyc_fips)

    api_rows = api_pipeline.run_on_all_fips_for_intervention(
        nyc_latest,
        nyc_timeseries,
        Intervention.STRONG_INTERVENTION,
        nyc_model_output_path.parent,
    )
    api_pipeline.deploy_single_level(
        Intervention.STRONG_INTERVENTION, api_rows, tmp_path, county_dir
    )

    # Collect every regular file under tmp_path, relative to tmp_path.
    written_files = []
    for entry in tmp_path.glob("**/*"):
        if entry.is_dir():
            continue
        written_files.append(str(entry.relative_to(tmp_path)))

    assert sorted(written_files) == sorted(expected_files)
Esempio n. 4
0
def generate_api(input_dir, output, summary_output, aggregation_level, state,
                 fips):
    """The entry function for invocation"""
    # Restrict both combined datasets to the requested level/state/fips,
    # limited to the states listed in us.STATES.
    active_states = [us_state.abbr for us_state in us.STATES]
    subset_filters = dict(state=state, fips=fips, states=active_states)

    us_latest = combined_datasets.build_us_latest_with_all_fields().get_subset(
        aggregation_level, **subset_filters)
    us_timeseries = combined_datasets.build_us_timeseries_with_all_fields(
    ).get_subset(aggregation_level, **subset_filters)

    for intervention in Intervention:
        _logger.info(f"Running intervention {intervention.name}")
        all_timeseries = api_pipeline.run_on_all_fips_for_intervention(
            us_latest, us_timeseries, intervention, input_dir)

        # Deploy county-level results first, then state-level results.
        for level in (AggregationLevel.COUNTY, AggregationLevel.STATE):
            level_timeseries = [
                area for area in all_timeseries
                if area.aggregate_level is level
            ]
            api_pipeline.deploy_single_level(intervention, level_timeseries,
                                             summary_output, output)
Esempio n. 5
0
def generate_top_counties(disable_validation, input_dir, output, state, fips):
    """The entry function for invocation"""
    # NOTE: disable_validation is accepted but not referenced in this body.
    intervention = Intervention.SELECTED_INTERVENTION

    active_states = [us_state.abbr for us_state in us.STATES]
    county_filters = dict(states=active_states, state=state, fips=fips)
    us_latest = combined_datasets.build_us_latest_with_all_fields().get_subset(
        AggregationLevel.COUNTY, **county_filters)
    us_timeseries = combined_datasets.build_us_timeseries_with_all_fields(
    ).get_subset(AggregationLevel.COUNTY, **county_filters)

    def negated_peak_shortfall(area_timeseries: CovidActNowAreaTimeseries):
        # Sort key: the hospital-bed peak shortfall with its sign flipped.
        return -area_timeseries.projections.totalHospitalBeds.peakShortfall

    top_timeseries = api_pipeline.run_on_all_fips_for_intervention(
        us_latest,
        us_timeseries,
        intervention,
        input_dir,
        sort_func=negated_peak_shortfall,
        limit=100,
    )

    api_pipeline.deploy_json_api_output(
        intervention,
        CovidActNowBulkTimeseries(__root__=top_timeseries),
        output,
        filename_override="counties_top_100.json",
    )
Esempio n. 6
0
def save_combined_latest_csv(csv_path_format, output_dir):
    """Save the combined datasets latest DataFrame, cleaned up for easier comparisons."""
    destination = form_path_name(csv_path_format, output_dir)
    latest = combined_datasets.build_us_latest_with_all_fields()

    # Hacky stand-in for common_df.write_csv, which cannot be used here
    # because it requires a date index.
    indexed_by_fips = latest.data.set_index(CommonFields.FIPS)
    without_pd_na = indexed_by_fips.replace({pd.NA: np.nan})
    cleaned = without_pd_na.convert_dtypes()

    cleaned.to_csv(
        destination,
        date_format="%Y-%m-%d",
        index=True,
        float_format="%.12g",
    )
Esempio n. 7
0
def generate_api_for_state_projection_row(projection_row) -> CovidActNowStateSummary:
    """Build the CovidActNowStateSummary for a single state projection row.

    Values are drawn from the projection row itself, from the state's record
    in the combined latest dataset, and from the state's intervention.
    """
    state_name = projection_row[rc.STATE_FULL_NAME]
    abbrev = US_STATE_ABBREV[state_name]

    projections = _generate_api_for_projections(projection_row)
    intervention = get_can_projection.get_intervention_for_state(abbrev)
    latest = combined_datasets.build_us_latest_with_all_fields()
    latest_record = latest.get_record_for_state(abbrev)

    return CovidActNowStateSummary(
        population=latest_record[CommonFields.POPULATION],
        lat=projection_row[rc.LATITUDE],
        long=projection_row[rc.LONGITUDE],
        actuals=_generate_actuals(latest_record, intervention),
        stateName=state_name,
        fips=projection_row[rc.FIPS],
        lastUpdatedDate=_format_date(projection_row[rc.LAST_UPDATED]),
        projections=projections,
    )
Esempio n. 8
0
def generate_state_timeseries(
    projection_row, intervention, input_dir
) -> CovidActNowStateTimeseries:
    """Build the CovidActNowStateTimeseries for one state projection row.

    Raw model rows for the state are left-joined with the state's testing
    timeseries on "date" and converted to timeseries rows; actuals come from
    the combined latest and timeseries datasets.

    Raises:
        Exception: if no timeseries rows were produced.
    """
    state_name = projection_row[rc.STATE_FULL_NAME]
    state = US_STATE_ABBREV[state_name]
    fips = projection_row[rc.FIPS]
    raw_rows = get_can_projection.get_can_raw_data(
        input_dir, state, fips, AggregationLevel.STATE, intervention
    )

    # Left join on "date" so that model rows are kept even when the state
    # has no matching testing data (i.e. NE).
    testing_df = get_testing_timeseries_by_state(state)
    merged = pd.DataFrame(raw_rows).merge(testing_df, on="date", how="left")
    timeseries = [
        _generate_state_timeseries_row(record)
        for record in merged.to_dict(orient="records")
    ]

    projections = _generate_api_for_projections(projection_row)
    if not timeseries:
        raise Exception(f"State time series empty for {intervention.name}")

    state_intervention = get_can_projection.get_intervention_for_state(state)
    actuals_ts = combined_datasets.build_us_timeseries_with_all_fields()
    state_latest = (
        combined_datasets.build_us_latest_with_all_fields().get_record_for_state(state)
    )

    return CovidActNowStateTimeseries(
        population=state_latest[CommonFields.POPULATION],
        lat=projection_row[rc.LATITUDE],
        long=projection_row[rc.LONGITUDE],
        actuals=_generate_actuals(state_latest, state_intervention),
        stateName=state_name,
        fips=fips,
        lastUpdatedDate=_format_date(projection_row[rc.LAST_UPDATED]),
        projections=projections,
        timeseries=timeseries,
        actuals_timeseries=_generate_actuals_timeseries(
            actuals_ts.get_records_for_state(state), state_intervention
        ),
    )
Esempio n. 9
0
def generate_county_timeseries(projection_row, intervention, input_dir):
    """Build the CovidActNowCountyTimeseries for one county projection row.

    Raw model rows for the county are left-joined with the county's testing
    timeseries on "date" and converted to timeseries rows; actuals come from
    the combined latest and timeseries datasets.

    Raises:
        Exception: if no timeseries rows were produced.
    """
    state_name = projection_row[rc.STATE_FULL_NAME]
    state_abbrev = US_STATE_ABBREV[state_name]
    fips = projection_row[rc.FIPS]

    raw_rows = get_can_projection.get_can_raw_data(
        input_dir, state_abbrev, fips, AggregationLevel.COUNTY, intervention
    )

    # Left join on "date": model rows are kept even without testing data.
    testing_df = get_testing_timeseries_by_fips(fips)
    merged = pd.DataFrame(raw_rows).merge(testing_df, on="date", how="left")
    timeseries = [
        _generate_county_timeseries_row(record)
        for record in merged.to_dict(orient="records")
    ]
    if not timeseries:
        raise Exception(f"County time series empty for {intervention.name}")

    projections = _generate_api_for_projections(projection_row)
    state_intervention = get_can_projection.get_intervention_for_state(state_abbrev)
    actuals_ts = combined_datasets.build_us_timeseries_with_all_fields()
    fips_latest = (
        combined_datasets.build_us_latest_with_all_fields().get_record_for_fips(fips)
    )

    return CovidActNowCountyTimeseries(
        population=fips_latest[CommonFields.POPULATION],
        lat=projection_row[rc.LATITUDE],
        long=projection_row[rc.LONGITUDE],
        actuals=_generate_actuals(fips_latest, state_intervention),
        stateName=state_name,
        countyName=projection_row[rc.COUNTY],
        fips=fips,
        lastUpdatedDate=_format_date(projection_row[rc.LAST_UPDATED]),
        projections=projections,
        timeseries=timeseries,
        actuals_timeseries=_generate_actuals_timeseries(
            actuals_ts.get_records_for_fips(fips), state_intervention
        ),
    )
Esempio n. 10
0
def test_generate_timeseries_for_fips(include_projections,
                                      nyc_model_output_path, nyc_fips):
    """Check that an area timeseries embeds a matching summary and serializes without NaN."""
    full_latest = combined_datasets.build_us_latest_with_all_fields()
    full_timeseries = combined_datasets.build_us_timeseries_with_all_fields()

    latest_record = full_latest.get_record_for_fips(nyc_fips)
    fips_timeseries = full_timeseries.get_subset(None, fips=nyc_fips)
    # Assigned here but not read anywhere below in this test.
    intervention = Intervention.OBSERVED_INTERVENTION
    model_output = CANPyseirLocationOutput.load_from_path(
        nyc_model_output_path)

    embedded_summary = generate_api.generate_area_summary(
        latest_record, model_output)
    area_timeseries = generate_api.generate_area_timeseries(
        embedded_summary, fips_timeseries, model_output)

    # Build the summary a second time, independently, for the comparison.
    standalone_summary = generate_api.generate_area_summary(
        latest_record, model_output)
    assert standalone_summary.dict() == area_timeseries.area_summary.dict()

    # Double checking that serialized json does not contain NaNs, all values
    # should be serialized using the simplejson wrapper.
    serialized = area_timeseries.json()
    assert "NaN" not in serialized
Esempio n. 11
0
def test_build_timeseries_and_summary_outputs(nyc_model_output_path, nyc_fips,
                                              intervention):
    """Check what build_timeseries_for_fips yields for each intervention."""
    latest = combined_datasets.build_us_latest_with_all_fields()
    all_timeseries = combined_datasets.build_us_timeseries_with_all_fields()

    result = api_pipeline.build_timeseries_for_fips(
        intervention,
        latest,
        all_timeseries,
        nyc_model_output_path.parent,
        nyc_fips,
    )

    no_model_in_test_data = intervention is Intervention.NO_INTERVENTION
    if no_model_in_test_data:
        # Test data does not contain no intervention model, should not output any results.
        assert not result
        return

    assert result

    if intervention is Intervention.STRONG_INTERVENTION:
        assert result.projections
        assert result.timeseries
    elif intervention is Intervention.OBSERVED_INTERVENTION:
        assert not result.projections
        assert not result.timeseries
Esempio n. 12
0
def test_unique_index_values_us_latest():
    """The US latest dataset must have no repeated INDEX_FIELDS combinations."""
    latest = combined_datasets.build_us_latest_with_all_fields()
    index = latest.data.set_index(latest.INDEX_FIELDS).index
    duplicated_mask = index.duplicated()
    assert not duplicated_mask.any()
Esempio n. 13
0
def _cache_global_datasets():
    """Populate the cache for the combined latest and timeseries datasets.

    Caching pre-fork makes sure the cache is populated for subprocesses.
    Return values are ignored; populating the cache is the only goal.
    """
    builders = (
        combined_datasets.build_us_latest_with_all_fields,
        combined_datasets.build_us_timeseries_with_all_fields,
    )
    for build in builders:
        build()
Esempio n. 14
0
def test_combined_county_has_some_data(fips):
    """Every positive/negative test value in the county subset must be truthy."""
    combined_latest = combined_datasets.build_us_latest_with_all_fields()
    county_latest = combined_latest.get_subset(AggregationLevel.COUNTY,
                                               fips=fips)
    for field in (CommonFields.POSITIVE_TESTS, CommonFields.NEGATIVE_TESTS):
        assert county_latest.data[field].all()
def test_build_summary_for_fips(include_projections, nyc_model_output_path,
                                nyc_fips):
    """Compare generate_region_summary output against a hand-built RegionSummary."""
    nyc_latest = combined_datasets.build_us_latest_with_all_fields(
    ).get_record_for_fips(nyc_fips)

    # Defaults for the run without projections.
    model_output = None
    expected_projections = None
    # Assigned (and reassigned below) but not read anywhere in this test.
    intervention = Intervention.OBSERVED_INTERVENTION

    if include_projections:
        model_output = CANPyseirLocationOutput.load_from_path(
            nyc_model_output_path)
        hospital_projection = ResourceUsageProjection(
            peakShortfall=0,
            peakDate=datetime.date(2020, 4, 15),
            shortageStartDate=None,
        )
        expected_projections = Projections(
            totalHospitalBeds=hospital_projection,
            ICUBeds=None,
            Rt=model_output.latest_rt,
            RtCI90=model_output.latest_rt_ci90,
        )
        intervention = Intervention.STRONG_INTERVENTION

    summary = generate_api.generate_region_summary(nyc_latest, model_output)

    population = nyc_latest["population"]
    icu_bed_count = nyc_latest["icu_beds"]

    expected_hospital_beds = {
        # Manually calculated from capacity calculation in generate_api.py
        "capacity": 12763,
        "totalCapacity": nyc_latest["max_bed_count"],
        "currentUsageCovid": 0,
        "currentUsageTotal": None,
        "typicalUsageRate": nyc_latest["all_beds_occupancy_rate"],
    }
    expected_icu_beds = {
        "capacity": icu_bed_count,
        "totalCapacity": icu_bed_count,
        "currentUsageCovid": 0,
        "currentUsageTotal": 0,
        "typicalUsageRate": nyc_latest["icu_occupancy_rate"],
    }
    expected_actuals = Actuals(
        population=population,
        intervention="STRONG_INTERVENTION",
        cumulativeConfirmedCases=nyc_latest["cases"],
        cumulativeDeaths=nyc_latest["deaths"],
        cumulativePositiveTests=nyc_latest["positive_tests"],
        cumulativeNegativeTests=nyc_latest["negative_tests"],
        hospitalBeds=expected_hospital_beds,
        ICUBeds=expected_icu_beds,
        contactTracers=nyc_latest["contact_tracers_count"],
    )
    expected = RegionSummary(
        population=population,
        stateName="New York",
        countyName="New York County",
        fips="36061",
        lat=None,
        long=None,
        actuals=expected_actuals,
        lastUpdatedDate=datetime.datetime.utcnow(),
        projections=expected_projections,
    )

    # Print both ICU bed payloads so they appear in the captured test output.
    import pprint

    for model in (expected, summary):
        pprint.pprint(model.actuals.ICUBeds.dict())

    assert expected.dict() == summary.dict()