예제 #1
0
def deploy_dod_projections(disable_validation, input_state_dir, input_county_dir, output):

    """Generates and runs dod data projections from model outputs.

    Used for manual trigger

    # triggering persistance to s3
    AWS_PROFILE=covidactnow BUCKET_NAME=covidactnow-deleteme python run.py deploy-dod-projections

    # deploy to the data bucket
    AWS_PROFILE=covidactnow BUCKET_NAME=data.covidactnow.org python run.py deploy-dod-projections

    # triggering persistance to local
    python run.py deploy-dod-projections
    """

    for intervention in list(Intervention):
        # TODO(issues/#259): Split up the dod pipeline to process states/counties since states support inference
        # TODO(issues/#258): remove check once counties support inferrence
        if intervention in Intervention.county_supported_interventions():
            logger.info(f"Starting to generate files for {intervention.name}.")

            state_result, county_result = dod_pipeline.run_projections(
                input_state_dir, input_county_dir, intervention, run_validation=not disable_validation
            )
            dod_pipeline.deploy_results(state_result, output)
            dod_pipeline.deploy_results(county_result, output)

    logger.info('finished dod job')
예제 #2
0
def deploy_counties_api(disable_validation, input_dir, output, summary_output):
    """The entry function for invocation"""

    for intervention in list(Intervention):
        # TODO(issues/#258): remove check once counties support inferrence
        if intervention in Intervention.county_supported_interventions():
            county_result = api_pipeline.run_projections(
                input_dir,
                AggregationLevel.COUNTY,
                intervention,
                run_validation=not disable_validation,
            )
            county_summaries, county_timeseries = api_pipeline.generate_api(
                county_result, input_dir
            )
            api_pipeline.deploy_results([*county_summaries, *county_timeseries], output)

            counties_summary = api_pipeline.build_counties_summary(county_summaries, intervention)
            counties_timeseries = api_pipeline.build_counties_timeseries(county_timeseries, intervention)
            summarized_timeseries = api_pipeline.build_prediction_header_timeseries_data(counties_timeseries)
            api_pipeline.deploy_prediction_timeseries_csvs(summarized_timeseries, summary_output)

            api_pipeline.deploy_results([counties_summary], summary_output, write_csv=True)
            api_pipeline.deploy_results([counties_timeseries], summary_output)

        logger.info("finished top counties job")
def _get_intervention_type(intervention_type, state, state_interventions_df):
    if intervention_type == Intervention.SELECTED_MITIGATION.value:
        state_intervention_results = state_interventions_df.loc[
            state_interventions_df["state"] == state]["intervention"]
        if not state_intervention_results.empty:
            intervention_string = state_intervention_results.values[0]
            return Intervention.from_str(intervention_string).value
    return intervention_type
예제 #4
0
    def __init__(self, data):
        data = data.reset_index(drop=True)

        self.fips = data[schema.FIPS].iloc[0]
        self.data = data
        self.intervention = Intervention(data[schema.INTERVENTION].iloc[0])

        self.data["short_fall"] = _calculate_shortfall(
            self.data[schema.ALL_HOSPITALIZED], self.data[schema.BEDS])
예제 #5
0
def get_intervention_for_state(state) -> Intervention:
    return Intervention.from_str(get_interventions()[state])
    def write_region(self, regional_input: RegionalInput) -> None:
        """Generates the CAN UI output format for a given region.

        Args:
            regional_input: the region and its data
        """
        # Get the latest observed values to use in calculating shims
        observed_latest_dict = regional_input.latest

        state = observed_latest_dict[CommonFields.STATE]
        log.info("Mapping output to WebUI.",
                 state=state,
                 fips=regional_input.fips)
        shim_log = structlog.getLogger(fips=regional_input.fips)
        pyseir_outputs = regional_input.ensemble_results()

        try:
            fit_results = regional_input.inference_result()
            t0_simulation = datetime.fromisoformat(fit_results["t0_date"])
        except (KeyError, ValueError):
            log.error("Fit result not found for fips. Skipping...",
                      fips=regional_input.fips)
            return
        population = regional_input.population

        # We will shim all suppression policies by the same amount (since historical tracking error
        # for all policies is the same).
        baseline_policy = "suppression_policy__inferred"  # This could be any valid policy

        # We need the index in the model's temporal frame.
        idx_offset = int(fit_results["t_today"] - fit_results["t0"])

        observed_death_latest = observed_latest_dict[CommonFields.DEATHS]
        observed_total_hosps_latest = observed_latest_dict[
            CommonFields.CURRENT_HOSPITALIZED]
        observed_icu_latest = observed_latest_dict[CommonFields.CURRENT_ICU]

        # For Deaths
        model_death_latest = pyseir_outputs[baseline_policy]["total_deaths"][
            "ci_50"][idx_offset]
        model_acute_latest = pyseir_outputs[baseline_policy]["HGen"]["ci_50"][
            idx_offset]
        model_icu_latest = pyseir_outputs[baseline_policy]["HICU"]["ci_50"][
            idx_offset]
        model_total_hosps_latest = model_acute_latest + model_icu_latest

        death_shim = shim.calculate_strict_shim(
            model=model_death_latest,
            observed=observed_death_latest,
            log=shim_log.bind(type=CommonFields.DEATHS),
        )

        total_hosp_shim = shim.calculate_strict_shim(
            model=model_total_hosps_latest,
            observed=observed_total_hosps_latest,
            log=shim_log.bind(type=CommonFields.CURRENT_HOSPITALIZED),
        )

        # For ICU This one is a little more interesting since we often don't have ICU. In this case
        # we use information from the same aggregation level (intralevel) to keep the ratios
        # between general hospitalization and icu hospitalization
        icu_shim = shim.calculate_intralevel_icu_shim(
            model_acute=model_acute_latest,
            model_icu=model_icu_latest,
            observed_icu=observed_icu_latest,
            observed_total_hosps=observed_total_hosps_latest,
            log=shim_log.bind(type=CommonFields.CURRENT_ICU),
        )

        # Iterate through each suppression policy.
        # Model output is interpolated to the dates desired for the API.
        suppression_policies = [
            key for key in pyseir_outputs.keys()
            if key.startswith("suppression_policy")
        ]
        for suppression_policy in suppression_policies:
            output_for_policy = pyseir_outputs[suppression_policy]
            output_model = pd.DataFrame()
            t_list = output_for_policy["t_list"]
            t_list_downsampled = range(0, int(max(t_list)),
                                       self.output_interval_days)

            output_model[schema.DAY_NUM] = t_list_downsampled
            output_model[schema.DATE] = [
                (t0_simulation + timedelta(days=t)).date().strftime("%Y-%m-%d")
                for t in t_list_downsampled
            ]
            output_model[schema.TOTAL] = population
            output_model[schema.TOTAL_SUSCEPTIBLE] = np.interp(
                t_list_downsampled, t_list, output_for_policy["S"]["ci_50"])
            output_model[schema.EXPOSED] = np.interp(
                t_list_downsampled, t_list, output_for_policy["E"]["ci_50"])
            output_model[schema.INFECTED] = np.interp(
                t_list_downsampled,
                t_list,
                np.add(output_for_policy["I"]["ci_50"],
                       output_for_policy["A"]["ci_50"]),
            )  # Infected + Asympt.
            output_model[schema.INFECTED_A] = output_model[schema.INFECTED]

            interpolated_model_acute_values = np.interp(
                t_list_downsampled, t_list, output_for_policy["HGen"]["ci_50"])
            output_model[schema.INFECTED_B] = interpolated_model_acute_values

            raw_model_icu_values = output_for_policy["HICU"]["ci_50"]
            interpolated_model_icu_values = np.interp(t_list_downsampled,
                                                      t_list,
                                                      raw_model_icu_values)

            # 21 August 2020: The line assigning schema.INFECTED_C in the output_model is
            # commented out while the Linear Regression estimator is patched through this pipeline
            # to be consumed downstream by the ICU utilization calculations. It is left here as a
            # marker for the future if the ICU utilization calculations is dis-entangled from the
            # PySEIR model outputs.

            output_model[schema.INFECTED_C] = (
                icu_shim + interpolated_model_icu_values).clip(min=0)

            # General + ICU beds. don't include vent here because they are also counted in ICU
            output_model[schema.ALL_HOSPITALIZED] = (
                interpolated_model_acute_values +
                interpolated_model_icu_values + total_hosp_shim).clip(min=0)

            output_model[schema.ALL_INFECTED] = output_model[schema.INFECTED]

            # Shim Deaths to Match Observed
            raw_model_deaths_values = output_for_policy["total_deaths"][
                "ci_50"]
            interp_model_deaths_values = np.interp(t_list_downsampled, t_list,
                                                   raw_model_deaths_values)
            output_model[schema.DEAD] = (interp_model_deaths_values +
                                         death_shim).clip(min=0)

            # Continue mapping
            final_beds = np.mean(output_for_policy["HGen"]["capacity"])
            output_model[schema.BEDS] = final_beds
            output_model[schema.CUMULATIVE_INFECTED] = np.interp(
                t_list_downsampled,
                t_list,
                np.cumsum(output_for_policy["total_new_infections"]["ci_50"]),
            )

            if fit_results:
                output_model[schema.Rt] = np.interp(
                    t_list_downsampled,
                    t_list,
                    fit_results["eps2"] * fit_results["R0"] *
                    np.ones(len(t_list)),
                )
                output_model[schema.Rt_ci90] = np.interp(
                    t_list_downsampled,
                    t_list,
                    2 * fit_results["eps2_error"] * fit_results["R0"] *
                    np.ones(len(t_list)),
                )
            else:
                output_model[schema.Rt] = 0
                output_model[schema.Rt_ci90] = 0

            output_model[schema.CURRENT_VENTILATED] = (
                icu_shim +
                np.interp(t_list_downsampled, t_list,
                          output_for_policy["HVent"]["ci_50"])).clip(min=0)
            output_model[schema.POPULATION] = population
            # Average capacity.
            output_model[schema.ICU_BED_CAPACITY] = np.mean(
                output_for_policy["HICU"]["capacity"])
            output_model[schema.VENTILATOR_CAPACITY] = np.mean(
                output_for_policy["HVent"]["capacity"])

            # Truncate date range of output.
            output_dates = pd.to_datetime(output_model["date"])
            output_model = output_model[
                (output_dates >= datetime(month=3, day=3, year=2020))
                & (output_dates < datetime.today() + timedelta(days=90))]

            # Fill in results for the Rt indicator.
            rt_results = regional_input.inferred_infection_rate()

            if rt_results is None or rt_results.empty:
                log.warning(
                    "No Rt Results found, clearing Rt in output.",
                    fips=regional_input.fips,
                    suppression_policy=suppression_policy,
                )
                output_model[schema.RT_INDICATOR] = "NaN"
                output_model[schema.RT_INDICATOR_CI90] = "NaN"
            else:
                rt_results.index = rt_results["date"].dt.strftime("%Y-%m-%d")
                merged = output_model.merge(
                    rt_results[["Rt_MAP_composite", "Rt_ci95_composite"]],
                    right_index=True,
                    left_on="date",
                    how="left",
                )
                output_model[schema.RT_INDICATOR] = merged["Rt_MAP_composite"]

                # With 90% probability the value is between rt_indicator - ci90
                # to rt_indicator + ci90
                output_model[schema.RT_INDICATOR_CI90] = (
                    merged["Rt_ci95_composite"] - merged["Rt_MAP_composite"])

            output_model[[schema.RT_INDICATOR,
                          schema.RT_INDICATOR_CI90]] = output_model[[
                              schema.RT_INDICATOR, schema.RT_INDICATOR_CI90
                          ]].fillna("NaN")

            int_columns = [
                col for col in output_model.columns if col not in (
                    schema.DATE,
                    schema.Rt,
                    schema.Rt_ci90,
                    schema.RT_INDICATOR,
                    schema.RT_INDICATOR_CI90,
                    schema.FIPS,
                )
            ]
            # Casing floats to ints and then replacing filled in zeros with NaN values instead of
            # propagating zeros.
            na_int_columns = output_model.loc[:, int_columns].isna()
            output_model.loc[:,
                             int_columns] = output_model[int_columns].fillna(
                                 0).astype(int)
            output_model[na_int_columns] = np.nan
            output_model.loc[:, [
                schema.Rt, schema.Rt_ci90, schema.RT_INDICATOR, schema.
                RT_INDICATOR_CI90
            ]] = output_model[[
                schema.Rt, schema.Rt_ci90, schema.RT_INDICATOR,
                schema.RT_INDICATOR_CI90
            ]]

            output_model[schema.FIPS] = regional_input.fips
            intervention = Intervention.from_webui_data_adaptor(
                suppression_policy)
            output_model[schema.INTERVENTION] = intervention.value
            output_path = get_run_artifact_path(regional_input.region,
                                                RunArtifact.WEB_UI_RESULT,
                                                output_dir=self.output_dir)
            output_path = output_path.replace("__INTERVENTION_IDX__",
                                              str(intervention.value))
            output_model.to_json(output_path, orient=OUTPUT_JSON_ORIENT)
예제 #7
0
    def map_fips(self, fips):
        """
        For a given county fips code, generate the CAN UI output format.

        Parameters
        ----------
        fips: str
            County FIPS code to map.
        """
        logging.info(f"Mapping output to WebUI for {self.state}, {fips}")
        pyseir_outputs = load_data.load_ensemble_results(fips)

        if len(fips) == 5 and fips not in self.df_whitelist.fips.values:
            logging.info(f"Excluding {fips} due to white list...")
            return
        try:
            fit_results = load_inference_result(fips)
            t0_simulation = datetime.fromisoformat(fit_results["t0_date"])
        except (KeyError, ValueError):
            logging.error(f"Fit result not found for {fips}. Skipping...")
            return

        # ---------------------------------------------------------------------
        # Rescale hosps based on the population ratio... Could swap this to
        # infection ratio later?
        # ---------------------------------------------------------------------
        hosp_times, current_hosp, _ = load_data.load_hospitalization_data_by_state(
            state=self.state_abbreviation,
            t0=t0_simulation,
            convert_cumulative_to_current=True,
            category="hospitalized",
        )

        _, current_icu, _ = load_data.load_hospitalization_data_by_state(
            state=self.state_abbreviation,
            t0=t0_simulation,
            convert_cumulative_to_current=True,
            category="icu",
        )

        if len(fips) == 5:
            population = self.population_data.get_record_for_fips(fips)[CommonFields.POPULATION]
        else:
            population = self.population_data.get_record_for_state(self.state_abbreviation)[
                CommonFields.POPULATION
            ]

        # logging.info(f'Mapping output to WebUI for {self.state}, {fips}')
        # pyseir_outputs = load_data.load_ensemble_results(fips)
        # if pyseir_outputs is None:
        #     logging.warning(f'Fit result not found for {fips}: Skipping county')
        #     return None

        policies = [key for key in pyseir_outputs.keys() if key.startswith("suppression_policy")]
        if current_hosp is not None:
            t_latest_hosp_data, current_hosp = hosp_times[-1], current_hosp[-1]
            t_latest_hosp_data_date = t0_simulation + timedelta(days=int(t_latest_hosp_data))

            state_hosp_gen = load_data.get_compartment_value_on_date(
                fips=fips[:2], compartment="HGen", date=t_latest_hosp_data_date
            )
            state_hosp_icu = load_data.get_compartment_value_on_date(
                fips=fips[:2], compartment="HICU", date=t_latest_hosp_data_date
            )

            if len(fips) == 5:
                # Rescale the county level hospitalizations by the expected
                # ratio of county / state hospitalizations from simulations.
                # We use ICU data if available too.
                county_hosp = load_data.get_compartment_value_on_date(
                    fips=fips,
                    compartment="HGen",
                    date=t_latest_hosp_data_date,
                    ensemble_results=pyseir_outputs,
                )
                county_icu = load_data.get_compartment_value_on_date(
                    fips=fips,
                    compartment="HICU",
                    date=t_latest_hosp_data_date,
                    ensemble_results=pyseir_outputs,
                )
                current_hosp *= (county_hosp + county_icu) / (state_hosp_gen + state_hosp_icu)

            hosp_rescaling_factor = current_hosp / (state_hosp_gen + state_hosp_icu)

            # Some states have covidtracking issues. We shouldn't ground ICU cases
            # to zero since so far these have all been bad reporting.
            if current_icu is not None and current_icu[-1] > 0:
                icu_rescaling_factor = current_icu[-1] / state_hosp_icu
            else:
                icu_rescaling_factor = current_hosp / (state_hosp_gen + state_hosp_icu)
        else:
            hosp_rescaling_factor = 1.0
            icu_rescaling_factor = 1.0

        # Iterate through each suppression policy.
        # Model output is interpolated to the dates desired for the API.
        for i_policy, suppression_policy in enumerate(
            [key for key in pyseir_outputs.keys() if key.startswith("suppression_policy")]
        ):

            output_for_policy = pyseir_outputs[suppression_policy]
            output_model = pd.DataFrame()

            t_list = output_for_policy["t_list"]
            t_list_downsampled = range(0, int(max(t_list)), self.output_interval_days)

            output_model[schema.DAY_NUM] = t_list_downsampled
            output_model[schema.DATE] = [
                (t0_simulation + timedelta(days=t)).date().strftime("%m/%d/%y")
                for t in t_list_downsampled
            ]
            output_model[schema.TOTAL] = population
            output_model[schema.TOTAL_SUSCEPTIBLE] = np.interp(
                t_list_downsampled, t_list, output_for_policy["S"]["ci_50"]
            )
            output_model[schema.EXPOSED] = np.interp(
                t_list_downsampled, t_list, output_for_policy["E"]["ci_50"]
            )
            output_model[schema.INFECTED] = np.interp(
                t_list_downsampled,
                t_list,
                np.add(output_for_policy["I"]["ci_50"], output_for_policy["A"]["ci_50"]),
            )  # Infected + Asympt.
            output_model[schema.INFECTED_A] = output_model[schema.INFECTED]
            output_model[schema.INFECTED_B] = hosp_rescaling_factor * np.interp(
                t_list_downsampled, t_list, output_for_policy["HGen"]["ci_50"]
            )  # Hosp General
            output_model[schema.INFECTED_C] = icu_rescaling_factor * np.interp(
                t_list_downsampled, t_list, output_for_policy["HICU"]["ci_50"]
            )  # Hosp ICU
            # General + ICU beds. don't include vent here because they are also counted in ICU
            output_model[schema.ALL_HOSPITALIZED] = np.add(
                output_model[schema.INFECTED_B], output_model[schema.INFECTED_C]
            )
            output_model[schema.ALL_INFECTED] = output_model[schema.INFECTED]
            output_model[schema.DEAD] = np.interp(
                t_list_downsampled, t_list, output_for_policy["total_deaths"]["ci_50"]
            )
            final_beds = np.mean(output_for_policy["HGen"]["capacity"])
            output_model[schema.BEDS] = final_beds
            output_model[schema.CUMULATIVE_INFECTED] = np.interp(
                t_list_downsampled,
                t_list,
                np.cumsum(output_for_policy["total_new_infections"]["ci_50"]),
            )

            if fit_results:
                output_model[schema.Rt] = np.interp(
                    t_list_downsampled,
                    t_list,
                    fit_results["eps"] * fit_results["R0"] * np.ones(len(t_list)),
                )
                output_model[schema.Rt_ci90] = np.interp(
                    t_list_downsampled,
                    t_list,
                    2 * fit_results["eps_error"] * fit_results["R0"] * np.ones(len(t_list)),
                )
            else:
                output_model[schema.Rt] = 0
                output_model[schema.Rt_ci90] = 0

            output_model[schema.CURRENT_VENTILATED] = icu_rescaling_factor * np.interp(
                t_list_downsampled, t_list, output_for_policy["HVent"]["ci_50"]
            )
            output_model[schema.POPULATION] = population
            # Average capacity.
            output_model[schema.ICU_BED_CAPACITY] = np.mean(output_for_policy["HICU"]["capacity"])
            output_model[schema.VENTILATOR_CAPACITY] = np.mean(
                output_for_policy["HVent"]["capacity"]
            )

            # Truncate date range of output.
            output_dates = pd.to_datetime(output_model["date"])
            output_model = output_model[
                (output_dates >= datetime(month=3, day=3, year=2020))
                & (output_dates < datetime.today() + timedelta(days=90))
            ]
            output_model = output_model.fillna(0)

            # Fill in results for the Rt indicator.
            try:
                rt_results = load_Rt_result(fips)
                rt_results.index = rt_results["Rt_MAP_composite"].index.strftime("%m/%d/%y")
                merged = output_model.merge(
                    rt_results[["Rt_MAP_composite", "Rt_ci95_composite"]],
                    right_index=True,
                    left_on="date",
                    how="left",
                )
                output_model[schema.RT_INDICATOR] = merged["Rt_MAP_composite"]

                # With 90% probability the value is between rt_indicator - ci90 to rt_indicator + ci90
                output_model[schema.RT_INDICATOR_CI90] = (
                    merged["Rt_ci95_composite"] - merged["Rt_MAP_composite"]
                )
            except (ValueError, KeyError) as e:
                output_model[schema.RT_INDICATOR] = "NaN"
                output_model[schema.RT_INDICATOR_CI90] = "NaN"

            output_model[[schema.RT_INDICATOR, schema.RT_INDICATOR_CI90]] = output_model[
                [schema.RT_INDICATOR, schema.RT_INDICATOR_CI90]
            ].fillna("NaN")

            # Truncate floats and cast as strings to match data model.
            int_columns = [
                col
                for col in output_model.columns
                if col
                not in (
                    schema.DATE,
                    schema.Rt,
                    schema.Rt_ci90,
                    schema.RT_INDICATOR,
                    schema.RT_INDICATOR_CI90,
                )
            ]
            output_model.loc[:, int_columns] = (
                output_model[int_columns].fillna(0).astype(int).astype(str)
            )
            output_model.loc[
                :, [schema.Rt, schema.Rt_ci90, schema.RT_INDICATOR, schema.RT_INDICATOR_CI90]
            ] = (
                output_model[
                    [schema.Rt, schema.Rt_ci90, schema.RT_INDICATOR, schema.RT_INDICATOR_CI90]
                ]
                .fillna(0)
                .round(decimals=4)
                .astype(str)
            )

            # Convert the records format to just list(list(values))
            output_model = [
                [val for val in timestep.values()]
                for timestep in output_model.to_dict(orient="records")
            ]

            output_path = get_run_artifact_path(
                fips, RunArtifact.WEB_UI_RESULT, output_dir=self.output_dir
            )
            policy_enum = Intervention.from_webui_data_adaptor(suppression_policy)
            output_path = output_path.replace("__INTERVENTION_IDX__", str(policy_enum.value))
            with open(output_path, "w") as f:
                json.dump(output_model, f)
    def map_fips(self, fips):
        """
        For a given county fips code, generate the CAN UI output format.

        Parameters
        ----------
        fips: str
            County FIPS code to map.
        """
        if len(fips) == 5:
            population = self.population_data.get_county_level(
                'USA', state=self.state_abbreviation, fips=fips)
        else:
            population = self.population_data.get_state_level(
                'USA', state=self.state_abbreviation)

        logging.info(f'Mapping output to WebUI for {self.state}, {fips}')
        pyseir_outputs = load_data.load_ensemble_results(fips)

        policies = [
            key for key in pyseir_outputs.keys()
            if key.startswith('suppression_policy')
        ]

        all_hospitalized_today = None
        try:
            fit_results = load_inference_result(fips)
        except ValueError:
            fit_results = None
            logging.error(
                f'Fit result not found for {fips}: Skipping inference elements'
            )

        for i_policy, suppression_policy in enumerate(policies):
            if suppression_policy == 'suppression_policy__full_containment':  # No longer shipping this.
                continue
            output_for_policy = pyseir_outputs[suppression_policy]
            output_model = pd.DataFrame()

            if suppression_policy == 'suppression_policy__inferred' and fit_results:
                if len(fips) == 5 and fips not in self.df_whitelist.fips:
                    continue

                t0 = datetime.fromisoformat(fit_results['t0_date'])

                # Hospitalizations need to be rescaled by the inferred factor to match observations for display.
                now_idx = int(
                    (datetime.today() -
                     datetime.fromisoformat(fit_results['t0_date'])).days)
                total_hosps = output_for_policy['HGen']['ci_50'][
                    now_idx] + output_for_policy['HICU']['ci_50'][now_idx]
                hosp_fraction = all_hospitalized_today / total_hosps

            else:
                t0 = datetime.today()
                hosp_fraction = 1.

            t_list = output_for_policy['t_list']
            t_list_downsampled = range(0, int(max(t_list)),
                                       self.output_interval_days)
            # Col 0
            output_model['days'] = t_list_downsampled
            # Col 1
            output_model['date'] = [
                (t0 + timedelta(days=t)).date().strftime('%m/%d/%y')
                for t in t_list_downsampled
            ]
            # Col 2
            output_model['t'] = population
            # Col 3
            output_model['b'] = np.interp(t_list_downsampled, t_list,
                                          output_for_policy['S']['ci_50'])
            # Col 4
            output_model['c'] = np.interp(t_list_downsampled, t_list,
                                          output_for_policy['E']['ci_50'])
            # Col 5
            output_model['d'] = np.interp(
                t_list_downsampled, t_list,
                np.add(output_for_policy['I']['ci_50'],
                       output_for_policy['A']['ci_50']))  # Infected + Asympt.
            # Col 6
            output_model['e'] = output_model['d']
            # Col 7
            output_model['f'] = np.interp(
                t_list_downsampled, t_list,
                output_for_policy['HGen']['ci_50'])  # Hosp General
            # Col 8
            output_model['g'] = np.interp(
                t_list_downsampled, t_list,
                output_for_policy['HICU']['ci_50'])  # Hosp ICU
            # Col 9
            output_model['all_hospitalized'] = hosp_fraction * np.add(
                output_model['f'], output_model['g'])
            # Col 10
            output_model['all_infected'] = output_model['d']
            # Col 11
            output_model['dead'] = np.interp(
                t_list_downsampled, t_list,
                output_for_policy['total_deaths']['ci_50'])
            # Col 12
            final_beds = np.mean(
                output_for_policy['HGen']['capacity']) + np.mean(
                    output_for_policy['HICU']['capacity'])
            output_model['beds'] = final_beds
            output_model['cumulative_infected'] = np.interp(
                t_list_downsampled, t_list,
                np.cumsum(output_for_policy['total_new_infections']['ci_50']))

            if fit_results:
                output_model['R_t'] = np.interp(
                    t_list_downsampled, t_list, fit_results['eps'] *
                    fit_results['R0'] * np.ones(len(t_list)))
                output_model['R_t_stdev'] = np.interp(
                    t_list_downsampled, t_list, fit_results['eps_error'] *
                    fit_results['R0'] * np.ones(len(t_list)))
            else:
                output_model['R_t'] = 0
                output_model['R_t_stdev'] = 0

            # Record the current number of hospitalizations in order to rescale the inference results.
            all_hospitalized_today = output_model['all_hospitalized'][0]

            # Don't backfill inferences
            if suppression_policy != 'suppression_policy__inferred':
                backfill = self.backfill_output_model_fips(
                    fips, t0, final_beds, output_model)
                output_model = pd.concat([
                    backfill, output_model
                ])[output_model.columns].reset_index(drop=True)

            # Truncate date range of output.
            output_dates = pd.to_datetime(output_model['date'])
            output_model = output_model[
                (output_dates > datetime(month=3, day=3, year=2020))
                & (output_dates < datetime.today() + timedelta(days=90))]
            output_model = output_model.fillna(0)

            for col in ['l']:
                output_model[col] = 0
            output_model['population'] = population
            for col in ['m', 'n']:
                output_model[col] = 0

            # Truncate floats and cast as strings to match data model.
            int_columns = [
                col for col in output_model.columns
                if col not in ('date', 'R_t', 'R_t_stdev')
            ]
            output_model.loc[:,
                             int_columns] = output_model[int_columns].fillna(
                                 0).astype(int).astype(str)
            output_model.loc[:, ['R_t', 'R_t_stdev']] = output_model[[
                'R_t', 'R_t_stdev'
            ]].fillna(0).round(decimals=4).astype(str)

            # Convert the records format to just list(list(values))
            output_model = [[
                val for val in timestep.values()
            ] for timestep in output_model.to_dict(orient='records')]

            output_path = get_run_artifact_path(fips,
                                                RunArtifact.WEB_UI_RESULT,
                                                output_dir=self.output_dir)
            policy_enum = Intervention.from_webui_data_adaptor(
                suppression_policy)
            output_path = output_path.replace('__INTERVENTION_IDX__',
                                              str(policy_enum.value))
            with open(output_path, 'w') as f:
                json.dump(output_model, f)
    def map_fips(self, fips: str) -> None:
        """
        For a given fips code, for either a county or state, generate the CAN UI output format.

        Parameters
        ----------
        fips: str
            FIPS code to map.
        """
        log.info("Mapping output to WebUI.", state=self.state, fips=fips)
        pyseir_outputs = load_data.load_ensemble_results(fips)

        try:
            fit_results = load_inference_result(fips)
            t0_simulation = datetime.fromisoformat(fit_results["t0_date"])
        except (KeyError, ValueError):
            log.error("Fit result not found for fips. Skipping...", fips=fips)
            return
        population = self._get_population(fips)

        # Get multiplicative conversion factors to scale model output to fit dataset current values
        hosp_rescaling_factor, icu_rescaling_factor = self._get_model_to_dataset_conversion_factors(
            t0_simulation=t0_simulation,
            fips=fips,
            pyseir_outputs=pyseir_outputs,
        )

        # Iterate through each suppression policy.
        # Model output is interpolated to the dates desired for the API.
        suppression_policies = [
            key for key in pyseir_outputs.keys()
            if key.startswith("suppression_policy")
        ]
        for suppression_policy in suppression_policies:
            output_for_policy = pyseir_outputs[suppression_policy]
            output_model = pd.DataFrame()

            t_list = output_for_policy["t_list"]
            t_list_downsampled = range(0, int(max(t_list)),
                                       self.output_interval_days)

            output_model[schema.DAY_NUM] = t_list_downsampled
            output_model[schema.DATE] = [
                (t0_simulation + timedelta(days=t)).date().strftime("%m/%d/%y")
                for t in t_list_downsampled
            ]
            output_model[schema.TOTAL] = population
            output_model[schema.TOTAL_SUSCEPTIBLE] = np.interp(
                t_list_downsampled, t_list, output_for_policy["S"]["ci_50"])
            output_model[schema.EXPOSED] = np.interp(
                t_list_downsampled, t_list, output_for_policy["E"]["ci_50"])
            output_model[schema.INFECTED] = np.interp(
                t_list_downsampled,
                t_list,
                np.add(output_for_policy["I"]["ci_50"],
                       output_for_policy["A"]["ci_50"]),
            )  # Infected + Asympt.
            output_model[schema.INFECTED_A] = output_model[schema.INFECTED]
            output_model[
                schema.INFECTED_B] = hosp_rescaling_factor * np.interp(
                    t_list_downsampled, t_list,
                    output_for_policy["HGen"]["ci_50"])  # Hosp General

            raw_model_icu_values = output_for_policy["HICU"]["ci_50"]
            interpolated_model_icu_values = np.interp(t_list_downsampled,
                                                      t_list,
                                                      raw_model_icu_values)
            final_derived_model_value = icu_rescaling_factor * interpolated_model_icu_values
            output_model[schema.INFECTED_C] = final_derived_model_value
            # General + ICU beds. don't include vent here because they are also counted in ICU
            output_model[schema.ALL_HOSPITALIZED] = np.add(
                output_model[schema.INFECTED_B],
                output_model[schema.INFECTED_C])
            output_model[schema.ALL_INFECTED] = output_model[schema.INFECTED]
            output_model[schema.DEAD] = np.interp(
                t_list_downsampled, t_list,
                output_for_policy["total_deaths"]["ci_50"])
            final_beds = np.mean(output_for_policy["HGen"]["capacity"])
            output_model[schema.BEDS] = final_beds
            output_model[schema.CUMULATIVE_INFECTED] = np.interp(
                t_list_downsampled,
                t_list,
                np.cumsum(output_for_policy["total_new_infections"]["ci_50"]),
            )

            if fit_results:
                output_model[schema.Rt] = np.interp(
                    t_list_downsampled,
                    t_list,
                    fit_results["eps"] * fit_results["R0"] *
                    np.ones(len(t_list)),
                )
                output_model[schema.Rt_ci90] = np.interp(
                    t_list_downsampled,
                    t_list,
                    2 * fit_results["eps_error"] * fit_results["R0"] *
                    np.ones(len(t_list)),
                )
            else:
                output_model[schema.Rt] = 0
                output_model[schema.Rt_ci90] = 0

            output_model[
                schema.CURRENT_VENTILATED] = icu_rescaling_factor * np.interp(
                    t_list_downsampled, t_list,
                    output_for_policy["HVent"]["ci_50"])
            output_model[schema.POPULATION] = population
            # Average capacity.
            output_model[schema.ICU_BED_CAPACITY] = np.mean(
                output_for_policy["HICU"]["capacity"])
            output_model[schema.VENTILATOR_CAPACITY] = np.mean(
                output_for_policy["HVent"]["capacity"])

            # Truncate date range of output.
            output_dates = pd.to_datetime(output_model["date"])
            output_model = output_model[
                (output_dates >= datetime(month=3, day=3, year=2020))
                & (output_dates < datetime.today() + timedelta(days=90))]
            output_model = output_model.fillna(0)

            # Fill in results for the Rt indicator.
            try:
                rt_results = load_Rt_result(fips)
                rt_results.index = rt_results[
                    "Rt_MAP_composite"].index.strftime("%m/%d/%y")
                merged = output_model.merge(
                    rt_results[["Rt_MAP_composite", "Rt_ci95_composite"]],
                    right_index=True,
                    left_on="date",
                    how="left",
                )
                output_model[schema.RT_INDICATOR] = merged["Rt_MAP_composite"]

                # With 90% probability the value is between rt_indicator - ci90
                # to rt_indicator + ci90
                output_model[schema.RT_INDICATOR_CI90] = (
                    merged["Rt_ci95_composite"] - merged["Rt_MAP_composite"])
            except (ValueError, KeyError) as e:
                log.warning("Clearing Rt in output for fips.",
                            fips=fips,
                            exc_info=e)
                output_model[schema.RT_INDICATOR] = "NaN"
                output_model[schema.RT_INDICATOR_CI90] = "NaN"

            output_model[[schema.RT_INDICATOR,
                          schema.RT_INDICATOR_CI90]] = output_model[[
                              schema.RT_INDICATOR, schema.RT_INDICATOR_CI90
                          ]].fillna("NaN")

            # Truncate floats and cast as strings to match data model.
            int_columns = [
                col for col in output_model.columns if col not in (
                    schema.DATE,
                    schema.Rt,
                    schema.Rt_ci90,
                    schema.RT_INDICATOR,
                    schema.RT_INDICATOR_CI90,
                )
            ]
            output_model.loc[:, int_columns] = (
                output_model[int_columns].fillna(0).astype(int).astype(str))
            output_model.loc[:, [
                schema.Rt, schema.Rt_ci90, schema.RT_INDICATOR, schema.
                RT_INDICATOR_CI90
            ]] = (output_model[[
                schema.Rt, schema.Rt_ci90, schema.RT_INDICATOR,
                schema.RT_INDICATOR_CI90
            ]].fillna(0).round(decimals=4).astype(str))

            # Convert the records format to just list(list(values))
            output_model = [[
                val for val in timestep.values()
            ] for timestep in output_model.to_dict(orient="records")]

            output_path = get_run_artifact_path(fips,
                                                RunArtifact.WEB_UI_RESULT,
                                                output_dir=self.output_dir)
            policy_enum = Intervention.from_webui_data_adaptor(
                suppression_policy)
            output_path = output_path.replace("__INTERVENTION_IDX__",
                                              str(policy_enum.value))
            with open(output_path, "w") as f:
                json.dump(output_model, f)
def get_intervention_for_state(state):
    # TODO: read this from a dataset class
    interventions_url = "https://raw.githubusercontent.com/covid-projections/covid-projections/master/src/assets/data/interventions.json"
    interventions = requests.get(interventions_url).json()
    return Intervention.from_str(interventions[state])