Beispiel #1
0
def do_beta_forecast(app_metadata: cli_tools.Metadata,
                     forecast_specification: ForecastSpecification,
                     preprocess_only: bool):
    """Set up the forecast output area and, optionally, run the workflow.

    The covariates are checked through the data interface, the output
    directories are created and the specification is dumped to disk.
    Unless ``preprocess_only`` is set, a ``ForecastWorkflow`` is then built,
    given its tasks, and run.

    Parameters
    ----------
    app_metadata
        Not read anywhere in this function.
    forecast_specification
        The specification describing the forecast to run.
    preprocess_only
        When true, stop after the specification has been written and do
        not build or run the workflow.

    """
    logger.debug('Starting beta forecast.')

    interface = ForecastDataInterface.from_specification(forecast_specification)

    # Scenario covariates must agree with the regression covariates, and
    # the covariate data versions must match.
    checked_covariates = interface.check_covariates(forecast_specification.scenarios)

    interface.make_dirs()
    # Fixme: Inconsistent data writing interfaces
    specification_destination = interface.forecast_paths.forecast_specification
    forecast_specification.dump(specification_destination)

    if preprocess_only:
        return

    workflow = ForecastWorkflow(forecast_specification.data.output_root)
    workflow.attach_tasks(
        n_draws=interface.get_n_draws(),
        scenarios=forecast_specification.scenarios,
        covariates=checked_covariates,
    )
    try:
        workflow.run()
    except WorkflowAlreadyComplete:
        # A finished workflow is reported rather than treated as an error.
        logger.info('Workflow already complete')
def run_resample_map(forecast_version: str) -> None:
    """Build the draw resampling map for a forecast version and save it.

    The map is built from the deaths of the scenario named under
    ``reference_scenario`` in the specification's resampling parameters.

    Parameters
    ----------
    forecast_version
        The path to the forecast version to run this process for.

    """
    specification_file = Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE
    specification = ForecastSpecification.from_path(specification_file)
    resampling_config = specification.postprocessing.resampling
    interface = ForecastDataInterface.from_specification(specification)

    # Only the first item returned by the loader (the deaths) is needed.
    reference_scenario = resampling_config['reference_scenario']
    reference_deaths, *_ = pp.load_output_data(reference_scenario, interface)
    death_draws = pd.concat(reference_deaths, axis=1)

    draw_map = pp.build_resampling_map(death_draws, resampling_config)
    interface.save_resampling_map(draw_map)
def run_mean_level_mandate_reimposition(forecast_version: str,
                                        scenario_name: str,
                                        reimposition_number: int):
    """Compute and save mandate reimposition dates from mean-level deaths.

    Deaths for the scenario are loaded, resampled with the saved resampling
    map and summarized; the ``mean`` column of that summary is what the
    reimposition date computation is run on. Dates saved by earlier
    reimpositions of the same scenario are used to derive the end date of
    each location's most recent reimposition.

    Parameters
    ----------
    forecast_version
        The path to the forecast version to run this process for.
    scenario_name
        Which scenario in the forecast version to run this process for.
    reimposition_number
        Which reimposition is being computed. Dates for reimpositions
        ``reimposition_number - 1`` down to ``1`` are loaded as history.

    """
    logger.info(
        f"Initiating SEIIR mean level mandate reimposition {reimposition_number} "
        f"for scenario {scenario_name}.")
    forecast_spec: ForecastSpecification = ForecastSpecification.from_path(
        Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE)
    scenario_spec = forecast_spec.scenarios[scenario_name]
    data_interface = ForecastDataInterface.from_specification(forecast_spec)

    # Reduce the draw-level deaths to a single mean series per location/date.
    resampling_map = data_interface.load_resampling_map()
    deaths = pp.load_deaths(scenario_name, data_interface)
    deaths = pd.concat(deaths, axis=1)
    deaths = pp.resample_draws(deaths, resampling_map)
    deaths = pp.summarize(deaths)
    deaths = deaths['mean'].rename('deaths').reset_index()
    deaths['date'] = pd.to_datetime(deaths['date'])

    modeled_locations = deaths['location_id'].unique().tolist()
    deaths = deaths.set_index(['location_id', 'date'])

    # Keep one population value per modeled location.
    # NOTE(review): age_group_id 22 / sex_id 3 presumably select the
    # all-ages, both-sexes aggregate -- confirm against the population data.
    population = pp.load_populations(data_interface)
    population = population[population.location_id.isin(modeled_locations)
                            & (population.age_group_id == 22)
                            & (population.sex_id
                               == 3)].set_index('location_id')['population']

    min_wait, days_on, reimposition_threshold = model.unpack_parameters(
        scenario_spec.algorithm_params)

    # Walk backwards from the most recent earlier reimposition. A location
    # is only filled in while it is still empty, so each location ends up
    # with the date of its latest previous reimposition (or NaT if none).
    previous_dates = pd.Series(pd.NaT, index=population.index)
    for previous_reimposition in range(reimposition_number - 1, 0, -1):
        these_dates = data_interface.load_reimposition_dates(
            scenario=scenario_name, reimposition_number=previous_reimposition)
        these_dates = pd.to_datetime(
            these_dates.set_index('location_id')['reimposition_date'])
        these_dates = these_dates.reindex(previous_dates.index)
        this_reimposition = previous_dates.isnull() & these_dates.notnull()
        previous_dates.loc[this_reimposition] = these_dates.loc[
            this_reimposition]
    last_reimposition_end_date = previous_dates + days_on
    reimposition_date = model.compute_reimposition_date(
        deaths, population, reimposition_threshold, min_wait,
        last_reimposition_end_date)
    data_interface.save_reimposition_dates(
        reimposition_date.reset_index(),
        scenario=scenario_name,
        reimposition_number=reimposition_number)
Beispiel #4
0
def run_seir_postprocessing(forecast_version: str, scenario_name: str, measure: str) -> None:
    """Postprocess a single measure for one scenario of a forecast version.

    The measure is routed to the measure, covariate or miscellaneous
    postprocessing routine according to which of ``MEASURES``,
    ``COVARIATES`` or ``MISCELLANEOUS`` contains it.

    Parameters
    ----------
    forecast_version
        The path to the forecast version to run this process for.
    scenario_name
        Which scenario in the forecast version to run this process for.
    measure
        The name of the measure to postprocess.

    Raises
    ------
    NotImplementedError
        If ``measure`` is in none of the known collections.

    """
    logger.info(f'Starting postprocessing for forecast version {forecast_version}, scenario {scenario_name}.')

    specification_file = Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE
    specification = ForecastSpecification.from_path(specification_file)
    scenario = specification.scenarios[scenario_name]
    interface = ForecastDataInterface.from_specification(specification)
    draw_map = interface.load_resampling_map()

    if measure in MEASURES:
        postprocess_measure(interface, draw_map, scenario_name, measure)
    elif measure in COVARIATES:
        postprocess_covariate(interface, draw_map, scenario, scenario_name, measure)
    elif measure in MISCELLANEOUS:
        # Miscellaneous outputs are processed without the resampling map.
        postprocess_miscellaneous(interface, scenario_name, measure)
    else:
        raise NotImplementedError(f'Unknown measure {measure}.')

    logger.info('**DONE**')
Beispiel #5
0
def run_beta_forecast(draw_id: int, forecast_version: str, scenario_name: str,
                      **kwargs):
    """Forecast beta and the SEIIR components for one draw of one scenario.

    Loads the regression outputs and covariates for the draw, forecasts
    beta, runs the ODE model forward from the transition date, and computes
    the output metrics. When the scenario algorithm is
    ``'draw_level_mandate_reimposition'``, the mobility covariate is
    repeatedly adjusted and the forecast re-run until no location produces
    a new reimposition date. Components, covariates and outputs are then
    written through the data interface.

    Parameters
    ----------
    draw_id
        The draw to forecast.
    forecast_version
        The path to the forecast version to run this process for.
    scenario_name
        Which scenario in the forecast version to run this process for.
    **kwargs
        Accepted but not read anywhere in this function.

    Raises
    ------
    ValueError
        If any theta is greater than 1 or less than -1, or if
        ``sigma - theta`` is greater than or equal to 1 anywhere.

    """
    logger.info(
        f"Initiating SEIIR beta forecasting for scenario {scenario_name}, draw {draw_id}."
    )
    forecast_spec: ForecastSpecification = ForecastSpecification.from_path(
        Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE)
    scenario_spec = forecast_spec.scenarios[scenario_name]
    data_interface = ForecastDataInterface.from_specification(forecast_spec)

    logger.info('Loading input data.')
    location_ids = data_interface.load_location_ids()
    # Thetas are a parameter generated from assumption or OOS predictive
    # validity testing to curtail some of the bad behavior of the model.
    thetas = data_interface.load_thetas(scenario_spec.theta)
    # Grab the last day of data in the model by location id.  This will
    # correspond to the initial condition for the projection.
    transition_date = data_interface.load_transition_date(draw_id)

    # We'll use the beta and SEIR compartments from this data set to get
    # the ODE initial condition.
    beta_regression_df = data_interface.load_beta_regression(
        draw_id).set_index('location_id').sort_index()
    past_components = beta_regression_df[['date', 'beta'] +
                                         static_vars.SEIIR_COMPARTMENTS]

    # Select out the initial condition using the day of transition.
    transition_day = past_components['date'] == transition_date.loc[
        past_components.index]
    initial_condition = past_components.loc[transition_day,
                                            static_vars.SEIIR_COMPARTMENTS]
    before_model = past_components['date'] < transition_date.loc[
        past_components.index]
    past_components = past_components[before_model]

    # Covariates and coefficients, and scaling parameters are
    # used to compute beta hat in the future.
    covariates = data_interface.load_covariates(scenario_spec, location_ids)
    coefficients = data_interface.load_regression_coefficients(draw_id)

    # Grab the projection of the covariates into the future, keeping the
    # day of transition from past model to future model.
    covariates = covariates.set_index('location_id').sort_index()
    the_future = covariates['date'] >= transition_date.loc[covariates.index]
    covariate_pred = covariates.loc[the_future].reset_index()

    beta_scales = data_interface.load_beta_scales(scenario=scenario_name,
                                                  draw_id=draw_id)

    # We'll use the same params in the ODE forecast as we did in the fit.
    beta_params = data_interface.load_beta_params(draw_id=draw_id)

    # We'll need this to compute deaths and to splice with the forecasts.
    infection_data = data_interface.load_infection_data(draw_id)

    # Each comparison must be parenthesized: `|` binds more tightly than
    # `<`/`>`, so an unparenthesized `a | b < c` is read as `(a | b) < c`.
    if ((thetas > 1) | (thetas < -1)).any():
        raise ValueError('Theta must be between -1 and 1.')
    if (beta_params['sigma'] - thetas >= 1).any():
        raise ValueError('Sigma - theta must be smaller than 1')

    # Modeling starts
    logger.info('Forecasting beta and components.')
    betas = model.forecast_beta(covariate_pred, coefficients, beta_scales)
    future_components = model.run_normal_ode_model_by_location(
        initial_condition, beta_params, betas, thetas, location_ids,
        scenario_spec.solver, scenario_spec.system)
    logger.info('Processing ODE results and computing deaths and infections.')
    components, infections, deaths, r_effective = model.compute_output_metrics(
        infection_data, past_components, future_components, thetas,
        beta_params, scenario_spec.system)

    if scenario_spec.algorithm == 'draw_level_mandate_reimposition':
        logger.info('Entering mandate reimposition.')
        # Info data specific to mandate reimposition
        percent_mandates = data_interface.load_covariate_info(
            'mobility', 'mandate_lift', location_ids)
        mandate_effect = data_interface.load_covariate_info(
            'mobility', 'effect', location_ids)
        min_wait, days_on, reimposition_threshold = model.unpack_parameters(
            scenario_spec.algorithm_params)

        # Population per location is taken as the largest sum of the
        # compartments observed for that location.
        population = (components[static_vars.SEIIR_COMPARTMENTS].sum(
            axis=1).rename('population').groupby('location_id').max())
        logger.info('Loading mandate reimposition data.')

        reimposition_count = 0
        reimposition_dates = {}
        last_reimposition_end_date = pd.Series(pd.NaT, index=population.index)
        reimposition_date = model.compute_reimposition_date(
            deaths, population, reimposition_threshold, min_wait,
            last_reimposition_end_date)

        while len(reimposition_date):  # any place reimposes mandates.
            logger.info(
                f'On mandate reimposition {reimposition_count + 1}. {len(reimposition_date)} locations '
                f'are reimposing mandates.')
            mobility = covariates[['date',
                                   'mobility']].reset_index().set_index(
                                       ['location_id', 'date'])['mobility']
            mobility_lower_bound = model.compute_mobility_lower_bound(
                mobility, mandate_effect)

            new_mobility = model.compute_new_mobility(mobility,
                                                      reimposition_date,
                                                      mobility_lower_bound,
                                                      percent_mandates,
                                                      days_on)

            # Swap the adjusted mobility into the covariates, then restore
            # the location_id-only index used by the rest of the function.
            covariates = covariates.reset_index().set_index(
                ['location_id', 'date'])
            covariates['mobility'] = new_mobility
            covariates = covariates.reset_index(level='date')
            covariate_pred = covariates.loc[the_future].reset_index()

            # Re-run the forecast with the updated mobility covariate.
            logger.info('Forecasting beta and components.')
            betas = model.forecast_beta(covariate_pred, coefficients,
                                        beta_scales)
            future_components = model.run_normal_ode_model_by_location(
                initial_condition, beta_params, betas, thetas, location_ids,
                scenario_spec.solver, scenario_spec.system)
            logger.info(
                'Processing ODE results and computing deaths and infections.')
            components, infections, deaths, r_effective = model.compute_output_metrics(
                infection_data, past_components, future_components, thetas,
                beta_params, scenario_spec.system)

            # Record this round and look for further reimpositions.
            reimposition_count += 1
            reimposition_dates[reimposition_count] = reimposition_date
            last_reimposition_end_date.loc[
                reimposition_date.index] = reimposition_date + days_on
            reimposition_date = model.compute_reimposition_date(
                deaths, population, reimposition_threshold, min_wait,
                last_reimposition_end_date)

    logger.info('Writing outputs.')
    components = components.reset_index()
    covariates = covariates.reset_index()
    outputs = pd.concat([infections, deaths, r_effective],
                        axis=1).reset_index()

    data_interface.save_components(components, scenario_name, draw_id)
    data_interface.save_raw_covariates(covariates, scenario_name, draw_id)
    data_interface.save_raw_outputs(outputs, scenario_name, draw_id)
def run_compute_beta_scaling_parameters(forecast_version: str,
                                        scenario_name: str):
    """Pre-compute and write the parameters used to rescale predicted beta.

    Two problems with the predicted beta are being corrected.

    First, the predicted beta does not have the same y-intercept as the
    beta fit in the ODE step. An initial scale factor `scale_init` shifts
    the whole time series so it lines up with the fit beta on the day of
    transition.

    Second, the long-term scaling. Carrying the transition-day scaling
    forward would bias the long range forecast heavily towards whatever
    unexplained variance the regression shows on that one day. The long
    range scaling is instead an average of the regression residual over
    some period of time in the past.

    For locations with a large number of deaths, the average of the
    residual mean across draws represents concrete unexplained variance
    that is biased in a particular direction. For locations with a small
    number of deaths, that average is susceptible to a lot of week-to-week
    noise, so the distribution of the means is re-centered about zero.
    Locations in between get a shift scaled linearly between the
    zero-centered distribution of residual means and the distribution
    centered around the actual average residual mean. This final shift is
    called `scale_final`.

    Parameters
    ----------
    forecast_version
        The path to the forecast version to run this process for.
    scenario_name
        Which scenario in the forecast version to run this process for.

    Notes
    -----
    The final average-residual re-centering step needs information from
    all draws, which is the only reason this process is separate from the
    main forecasting process.

    """
    log.info(f"Computing beta scaling parameters for forecast "
             f"version {forecast_version} and scenario {scenario_name}.")

    specification_file = Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE
    specification: ForecastSpecification = ForecastSpecification.from_path(
        specification_file)
    interface = ForecastDataInterface.from_specification(specification)

    # Restrict the total deaths to the locations loaded for this forecast.
    modeled_locations = interface.load_location_ids()
    deaths_table = interface.load_total_deaths()
    is_modeled = deaths_table.location_id.isin(modeled_locations)
    deaths_by_location = deaths_table[is_modeled].set_index('location_id')['deaths']

    scaling_spec = specification.scenarios[scenario_name].beta_scaling
    initial_scaling = compute_initial_beta_scaling_paramters(
        deaths_by_location, scaling_spec, interface)
    mean_offset = compute_residual_mean_offset(
        initial_scaling, scaling_spec, deaths_by_location)
    write_out_beta_scale(initial_scaling, mean_offset, scenario_name,
                         interface)