def write_out_beta_scales_by_draw(beta_scales: pd.DataFrame,
                                  data_interface: ForecastDataInterface,
                                  offset: pd.Series,
                                  scenario: str) -> None:
    """Finish one draw's beta scaling parameters and save them.

    Parameters
    ----------
    beta_scales
        Scaling parameters for a single draw. Must contain the columns
        ``log_beta_residual_mean`` and ``draw``. Modified in place.
    data_interface
        Object used to persist the result via ``save_beta_scales``.
    offset
        Value subtracted from ``log_beta_residual_mean`` (aligned on the
        index of ``beta_scales``).
    scenario
        Scenario the parameters are saved under.
    """
    # The offset is only available once it has been computed elsewhere, so
    # the draw specific parameters that depend on it are filled in here.
    beta_scales['log_beta_residual_mean_offset'] = offset
    beta_scales['log_beta_residual_mean'] -= offset
    shifted_residual_mean = beta_scales['log_beta_residual_mean']
    beta_scales['scale_final'] = np.exp(shifted_residual_mean)

    # The draw id is read from the first row of the ``draw`` column.
    data_interface.save_beta_scales(
        beta_scales.reset_index(),
        scenario,
        beta_scales['draw'].iat[0],
    )
def compute_initial_beta_scaling_parameters_by_draw(
        draw_id: int, total_deaths: pd.Series, beta_scaling: Dict,
        data_interface: ForecastDataInterface) -> pd.DataFrame:
    """Build the initial beta scaling parameters for a single draw.

    Parameters
    ----------
    draw_id
        Draw to process. Also seeds the random averaging window.
    total_deaths
        Series indexed by location; copied into the output unchanged.
    beta_scaling
        Mapping with the keys ``window_size``, ``average_over_min`` and
        ``average_over_max``.
    data_interface
        Source of the transition dates and beta regression for the draw.

    Returns
    -------
    pd.DataFrame
        One column per parameter, concatenated along ``axis=1``.
    """
    locations = total_deaths.index

    def _constant_column(value, name):
        # Broadcast a scalar to every location under the given column name.
        return pd.Series(value, index=locations, name=name)

    # Columns are collected in output order and concatenated at the end.
    columns = [
        total_deaths.copy(),
        _constant_column(beta_scaling['window_size'], 'window_size'),
    ]

    # Today in the data is unique by draw.  It's a combination of the
    # number of predicted days from the elastispliner in the ODE fit
    # and the random draw of lag between infection and death from the
    # infectionator. Don't compute, let's look it up.
    transition_date = data_interface.load_transition_date(draw_id)
    regression = (data_interface.load_beta_regression(draw_id)
                  .set_index('location_id')
                  .sort_index())
    # One transition date per regression row, so dates compare row by row.
    row_transition_date = transition_date.loc[regression.index]

    # The transition day provides the initial scaling parameter.
    on_transition_day = regression.loc[regression['date'] == row_transition_date]
    columns.append(on_transition_day['beta'].rename('fit_final'))
    columns.append(on_transition_day['beta_pred'].rename('pred_start'))
    initial_scale = on_transition_day['beta'] / on_transition_day['beta_pred']
    columns.append(initial_scale.rename('scale_init'))

    # The averaging window is random but reproducible for a given draw.
    rs = np.random.RandomState(draw_id)
    window_start = rs.randint(1, beta_scaling['average_over_min'])
    window_end = rs.randint(window_start + 7, beta_scaling['average_over_max'])
    columns.append(_constant_column(window_start, 'history_days_start'))
    columns.append(_constant_column(window_end, 'history_days_end'))

    history = (regression.loc[regression['date'] <= row_transition_date]
               .reset_index()
               .set_index(['location_id', 'date'])
               .sort_index())
    log_residual = np.log(history['beta'] / history['beta_pred'])

    def _window_mean(location_residuals):
        # Mean over rows ``window_end`` to ``window_start`` from the end.
        return location_residuals.iloc[-window_end:-window_start].mean()

    columns.append(log_residual
                   .groupby(level='location_id')
                   .apply(_window_mean)
                   .rename('log_beta_residual_mean'))
    columns.append(_constant_column(draw_id, 'draw'))

    return pd.concat(columns, axis=1)
def load_elastispliner_outputs(data_interface: ForecastDataInterface, noisy: bool):
    """Load elastispliner draws and difference them within each location.

    Parameters
    ----------
    data_interface
        Provides ``load_elastispliner_outputs`` (returning the noisy and
        smoothed frames, in that order) and ``get_n_draws``.
    noisy
        Select the noisy frame when true, the smoothed frame otherwise.

    Returns
    -------
    pd.DataFrame
        Indexed by ``location_id``, ``date`` and ``observed``, with the
        ``draw_<i>`` columns renamed to the integer ``i``. Each value is the
        row's value minus the previous row's value for the same location;
        the first row of each location is kept as is.
        NOTE(review): presumably this turns cumulative values into daily
        values -- confirm against the elastispliner output definition.
    """
    es_noisy, es_smoothed = data_interface.load_elastispliner_outputs()
    es_outputs = es_noisy if noisy else es_smoothed
    es_outputs = es_outputs.set_index(['location_id', 'date', 'observed'])
    n_draws = data_interface.get_n_draws()
    es_outputs = es_outputs.rename(
        columns={f'draw_{i}': i for i in range(n_draws)})
    # A grouped shift is used instead of ``groupby.apply`` with a lambda:
    # from pandas 2.0 ``apply`` prepends the group key to the index of a
    # like-indexed result, which would duplicate the ``location_id`` level.
    previous_row = es_outputs.groupby(level='location_id').shift(fill_value=0)
    return es_outputs - previous_row
def postprocess_measure(data_interface: ForecastDataInterface,
                        resampling_map: Dict[int, Dict[str, List[int]]],
                        scenario_name: str, measure: str) -> None:
    """Load, resample, optionally aggregate, and save one output measure.

    The measure's configuration is looked up in ``MEASURES``. Draws and
    summaries are always written under the configured label. When the
    configuration sets ``calculate_cumulative``, a per-location cumulative
    sum is written under the cumulative label as well.
    """
    config = MEASURES[measure]

    def _save_draws_and_summaries(data, label):
        # Draw level data goes out first, followed by its summary.
        data_interface.save_output_draws(data.reset_index(), scenario_name, label)
        summary = pp.summarize(data)
        data_interface.save_output_summaries(summary.reset_index(), scenario_name, label)

    logger.info(f'Loading {measure}.')
    draws = config.loader(scenario_name, data_interface)
    if isinstance(draws, (list, tuple)):
        # Loaders may hand back one object per draw.
        logger.info(f'Concatenating {measure}.')
        draws = pd.concat(draws, axis=1)

    logger.info(f'Resampling {measure}.')
    draws = pp.resample_draws(draws, resampling_map)

    if config.aggregator is not None:
        hierarchy = pp.load_modeled_hierarchy(data_interface)
        population = pp.load_populations(data_interface)
        draws = config.aggregator(draws, hierarchy, population)

    logger.info(f'Saving draws and summaries for {measure}.')
    _save_draws_and_summaries(draws, config.label)

    if config.calculate_cumulative:
        logger.info(f'Saving cumulative draws and summaries for {measure}.')
        cumulative_draws = draws.groupby(level='location_id').cumsum()
        _save_draws_and_summaries(cumulative_draws, config.cumulative_label)
def do_beta_forecast(app_metadata: cli_tools.Metadata,
                     forecast_specification: ForecastSpecification,
                     preprocess_only: bool):
    """Prepare the forecast output area and, unless told not to, run the workflow.

    Covariates are checked, the output directories are created and the
    specification is dumped in every case. The workflow itself is skipped
    when ``preprocess_only`` is true. ``app_metadata`` is not used here.
    """
    logger.debug('Starting beta forecast.')

    data_interface = ForecastDataInterface.from_specification(forecast_specification)

    # Check scenario covariates the same as regression covariates and that
    # covariate data versions match.
    covariates = data_interface.check_covariates(forecast_specification.scenarios)

    data_interface.make_dirs()
    # Fixme: Inconsistent data writing interfaces
    forecast_specification.dump(data_interface.forecast_paths.forecast_specification)

    if preprocess_only:
        return

    workflow = ForecastWorkflow(forecast_specification.data.output_root)
    workflow.attach_tasks(n_draws=data_interface.get_n_draws(),
                          scenarios=forecast_specification.scenarios,
                          covariates=covariates)
    try:
        workflow.run()
    except WorkflowAlreadyComplete:
        logger.info('Workflow already complete')
def load_populations(data_interface: ForecastDataInterface):
    """Read ``all_populations.csv`` from the model inputs output directory.

    The directory is taken from the infectionator metadata, under
    ``death -> metadata -> model_inputs_metadata -> output_path``.
    """
    death_metadata = data_interface.get_infectionator_metadata()['death']['metadata']
    model_inputs_root = Path(death_metadata['model_inputs_metadata']['output_path'])
    return pd.read_csv(
        model_inputs_root / 'output_measures' / 'population' / 'all_populations.csv')
def load_hierarchy(data_interface: ForecastDataInterface):
    """Read ``modeling_hierarchy.csv`` from the model inputs output directory.

    The directory is taken from the infectionator metadata, under
    ``death -> metadata -> model_inputs_metadata -> output_path``.
    """
    death_metadata = data_interface.get_infectionator_metadata()['death']['metadata']
    model_inputs_root = Path(death_metadata['model_inputs_metadata']['output_path'])
    return pd.read_csv(model_inputs_root / 'locations' / 'modeling_hierarchy.csv')
def load_coefficients(scenario: str, data_interface: ForecastDataInterface):
    """Load the regression coefficients of every draw in parallel.

    Returns a list with one entry per draw, in draw order. ``scenario`` is
    accepted but not used by this loader.
    """
    load_one_draw = functools.partial(load_coefficients_by_draw,
                                      data_interface=data_interface)
    draw_ids = range(data_interface.get_n_draws())
    with multiprocessing.Pool(FORECAST_SCALING_CORES) as worker_pool:
        return worker_pool.map(load_one_draw, draw_ids)
def load_beta_residuals_by_draw(
        draw_id: int, data_interface: ForecastDataInterface) -> pd.Series:
    """Return ``log(beta / beta_pred)`` for one draw.

    The result is indexed by ``location_id`` and ``date`` (sorted) and is
    named after the draw.
    """
    regression = data_interface.load_beta_regression(draw_id)
    indexed = regression.set_index(['location_id', 'date']).sort_index()
    fit_over_prediction = indexed['beta'] / indexed['beta_pred']
    return np.log(fit_over_prediction).rename(draw_id)
def load_coefficients_by_draw(
        draw_id: int, data_interface: ForecastDataInterface) -> pd.Series:
    """Return one draw's regression coefficients in long form.

    The wide frame (one column per covariate) is stacked into a series
    indexed by ``location_id`` and ``covariate`` and named after the draw.
    """
    wide = data_interface.load_regression_coefficients(draw_id)
    long_form = wide.set_index('location_id').stack().reset_index()
    long_form.columns = ['location_id', 'covariate', draw_id]
    long_form = long_form.set_index(['location_id', 'covariate'])
    return long_form[draw_id]
def run_resample_map(forecast_version: str) -> None:
    """Build the resampling map from the reference scenario and save it.

    ``forecast_version`` is the directory containing the forecast
    specification file.
    """
    spec_path = Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE
    forecast_spec = ForecastSpecification.from_path(spec_path)
    resampling_params = forecast_spec.postprocessing.resampling
    data_interface = ForecastDataInterface.from_specification(forecast_spec)

    # Only the first loaded output (deaths) feeds the resampling map.
    deaths_by_draw, *_ = pp.load_output_data(
        resampling_params['reference_scenario'], data_interface)
    all_deaths = pd.concat(deaths_by_draw, axis=1)

    data_interface.save_resampling_map(
        pp.build_resampling_map(all_deaths, resampling_params))
def load_output_data_by_draw(
    draw_id: int, scenario: str, data_interface: ForecastDataInterface
) -> Tuple[pd.Series, pd.Series, pd.Series]:
    """Split one draw's raw outputs into deaths, infections and R effective.

    All three series are named after the draw. Deaths are indexed by
    ``location_id``, ``date`` and ``observed``; the other two by
    ``location_id`` and ``date``.
    """
    by_location_date = (data_interface.load_raw_outputs(scenario, draw_id)
                        .set_index(['location_id', 'date'])
                        .sort_index())

    # Deaths additionally carry the ``observed`` flag in their index.
    with_observed = by_location_date.set_index('observed', append=True)

    return (
        with_observed['deaths'].rename(draw_id),
        by_location_date['infections'].rename(draw_id),
        by_location_date['r_effective'].rename(draw_id),
    )
def load_scaling_parameters_by_draw(
        draw_id: int, scenario: str,
        data_interface: ForecastDataInterface) -> pd.Series:
    """Return one draw's beta scaling parameters in long form.

    The wide frame (one column per parameter) is stacked into a series
    indexed by ``location_id`` and ``scaling_parameter`` and named after
    the draw.
    """
    wide = data_interface.load_beta_scales(scenario, draw_id)
    long_form = wide.set_index('location_id').stack().reset_index()
    long_form.columns = ['location_id', 'scaling_parameter', draw_id]
    long_form = long_form.set_index(['location_id', 'scaling_parameter'])
    return long_form[draw_id]
def load_covariate_by_draw(draw_id: int, covariate: str, time_varying: bool,
                           scenario: str,
                           data_interface: ForecastDataInterface) -> pd.Series:
    """Return one covariate of one draw as a series named after the draw.

    Time varying covariates keep the ``location_id``/``date`` index. Other
    covariates are reduced to their maximum value per location.
    """
    by_location_date = (data_interface.load_raw_covariates(scenario, draw_id)
                        .set_index(['location_id', 'date'])
                        .sort_index())

    if not time_varying:
        per_location = by_location_date.groupby(level='location_id')[covariate].max()
        return per_location.rename(draw_id)

    return by_location_date[covariate].rename(draw_id)
def load_beta_residuals(
        scenario: str,
        data_interface: ForecastDataInterface) -> List[pd.Series]:
    """Load the beta residuals of every draw in parallel.

    Returns one series per draw, in draw order. ``scenario`` is accepted
    but not used by this loader.
    """
    load_one_draw = functools.partial(load_beta_residuals_by_draw,
                                      data_interface=data_interface)
    draw_ids = range(data_interface.get_n_draws())
    with multiprocessing.Pool(FORECAST_SCALING_CORES) as worker_pool:
        return worker_pool.map(load_one_draw, draw_ids)
def get_locations_modeled_and_missing(data_interface: ForecastDataInterface):
    """Report which locations were modeled and which most detailed ones were not.

    Returns a dict with the keys ``'modeled'`` (the location ids loaded from
    the data interface) and ``'missing'`` (most detailed hierarchy locations
    that are absent from the modeled ones).
    """
    hierarchy = load_hierarchy(data_interface)
    modeled = data_interface.load_location_ids()

    is_most_detailed = hierarchy['most_detailed'] == 1
    most_detailed = hierarchy.loc[is_most_detailed, 'location_id'].unique().tolist()
    missing = list(set(most_detailed).difference(modeled))

    return {'modeled': modeled, 'missing': missing}
def load_output_data(scenario: str, data_interface: ForecastDataInterface):
    """Load deaths, infections and R effective for every draw in parallel.

    Returns three tuples (deaths, infections, r_effective), each holding
    one entry per draw in draw order.
    """
    load_one_draw = functools.partial(load_output_data_by_draw,
                                      scenario=scenario,
                                      data_interface=data_interface)
    draw_ids = range(data_interface.get_n_draws())
    with multiprocessing.Pool(FORECAST_SCALING_CORES) as worker_pool:
        per_draw = worker_pool.map(load_one_draw, draw_ids)

    # Transpose [(deaths, infections, r_eff), ...] into three sequences.
    deaths, infections, r_effective = zip(*per_draw)
    return deaths, infections, r_effective
def postprocess_miscellaneous(data_interface: ForecastDataInterface,
                              scenario_name: str, measure: str):
    """Load, optionally aggregate, and save one miscellaneous dataset.

    The configuration is looked up in ``MISCELLANEOUS``. Tables are saved
    through the data interface. Anything else is dumped as YAML into the
    scenario's miscellaneous output directory.
    """
    config = MISCELLANEOUS[measure]

    logger.info(f'Loading {measure}.')
    payload = config.loader(data_interface)

    if config.aggregator is not None:
        hierarchy = pp.load_modeled_hierarchy(data_interface)
        population = pp.load_populations(data_interface)
        payload = config.aggregator(payload, hierarchy, population)

    logger.info(f'Saving {measure} data.')
    if config.is_table:
        data_interface.save_output_miscellaneous(payload.reset_index(),
                                                 scenario_name,
                                                 config.label)
        return

    # FIXME: yuck
    scenario_paths = data_interface.forecast_paths.scenario_paths[scenario_name]
    target = scenario_paths.output_miscellaneous / f'{config.label}.yaml'
    with target.open('w') as handle:
        yaml.dump(payload, handle)
def load_full_data(data_interface: ForecastDataInterface) -> pd.DataFrame:
    """Load the full data set, keeping only the cumulative measures.

    The result is indexed by ``location_id`` and ``date`` and holds the
    columns ``cumulative_cases``, ``cumulative_deaths`` and
    ``cumulative_hospitalizations``, in that order.
    """
    new_names = {
        'Deaths': 'cumulative_deaths',
        'Confirmed': 'cumulative_cases',
        'Hospitalizations': 'cumulative_hospitalizations',
    }
    kept_columns = ['cumulative_cases', 'cumulative_deaths', 'cumulative_hospitalizations']

    indexed = data_interface.load_full_data().set_index(['location_id', 'date'])
    return indexed.rename(columns=new_names)[kept_columns]
def build_version_map(data_interface: ForecastDataInterface) -> pd.DataFrame:
    """Collect the version of every upstream input into a two column table.

    Versions are the final path component of each stage's output directory,
    read from the data interface paths and the infectionator metadata.

    Returns
    -------
    pd.DataFrame
        Columns ``name`` and ``version``, one row per entry, in the order
        the entries are collected below.

    Raises
    ------
    KeyError
        If an expected metadata key is missing (other than the webscrape
        key, which is looked up under two spellings).
    """
    version_map = {}
    version_map['forecast_version'] = data_interface.forecast_paths.root_dir.name
    version_map['regression_version'] = data_interface.regression_paths.root_dir.name
    version_map['covariate_version'] = data_interface.covariate_paths.root_dir.name

    # FIXME: infectionator doesn't do metadata the right way.
    # The output directory is parsed out of the last element of the recorded
    # R call: its second whitespace separated token, with quotes stripped.
    inf_metadata = data_interface.get_infectionator_metadata()
    inf_output_dir = inf_metadata['wrapped_R_call'][-1].split()[1].strip("'")
    version_map['infectionator_version'] = Path(inf_output_dir).name

    death_metadata = inf_metadata['death']['metadata']
    version_map['elastispliner_version'] = Path(death_metadata['output_path']).name

    model_inputs_metadata = death_metadata['model_inputs_metadata']
    version_map['model_inputs_version'] = Path(model_inputs_metadata['output_path']).name

    snapshot_metadata = model_inputs_metadata['snapshot_metadata']
    version_map['snapshot_version'] = Path(snapshot_metadata['output_path']).name

    jhu_snapshot_metadata = model_inputs_metadata['jhu_snapshot_metadata']
    version_map['jhu_snapshot_version'] = Path(jhu_snapshot_metadata['output_path']).name

    try:
        # There is a typo in the process that generates this key.
        # Protect ourselves in case they fix it without warning.
        webscrape_metadata = model_inputs_metadata['webcrape_metadata']
    except KeyError:
        webscrape_metadata = model_inputs_metadata['webscrape_metadata']
    version_map['webscrape_version'] = Path(webscrape_metadata['output_path']).name

    location_set_version_id = model_inputs_metadata['run_arguments']['lsvid']
    try:
        location_set_version_id = int(location_set_version_id)
    except (TypeError, ValueError, OverflowError):
        # Best effort only: a value that is not integer-like is recorded as
        # is. Only the errors ``int`` raises for bad input are swallowed, so
        # interrupts and unrelated failures still propagate.
        pass
    version_map['location_set_version_id'] = location_set_version_id

    # The snapshot directory name up to its first '.' is used as the date,
    # with underscores turned into dashes.
    version_map['data_date'] = Path(
        snapshot_metadata['output_path']).name.split('.')[0].replace('_', '-')

    version_map = pd.Series(version_map).reset_index()
    version_map.columns = ['name', 'version']
    return version_map
def load_covariate(covariate: str, time_varying: bool, scenario: str,
                   data_interface: ForecastDataInterface) -> List[pd.Series]:
    """Load one covariate for every draw in parallel.

    Returns one series per draw, in draw order.
    """
    load_one_draw = functools.partial(load_covariate_by_draw,
                                      covariate=covariate,
                                      time_varying=time_varying,
                                      scenario=scenario,
                                      data_interface=data_interface)
    draw_ids = range(data_interface.get_n_draws())
    with multiprocessing.Pool(FORECAST_SCALING_CORES) as worker_pool:
        return worker_pool.map(load_one_draw, draw_ids)
def compute_initial_beta_scaling_paramters(
        total_deaths: pd.Series, beta_scaling: dict,
        data_interface: ForecastDataInterface) -> List[pd.DataFrame]:
    """Compute the initial beta scaling parameters of every draw in parallel.

    Returns one data frame per draw, in draw order.
    """
    # Serialization is our bottleneck, so we parallelize draw level data
    # ingestion and computation across multiple processes.
    compute_one_draw = functools.partial(
        compute_initial_beta_scaling_parameters_by_draw,
        total_deaths=total_deaths,
        beta_scaling=beta_scaling,
        data_interface=data_interface,
    )
    draw_ids = list(range(data_interface.get_n_draws()))
    with multiprocessing.Pool(FORECAST_SCALING_CORES) as worker_pool:
        return list(worker_pool.imap(compute_one_draw, draw_ids))
def run_mean_level_mandate_reimposition(forecast_version: str,
                                        scenario_name: str,
                                        reimposition_number: int):
    """Compute and save mandate reimposition dates from mean level deaths.

    Parameters
    ----------
    forecast_version
        Directory containing the forecast specification file.
    scenario_name
        Scenario whose deaths are used and whose dates are written.
    reimposition_number
        Index of the reimposition being computed. Dates saved for lower
        numbers are read back to find each location's latest reimposition.
    """
    # The original message repeated the word "mean" ("mean mean level").
    logger.info(
        f"Initiating SEIIR mean level mandate reimposition {reimposition_number} "
        f"for scenario {scenario_name}.")
    forecast_spec: ForecastSpecification = ForecastSpecification.from_path(
        Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE)
    scenario_spec = forecast_spec.scenarios[scenario_name]
    data_interface = ForecastDataInterface.from_specification(forecast_spec)
    resampling_map = data_interface.load_resampling_map()

    # Reduce the resampled death draws to their mean by location and date.
    deaths = pp.load_deaths(scenario_name, data_interface)
    deaths = pd.concat(deaths, axis=1)
    deaths = pp.resample_draws(deaths, resampling_map)
    deaths = pp.summarize(deaths)
    deaths = deaths['mean'].rename('deaths').reset_index()
    deaths['date'] = pd.to_datetime(deaths['date'])
    modeled_locations = deaths['location_id'].unique().tolist()
    deaths = deaths.set_index(['location_id', 'date'])

    # NOTE(review): age_group_id 22 and sex_id 3 are presumably the all-age,
    # both-sex rows -- confirm against the population file definition.
    population = pp.load_populations(data_interface)
    population = population[population.location_id.isin(modeled_locations)
                            & (population.age_group_id == 22)
                            & (population.sex_id == 3)].set_index('location_id')['population']

    min_wait, days_on, reimposition_threshold = model.unpack_parameters(
        scenario_spec.algorithm_params)

    # Walk back from the most recent earlier reimposition. A location only
    # takes a date if it has none yet, so each ends up with its latest one.
    previous_dates = pd.Series(pd.NaT, index=population.index)
    for previous_reimposition in range(reimposition_number - 1, 0, -1):
        these_dates = data_interface.load_reimposition_dates(
            scenario=scenario_name,
            reimposition_number=previous_reimposition)
        these_dates = pd.to_datetime(
            these_dates.set_index('location_id')['reimposition_date'])
        these_dates = these_dates.reindex(previous_dates.index)
        this_reimposition = previous_dates.isnull() & these_dates.notnull()
        previous_dates.loc[this_reimposition] = these_dates.loc[this_reimposition]
    last_reimposition_end_date = previous_dates + days_on

    reimposition_date = model.compute_reimposition_date(
        deaths, population, reimposition_threshold, min_wait,
        last_reimposition_end_date)
    data_interface.save_reimposition_dates(
        reimposition_date.reset_index(),
        scenario=scenario_name,
        reimposition_number=reimposition_number)
def load_elastispliner_inputs(
        data_interface: ForecastDataInterface) -> pd.DataFrame:
    """Load the elastispliner inputs and convert rates into counts.

    Each rate column is multiplied by ``population``. The result is indexed
    by ``location_id`` and ``date`` with the columns ``cumulative_cases``,
    ``cumulative_deaths`` and ``cumulative_hospitalizations``.
    """
    indexed = data_interface.load_elastispliner_inputs().set_index(['location_id', 'date'])

    # Output column name for each input rate column, in output order.
    count_name_by_rate = {
        'Confirmed case rate': 'cumulative_cases',
        'Death rate': 'cumulative_deaths',
        'Hospitalization rate': 'cumulative_hospitalizations',
    }
    counts = [
        (indexed[rate_column] * indexed['population']).rename(count_name)
        for rate_column, count_name in count_name_by_rate.items()
    ]
    return pd.concat(counts, axis=1)
def run_seir_postprocessing(forecast_version: str, scenario_name: str,
                            measure: str) -> None:
    """Postprocess a single measure, covariate or miscellaneous dataset.

    ``measure`` is dispatched on membership in ``MEASURES``, ``COVARIATES``
    and ``MISCELLANEOUS``, checked in that order.

    Raises
    ------
    NotImplementedError
        If ``measure`` belongs to none of the three registries.
    """
    logger.info(f'Starting postprocessing for forecast version {forecast_version}, scenario {scenario_name}.')

    spec_path = Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE
    forecast_spec = ForecastSpecification.from_path(spec_path)
    scenario_spec = forecast_spec.scenarios[scenario_name]
    data_interface = ForecastDataInterface.from_specification(forecast_spec)
    resampling_map = data_interface.load_resampling_map()

    if measure in MEASURES:
        postprocess_measure(data_interface, resampling_map,
                            scenario_name, measure)
    elif measure in COVARIATES:
        postprocess_covariate(data_interface, resampling_map,
                              scenario_spec, scenario_name, measure)
    elif measure in MISCELLANEOUS:
        postprocess_miscellaneous(data_interface, scenario_name, measure)
    else:
        raise NotImplementedError(f'Unknown measure {measure}.')

    logger.info('**DONE**')
def postprocess_covariate(data_interface: ForecastDataInterface,
                          resampling_map: Dict[int, Dict[str, List[int]]],
                          scenario_spec: ScenarioSpecification,
                          scenario_name: str, covariate: str) -> None:
    """Load, resample, and save one covariate alongside its input data.

    The covariate's configuration is looked up in ``COVARIATES``. Forecast
    draws are merged with the input covariate data so that every output row
    carries an ``observed`` flag and a ``modeled`` flag. Draw level output is
    written only when the configuration sets ``draw_level``.
    """
    covariate_config = COVARIATES[covariate]
    logger.info(f'Loading {covariate}.')
    covariate_data = covariate_config.loader(covariate, covariate_config.time_varying, scenario_name, data_interface)
    logger.info(f'Concatenating and resampling {covariate}.')
    covariate_data = pd.concat(covariate_data, axis=1)
    covariate_data = pp.resample_draws(covariate_data, resampling_map)
    if covariate_config.aggregator is not None:
        hierarchy = pp.load_modeled_hierarchy(data_interface)
        population = pp.load_populations(data_interface)
        covariate_data = covariate_config.aggregator(covariate_data, hierarchy, population)
    covariate_version = scenario_spec.covariates[covariate]
    location_ids = data_interface.load_location_ids()
    n_draws = data_interface.get_n_draws()
    logger.info(f'Loading and processing input data for {covariate}.')
    input_covariate_data = data_interface.load_covariate(covariate, covariate_version, location_ids, with_observed=True)
    # Move ``observed`` out of the index so the merge below can use the
    # remaining index levels and attach the flag as a column.
    covariate_observed = input_covariate_data.reset_index(level='observed')
    # Outer merge keeps rows present on only one side.
    covariate_data = covariate_data.merge(covariate_observed, left_index=True, right_index=True, how='outer').reset_index()
    draw_cols = [f'draw_{i}' for i in range(n_draws)]
    # A ``date`` column is only present for some covariates.
    # NOTE(review): presumably the time varying ones -- confirm.
    if 'date' in covariate_data.columns:
        index_cols = ['location_id', 'date', 'observed']
    else:
        index_cols = ['location_id', 'observed']
    covariate_data = covariate_data.set_index(index_cols)[draw_cols]
    # A row counts as modeled when every draw column has a value.
    covariate_data['modeled'] = covariate_data.notnull().all(axis=1).astype(int)
    # Replicate the input covariate once per draw, so it can fill the draws.
    input_covariate = pd.concat([input_covariate_data.reorder_levels(index_cols)] * n_draws, axis=1)
    input_covariate.columns = draw_cols
    # Missing draw values are taken from the replicated input data.
    covariate_data = covariate_data.combine_first(input_covariate).set_index('modeled', append=True)
    logger.info(f'Saving data for {covariate}.')
    if covariate_config.draw_level:
        data_interface.save_output_draws(covariate_data.reset_index(), scenario_name, covariate_config.label)
    # NOTE(review): summaries are written whether or not draws are saved;
    # confirm this nesting against the upstream source.
    summarized_data = pp.summarize(covariate_data)
    data_interface.save_output_summaries(summarized_data.reset_index(), scenario_name, covariate_config.label)
def test_forecast_io(self, tmpdir, components, beta_scales, forecast_outputs):
    """Round-trip forecast stage files through the ForecastDataInterface."""
    root = Path(tmpdir)
    forecast_paths = ForecastPaths(
        root_dir=root,
        scenarios=['happy'],
    )
    di = ForecastDataInterface(
        forecast_paths=None,
        regression_paths=None,
        covariate_paths=None,
        regression_marshall=None,
        forecast_marshall=CSVMarshall.from_paths(forecast_paths),
    )

    # Step 1: save files
    di.save_components(components, scenario="happy", draw_id=4)
    di.save_beta_scales(beta_scales, scenario="happy", draw_id=4)
    di.save_raw_outputs(forecast_outputs, scenario="happy", draw_id=4)

    # Step 2: test save location
    # this is sort of cheating, but it ensures that scenario things are
    # nicely nested as they should be
    for directory in ("component_draws", "beta_scaling", "raw_outputs"):
        assert (Path(tmpdir) / "happy" / directory / "draw_4.csv").exists()

    # Step 3: load those files
    loaded_components = di.load_components(scenario="happy", draw_id=4)
    # Load components now does some formatting, which broke the tests.
    # Back out these changes here.
    loaded_components = loaded_components.reset_index()
    loaded_components['date'] = loaded_components['date'].astype(str)
    # Use the same sort order.
    loaded_components = loaded_components[components.columns]
    loaded_beta_scales = di.load_beta_scales(scenario="happy", draw_id=4)
    loaded_forecast_outputs = di.load_raw_outputs(scenario="happy", draw_id=4)

    # Step 4: test files
    round_trips = (
        (components, loaded_components),
        (beta_scales, loaded_beta_scales),
        (forecast_outputs, loaded_forecast_outputs),
    )
    for saved, loaded in round_trips:
        pandas.testing.assert_frame_equal(saved, loaded)
def test_regression_io(self, tmpdir, coefficients, dates, regression_beta,
                       location_data, parameters):
    """
    Test I/O relating to regression stage.

    Files are written with the RegressionDataInterface and read back with
    the ForecastDataInterface, so only the loading side of the forecast
    interface is exercised here.
    """
    regress_paths = RegressionPaths(Path(tmpdir))
    rdi = RegressionDataInterface(
        infection_paths=None,
        regression_paths=regress_paths,
        covariate_paths=None,
        regression_marshall=CSVMarshall(regress_paths.root_dir),
    )
    fdi = ForecastDataInterface(
        forecast_paths=None,
        regression_paths=None,
        covariate_paths=None,
        regression_marshall=CSVMarshall.from_paths(regress_paths),
        forecast_marshall=None,
    )

    # Step 1: save files (normally done in regression)
    rdi.save_regression_coefficients(coefficients, draw_id=4)
    rdi.save_beta_param_file(parameters, draw_id=4)
    rdi.save_date_file(dates, draw_id=4)
    rdi.save_regression_betas(regression_beta, draw_id=4)
    rdi.save_location_data(location_data, draw_id=4)

    # Step 2: load files as they would be loaded in forecast
    loaded_coefficients = fdi.load_regression_coefficients(draw_id=4)
    loaded_parameters = fdi.load_beta_params(draw_id=4)
    loaded_transition_dates = fdi.load_transition_date(draw_id=4)
    loaded_regression_beta = fdi.load_beta_regression(draw_id=4)
    loaded_location_data = fdi.load_infection_data(draw_id=4)

    # Step 3: test files
    pandas.testing.assert_frame_equal(coefficients, loaded_coefficients)

    # some load methods do pandas.to_datetime conversion on columns
    transition_dates = (dates.set_index('location_id')
                        .sort_index()['end_date']
                        .rename('date')
                        .reset_index())
    loaded_transition_dates = loaded_transition_dates.reset_index()
    date_round_trips = (
        (transition_dates, loaded_transition_dates),
        (regression_beta, loaded_regression_beta),
        (location_data, loaded_location_data),
    )
    for saved, loaded in date_round_trips:
        assert_equal_after_date_conversion(saved, loaded, date_cols=['date'])

    # load_beta_params does not return a DataFrame but instead a dict
    # in addition, some rounding error occurs in the save/load from CSV
    expected_parameters = parameters.set_index('params')['values'].to_dict()
    try:
        assert expected_parameters == loaded_parameters
    except AssertionError:
        # assert keys are identical
        assert set(expected_parameters) == set(loaded_parameters)
        # assert each value is accurate to 15 decimal places
        for key, expected in expected_parameters.items():
            numpy.testing.assert_almost_equal(loaded_parameters[key], expected, decimal=15)
        warnings.warn(
            "beta fit parameters accurate only to 15 decimal places after save/load cycle"
        )
def run_beta_forecast(draw_id: int, forecast_version: str, scenario_name: str,
                      **kwargs):
    """Run the SEIIR beta forecast for one draw of one scenario.

    Loads the regression outputs for the draw, forecasts beta from the
    scenario covariates, runs the ODE model forward from the transition
    day, and writes components, covariates and outputs. When the scenario
    algorithm is ``'draw_level_mandate_reimposition'`` the forecast is
    repeated with adjusted mobility until no location reimposes mandates.

    Parameters
    ----------
    draw_id
        Draw to forecast.
    forecast_version
        Directory containing the forecast specification file.
    scenario_name
        Scenario to forecast; must be a key of the specification's scenarios.
    **kwargs
        Accepted and ignored.

    Raises
    ------
    ValueError
        If any theta lies outside [-1, 1], or if ``sigma - theta`` is not
        smaller than 1 for some location.
    """
    logger.info(
        f"Initiating SEIIR beta forecasting for scenario {scenario_name}, draw {draw_id}."
    )
    forecast_spec: ForecastSpecification = ForecastSpecification.from_path(
        Path(forecast_version) / static_vars.FORECAST_SPECIFICATION_FILE)
    scenario_spec = forecast_spec.scenarios[scenario_name]
    data_interface = ForecastDataInterface.from_specification(forecast_spec)

    logger.info('Loading input data.')
    location_ids = data_interface.load_location_ids()
    # Thetas are a parameter generated from assumption or OOS predictive
    # validity testing to curtail some of the bad behavior of the model.
    thetas = data_interface.load_thetas(scenario_spec.theta)
    # Grab the last day of data in the model by location id.  This will
    # correspond to the initial condition for the projection.
    transition_date = data_interface.load_transition_date(draw_id)

    # We'll use the beta and SEIR compartments from this data set to get
    # the ODE initial condition.
    beta_regression_df = data_interface.load_beta_regression(
        draw_id).set_index('location_id').sort_index()
    past_components = beta_regression_df[['date', 'beta'] + static_vars.SEIIR_COMPARTMENTS]

    # Select out the initial condition using the day of transition.
    transition_day = past_components['date'] == transition_date.loc[past_components.index]
    initial_condition = past_components.loc[transition_day, static_vars.SEIIR_COMPARTMENTS]
    before_model = past_components['date'] < transition_date.loc[past_components.index]
    past_components = past_components[before_model]

    # Covariates and coefficients, and scaling parameters are
    # used to compute beta hat in the future.
    covariates = data_interface.load_covariates(scenario_spec, location_ids)
    coefficients = data_interface.load_regression_coefficients(draw_id)

    # Grab the projection of the covariates into the future, keeping the
    # day of transition from past model to future model.
    covariates = covariates.set_index('location_id').sort_index()
    the_future = covariates['date'] >= transition_date.loc[covariates.index]
    covariate_pred = covariates.loc[the_future].reset_index()

    beta_scales = data_interface.load_beta_scales(scenario=scenario_name,
                                                  draw_id=draw_id)

    # We'll use the same params in the ODE forecast as we did in the fit.
    beta_params = data_interface.load_beta_params(draw_id=draw_id)

    # We'll need this to compute deaths and to splice with the forecasts.
    infection_data = data_interface.load_infection_data(draw_id)

    # Each comparison needs its own parentheses: ``|`` binds tighter than
    # ``<``, so ``(1 < thetas) | thetas < -1`` evaluates as
    # ``((1 < thetas) | thetas) < -1`` and never checks the intended range.
    if ((thetas > 1) | (thetas < -1)).any():
        raise ValueError('Theta must be between -1 and 1.')
    if (beta_params['sigma'] - thetas >= 1).any():
        raise ValueError('Sigma - theta must be smaller than 1')

    # Modeling starts
    logger.info('Forecasting beta and components.')
    betas = model.forecast_beta(covariate_pred, coefficients, beta_scales)
    future_components = model.run_normal_ode_model_by_location(
        initial_condition, beta_params, betas, thetas, location_ids,
        scenario_spec.solver, scenario_spec.system)
    logger.info('Processing ODE results and computing deaths and infections.')
    components, infections, deaths, r_effective = model.compute_output_metrics(
        infection_data, past_components, future_components, thetas,
        beta_params, scenario_spec.system)

    if scenario_spec.algorithm == 'draw_level_mandate_reimposition':
        logger.info('Entering mandate reimposition.')
        # Info data specific to mandate reimposition
        percent_mandates = data_interface.load_covariate_info(
            'mobility', 'mandate_lift', location_ids)
        mandate_effect = data_interface.load_covariate_info(
            'mobility', 'effect', location_ids)
        min_wait, days_on, reimposition_threshold = model.unpack_parameters(
            scenario_spec.algorithm_params)

        # Population is taken as the largest compartment total per location.
        population = (components[static_vars.SEIIR_COMPARTMENTS]
                      .sum(axis=1)
                      .rename('population')
                      .groupby('location_id')
                      .max())
        logger.info('Loading mandate reimposition data.')

        reimposition_count = 0
        # Collected per round; not read or saved anywhere in this function.
        reimposition_dates = {}
        last_reimposition_end_date = pd.Series(pd.NaT, index=population.index)
        reimposition_date = model.compute_reimposition_date(
            deaths, population, reimposition_threshold, min_wait,
            last_reimposition_end_date)

        while len(reimposition_date):  # any place reimposes mandates.
            logger.info(
                f'On mandate reimposition {reimposition_count + 1}. {len(reimposition_date)} locations '
                f'are reimposing mandates.')
            mobility = covariates[['date', 'mobility']].reset_index().set_index(
                ['location_id', 'date'])['mobility']
            mobility_lower_bound = model.compute_mobility_lower_bound(
                mobility, mandate_effect)
            new_mobility = model.compute_new_mobility(mobility,
                                                      reimposition_date,
                                                      mobility_lower_bound,
                                                      percent_mandates,
                                                      days_on)

            # Swap the adjusted mobility in, then restore the location-only
            # index so the precomputed ``the_future`` mask still applies.
            covariates = covariates.reset_index().set_index(['location_id', 'date'])
            covariates['mobility'] = new_mobility
            covariates = covariates.reset_index(level='date')
            covariate_pred = covariates.loc[the_future].reset_index()

            logger.info('Forecasting beta and components.')
            betas = model.forecast_beta(covariate_pred, coefficients, beta_scales)
            future_components = model.run_normal_ode_model_by_location(
                initial_condition, beta_params, betas, thetas, location_ids,
                scenario_spec.solver, scenario_spec.system)
            logger.info(
                'Processing ODE results and computing deaths and infections.')
            components, infections, deaths, r_effective = model.compute_output_metrics(
                infection_data, past_components, future_components, thetas,
                beta_params, scenario_spec.system)

            reimposition_count += 1
            reimposition_dates[reimposition_count] = reimposition_date
            last_reimposition_end_date.loc[reimposition_date.index] = reimposition_date + days_on
            reimposition_date = model.compute_reimposition_date(
                deaths, population, reimposition_threshold, min_wait,
                last_reimposition_end_date)

    logger.info('Writing outputs.')
    components = components.reset_index()
    covariates = covariates.reset_index()
    outputs = pd.concat([infections, deaths, r_effective], axis=1).reset_index()

    data_interface.save_components(components, scenario_name, draw_id)
    data_interface.save_raw_covariates(covariates, scenario_name, draw_id)
    data_interface.save_raw_outputs(outputs, scenario_name, draw_id)
def load_betas_by_draw(draw_id: int, scenario: str,
                       data_interface: ForecastDataInterface) -> pd.Series:
    """Return the ``beta`` column of one draw's components.

    The components are sorted by index and the series is named after the
    draw.
    """
    components = data_interface.load_components(scenario, draw_id)
    sorted_components = components.sort_index()
    return sorted_components['beta'].rename(draw_id)