def pp_launch(r, kwargs, distributions, params, initial_counts, testing_params,
              measure_list, max_time, thresholds_roc, store_mob,
              store_measure_bernoullis):
    mob = MobilitySimulator(**kwargs)
    mob.simulate(max_time=max_time)

    sim = DiseaseModel(mob, distributions)

    sim.launch_epidemic(
        params=params,
        initial_counts=initial_counts,
        testing_params=testing_params,
        measure_list=measure_list,
        thresholds_roc=thresholds_roc,
        verbose=False)

    result = {
        'state': sim.state,
        'state_started_at': sim.state_started_at,
        'state_ended_at': sim.state_ended_at,
        'measure_list': copy.deepcopy(sim.measure_list),
        'people_age': sim.mob.people_age,
        'children_count_iasy': sim.children_count_iasy,
        'children_count_ipre': sim.children_count_ipre,
        'children_count_isym': sim.children_count_isym,
        'tracing_stats': sim.tracing_stats,
    }
    if store_mob:
        result['mob'] = sim.mob

    ml = result['measure_list']
    if not store_measure_bernoullis:
        ml.exit_run()

    return result
def pp_launch(r, kwargs, distributions, params, initial_counts, testing_params,
              measure_list, max_time):
    mob = MobilitySimulator(**kwargs)
    mob.simulate(max_time=max_time)

    sim = DiseaseModel(mob, distributions)

    sim.launch_epidemic(
        params=params,
        initial_counts=initial_counts,
        testing_params=testing_params,
        measure_list=measure_list,
        verbose=False)

    result = {
        'state': sim.state,
        'state_started_at': sim.state_started_at,
        'state_ended_at': sim.state_ended_at,
        'measure_list': copy.deepcopy(sim.measure_list),
        'people_age': sim.mob.people_age,
        'children_count_iasy': sim.children_count_iasy,
        'children_count_ipre': sim.children_count_ipre,
        'children_count_isym': sim.children_count_isym,
    }
    if STORE_MOB:
        result['mob'] = sim.mob

    return result
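# Hedged usage sketch (not the project's actual driver): `pp_launch` is
# written as a per-repeat worker, so a caller could fan it out over random
# repeats with `multiprocessing`. All argument values (`mob_kwargs`,
# `distributions`, ...) are placeholders supplied by the calling code.
def _demo_parallel_pp_launch(random_repeats, mob_kwargs, distributions, params,
                             initial_counts, testing_params, measure_list, max_time):
    import multiprocessing as mp

    job_args = [(r, mob_kwargs, distributions, params, initial_counts,
                 testing_params, measure_list, max_time)
                for r in range(random_repeats)]

    # each worker simulates mobility traces and one epidemic rollout
    with mp.Pool(processes=mp.cpu_count()) as pool:
        results = pool.starmap(pp_launch, job_args)
    return results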
def get_calibrated_params_limited_iters(country, area, multi_beta_calibration, maxiters):
    """
    Returns calibrated parameters using only the first `maxiters` iterations of BO.
    """
    state = load_state(calibration_states[country][area])
    train_G = state['train_G']
    train_G = train_G[:min(maxiters, len(train_G))]
    train_theta = state['train_theta']

    mob_settings = calibration_mob_paths[country][area][0]
    with open(mob_settings, 'rb') as fp:
        mob_kwargs = pickle.load(fp)
    mob = MobilitySimulator(**mob_kwargs)

    data_start_date = calibration_start_dates[country][area]
    data_end_date = calibration_lockdown_dates[country]['end']

    unscaled_area_cases = collect_data_from_df(
        country=country, area=area, datatype='new',
        start_date_string=data_start_date, end_date_string=data_end_date)
    assert len(unscaled_area_cases.shape) == 2

    # Scale down cases based on number of people in town and region
    sim_cases = downsample_cases(unscaled_area_cases, mob_kwargs)

    n_days, n_age = sim_cases.shape

    G_obs = torch.tensor(sim_cases).reshape(1, n_days * n_age)
    G_obs_aggregate = torch.tensor(sim_cases).sum(dim=-1)

    def objective(G):
        return -(G - G_obs_aggregate).pow(2).sum(dim=-1) / n_days

    train_G_objectives = objective(train_G)
    best_observed_idx = train_G_objectives.argmax()
    best_observed_obj = train_G_objectives[best_observed_idx].item()

    param_bounds = (calibration_model_param_bounds_multi
                    if multi_beta_calibration else
                    calibration_model_param_bounds_single)

    sim_bounds = pdict_to_parr(
        pdict=param_bounds,
        multi_beta_calibration=multi_beta_calibration).T

    normalized_calibrated_params = train_theta[best_observed_idx]
    calibrated_params = transforms.unnormalize(normalized_calibrated_params, sim_bounds)
    calibrated_params = parr_to_pdict(
        parr=calibrated_params, multi_beta_calibration=multi_beta_calibration)

    return calibrated_params
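# Hedged usage sketch: query the best parameters found within the first 20
# BO iterations for one region. The country/area codes below are
# placeholders; valid keys are whatever `calibration_states` defines.
def _demo_limited_iters():
    calibrated = get_calibrated_params_limited_iters(
        country='GER', area='TU',
        multi_beta_calibration=False,
        maxiters=20)
    # expected keys in single-beta mode would be e.g. 'beta_site',
    # 'beta_household', 'p_stay_home' (assumption based on `parr_to_pdict`)
    print(calibrated)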
def make_bayes_opt_functions(args):
    '''
    Generates and returns functions used to run Bayesian optimization

    Argument:
        args:                   Keyword arguments specifying exact settings for optimization

    Returns:
        objective :                         objective maximized for BO
        generate_initial_observations :     function to generate initial observations
        initialize_model :                  function to initialize GP
        optimize_acqf_and_get_observation : function to optimize acquisition function based on model
        case_diff :                         computes case difference between prediction array and ground truth at t=T
        unnormalize_theta :                 converts BO params to simulation params (unit cube to real parameters)
        header :                            header lines to be printed to log file
    '''
    header = []

    # depending on mode, set parameter bounds
    if args.measures_optimized:
        param_bounds = settings_measures_param_bounds
    else:
        param_bounds = settings_model_param_bounds

    # remember line executed
    header.append('=' * 100)
    header.append(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    header.append('python ' + ' '.join(sys.argv))
    header.append('=' * 100)

    mob_settings = args.mob
    data_area = args.area
    data_country = args.country

    # initialize mobility object to obtain information (no trace generation yet)
    with open(mob_settings, 'rb') as fp:
        kwargs = pickle.load(fp)
    mob = MobilitySimulator(**kwargs)

    # data settings
    verbose = not args.not_verbose
    use_households = not args.no_households
    data_start_date = args.start
    data_end_date = args.end
    debug_simulation_days = args.endsimat

    # simulation settings
    n_init_samples = args.ninit
    n_iterations = args.niters
    simulation_roll_outs = args.rollouts
    cpu_count = args.cpu_count
    dynamic_tracing = not args.no_dynamic_tracing
    load_observations = args.load

    # set testing parameters
    testing_params = settings_testing_params

    # BO acquisition function optimization (Knowledge gradient)
    acqf_opt_num_fantasies = args.acqf_opt_num_fantasies
    acqf_opt_num_restarts = args.acqf_opt_num_restarts
    acqf_opt_raw_samples = args.acqf_opt_raw_samples
    acqf_opt_batch_limit = args.acqf_opt_batch_limit
    acqf_opt_maxiter = args.acqf_opt_maxiter

    """
    Bayesian optimization pipeline
    """

    # Import Covid19 data
    # Shape (max_days, num_age_groups)
    new_cases_ = collect_data_from_df(country=data_country, area=data_area, datatype='new',
                                      start_date_string=data_start_date, end_date_string=data_end_date)
    assert len(new_cases_.shape) == 2

    if new_cases_[0].sum() == 0:
        print('No positive cases at provided start time; cannot seed simulation.\n'
              'Consider setting a later start date for calibration using the "--start" flag.')
        exit(0)

    # Scale down cases based on number of people in town, region, and downsampling
    new_cases = np.ceil(
        (new_cases_ * mob.num_people_unscaled) /
        (mob.downsample * mob.region_population))
    num_age_groups = new_cases.shape[1]

    header.append('Downsampling : ' + str(mob.downsample))
    header.append('Town population: ' + str(mob.num_people))
    header.append('Town population (unscaled): ' + str(mob.num_people_unscaled))
    header.append('Region population : ' + str(mob.region_population))

    # Set daily test capacity from (a) the command line; or (b) the maximum
    # daily increase in positive cases over the observed period
    if args.testingcap:
        testing_params['tests_per_batch'] = (args.testingcap / mob.num_people_unscaled)
    else:
        daily_increase = new_cases.sum(axis=1)[1:] - new_cases.sum(axis=1)[:-1]
        testing_params['tests_per_batch'] = int(daily_increase.max())

    test_lag_days = int(testing_params['test_reporting_lag'] / TO_HOURS)
    assert int(testing_params['test_reporting_lag']) % 24 == 0

    # generate initial seeds based on case numbers
    initial_seeds = gen_initial_seeds(new_cases)
    header.append('Initial seed counts : ' + str(initial_seeds))

    # in debug mode, shorten the simulation horizon
    if debug_simulation_days:
        new_cases = new_cases[:debug_simulation_days]

    # Maximum time is fixed by the real data; init mobility simulator simulation
    # maximum time to simulate, in hours
    max_time = int(new_cases.shape[0] * TO_HOURS)
    max_time += TO_HOURS * test_lag_days  # simulate longer due to test lag in simulations
    testing_params['testing_t_window'] = [0.0, max_time]
    mob.simulate(max_time=max_time, dynamic_tracing=True)

    header.append('Daily test capacity in sim.: ' + str(testing_params['tests_per_batch']))
    header.append('Max time T (days): ' + str(new_cases.shape[0]))
    header.append('Target cases per age group at t=0: ' + str(list(map(int, new_cases[0].tolist()))))
    header.append('Target cases per age group at t=T: ' + str(list(map(int, new_cases[-1].tolist()))))

    # instantiate correct distributions
    distributions = CovidDistributions(country=args.country)

    # set Bayesian optimization target as positive cases
    n_days, n_age = new_cases.shape
    G_obs = torch.tensor(new_cases).reshape(n_days * n_age)  # flattened

    sim_bounds = pdict_to_parr(param_bounds, measures_optimized=args.measures_optimized).T
    n_params = sim_bounds.shape[1]

    header.append(f'Parameters : {n_params}')
    header.append('Parameter bounds: ' + str(parr_to_pdict(sim_bounds.T, measures_optimized=args.measures_optimized)))

    # extract lockdown period
    sim_start_date = pd.to_datetime(args.start)
    sim_end_date = sim_start_date + timedelta(days=int(max_time / TO_HOURS))

    lockdown_start_date = pd.to_datetime(settings_lockdown_dates[args.country]['start'])
    lockdown_end_date = pd.to_datetime(settings_lockdown_dates[args.country]['end'])

    days_until_lockdown_start = (lockdown_start_date - sim_start_date).days
    days_until_lockdown_end = (lockdown_end_date - sim_start_date).days

    header.append(f'Simulation starts at : {sim_start_date}')
    header.append(f'           ends at : {sim_end_date}')
    header.append(f'Lockdown starts at : {lockdown_start_date}')
    header.append(f'         ends at : {lockdown_end_date}')

    # create settings dictionary for simulations
    launch_kwargs = dict(
        mob_settings=mob_settings,
        distributions=distributions,
        random_repeats=simulation_roll_outs,
        cpu_count=cpu_count,
        initial_seeds=initial_seeds,
        testing_params=testing_params,
        max_time=max_time,
        num_people=mob.num_people,
        num_sites=mob.num_sites,
        home_loc=mob.home_loc,
        site_loc=mob.site_loc,
        dynamic_tracing=dynamic_tracing,
        verbose=False)

    '''
    Define central functions for optimization
    '''

    G_obs = torch.tensor(new_cases).reshape(1, n_days * n_age)

    def composite_squared_loss(G):
        '''
        Objective function
        Note: in BO, objectives are maximized
        '''
        return - (G - G_obs).pow(2).sum(dim=-1)

    # select objective
    objective = GenericMCObjective(composite_squared_loss)

    def case_diff(preds):
        '''
        Computes case difference of predictions and ground truth at t=T
        '''
        return preds.reshape(n_days, n_age)[-1].sum() - torch.tensor(new_cases)[-1].sum()

    def unnormalize_theta(theta):
        '''
        Computes unnormalized parameters
        '''
        return transforms.unnormalize(theta, sim_bounds)

    def composite_simulation(norm_params):
        """
        Takes a set of normalized (unit cube) BO parameters
        and returns simulator output means and standard errors based on
        multiple random restarts. This corresponds to the black-box function.
        """
        # un-normalize normalized params to obtain simulation parameters
        params = transforms.unnormalize(norm_params, sim_bounds)

        # finalize settings based on which parameters are calibrated
        kwargs = copy.deepcopy(launch_kwargs)
        if args.measures_optimized:

            '''
            Measures are calibrated
            '''
            measure_params = parr_to_pdict(params, measures_optimized=args.measures_optimized)

            # social distancing measures: calibration is only done for `SocialDistancingForAllMeasure` for now
            measure_list_ = [
                SocialDistancingForPositiveMeasure(
                    t_window=Interval(0.0, max_time), p_stay_home=1.0),
                SocialDistancingForPositiveMeasureHousehold(
                    t_window=Interval(0.0, max_time), p_isolate=1.0),
                SocialDistancingForAllMeasure(
                    t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                      TO_HOURS * days_until_lockdown_end),
                    p_stay_home=measure_params['p_stay_home']),
            ]

            # close sites if specified
            if args.measures_close:
                beta_multipliers = {'education': 1.0, 'social': 1.0,
                                    'bus_stop': 1.0, 'office': 1.0, 'supermarket': 1.0}
                for category in args.measures_close:
                    if category in beta_multipliers.keys():
                        beta_multipliers[category] = 0.0
                    else:
                        raise ValueError(f'Site type `{category}` passed in `--measures_close` is invalid.\n'
                                         f'Available are {str(list(beta_multipliers.keys()))}')

                measure_list_.append(BetaMultiplierMeasureByType(
                    t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                      TO_HOURS * days_until_lockdown_end),
                    beta_multiplier=beta_multipliers
                ))

            kwargs['measure_list'] = MeasureList(measure_list_)

            # get optimized model parameters for this country and area
            calibrated_model_params = settings_optimized_town_params[args.country][args.area]
            if calibrated_model_params is None:
                raise ValueError(f'Cannot optimize measures for {args.country}-{args.area} because model parameters '
                                 'have not been fitted yet. Set values in `calibration_settings.py`')
            kwargs['params'] = calibrated_model_params

        else:

            '''
            Model parameters are calibrated
            '''
            kwargs['measure_list'] = MeasureList([
                SocialDistancingForPositiveMeasure(
                    t_window=Interval(0.0, max_time), p_stay_home=1.0),
                SocialDistancingForPositiveMeasureHousehold(
                    t_window=Interval(0.0, max_time), p_isolate=1.0),
            ])
            kwargs['params'] = parr_to_pdict(params, measures_optimized=args.measures_optimized)

        # run simulation in parallel
        summary = launch_parallel_simulations(**kwargs)

        # (random_repeats, n_people)
        posi_started = torch.tensor(summary.state_started_at['posi'])
        posi_started -= test_lag_days * TO_HOURS  # account for test lag

        # (random_repeats, n_days)
        age_groups = torch.tensor(summary.people_age)
        posi_cumulative = convert_timings_to_cumulative_daily(
            timings=posi_started, age_groups=age_groups, time_horizon=n_days * TO_HOURS)

        if posi_cumulative.shape[0] <= 1:
            raise ValueError('Must run at least 2 random restarts per setting to get estimate of noise in observation.')

        # compute mean and standard error of means
        G = torch.mean(posi_cumulative, dim=0)
        G_sem = torch.std(posi_cumulative, dim=0) / math.sqrt(posi_cumulative.shape[0])

        # make sure noise is not zero, for non-degeneracy
        G_sem = torch.max(G_sem, MIN_NOISE)

        # flatten
        G = G.reshape(1, n_days * n_age)
        G_sem = G_sem.reshape(1, n_days * n_age)

        return G, G_sem

    def generate_initial_observations(n, logger):
        """
        Takes an integer `n` and generates `n` initial observations
        from the black box function using Sobol random parameter settings
        in the unit cube. Returns parameter settings and black box function outputs.
        """
        if n <= 0:
            raise ValueError(
                'qKnowledgeGradient and GP need at least one observation to be defined properly.')

        # sobol sequence
        # new_thetas: [n, n_params]
        new_thetas = torch.tensor(
            sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

        # simulator observations
        # new_G, new_G_sem: [n, n_days * n_age] (flattened outputs)
        new_G = torch.zeros((n, n_days * n_age), dtype=torch.float)
        new_G_sem = torch.zeros((n, n_days * n_age), dtype=torch.float)

        for i in range(n):
            t0 = time.time()

            # get mean and standard error of mean (sem) of every simulation output
            G, G_sem = composite_simulation(new_thetas[i, :])
            new_G[i, :] = G
            new_G_sem[i, :] = G_sem

            # log
            G_objectives = objective(new_G[:i+1])
            best_idx = G_objectives.argmax()
            best = G_objectives[best_idx].item()
            current = objective(G).item()
            case_diff = (
                G.reshape(n_days, n_age)[-1].sum()
                - G_obs.reshape(n_days, n_age)[-1].sum())

            t1 = time.time()
            logger.log(
                i=i - n,
                time=t1 - t0,
                best=best,
                objective=current,
                case_diff=case_diff,
                theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
            )

            # save state
            state = {
                'train_theta': new_thetas[:i+1],
                'train_G': new_G[:i+1],
                'train_G_sem': new_G_sem[:i+1],
                'best_observed_obj': best,
                'best_observed_idx': best_idx,
            }
            save_state(state, logger.filename + '_init')

        # compute best objective from simulations
        f = objective(new_G)
        best_f_idx = f.argmax()
        best_f = f[best_f_idx].item()

        return new_thetas, new_G, new_G_sem, best_f, best_f_idx

    def initialize_model(train_x, train_y, train_y_sem):
        """
        Defines a GP given X, Y, and noise observations (standard error of mean)
        """
        train_ynoise = train_y_sem.pow(2.0)  # noise is in variance units

        # standardize outputs to zero mean, unit variance for good hyperparameter tuning
        model = FixedNoiseGP(train_x, train_y, train_ynoise,
                             outcome_transform=Standardize(m=n_days * n_age))

        # "Loss" for GPs - the marginal log likelihood
        mll = ExactMarginalLogLikelihood(model.likelihood, model)

        return mll, model

    # Model initialization
    # parameters used in BO are always in unit cube for optimal hyperparameter tuning of GPs
    bo_bounds = torch.stack([torch.zeros(n_params), torch.ones(n_params)])

    def optimize_acqf_and_get_observation(acq_func, args):
        """
        Optimizes the acquisition function, and returns a new candidate and a noisy observation.
        botorch defaults: num_restarts=10, raw_samples=256, batch_limit=5, maxiter=200
        """
        batch_initial_conditions = gen_one_shot_kg_initial_conditions(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
        )

        # optimize acquisition function
        candidates, _ = optimize_acqf(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,  # used for initialization heuristic
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
            batch_initial_conditions=batch_initial_conditions
        )

        # proposed evaluation
        new_theta = candidates.detach()

        # observe new noisy function evaluation
        new_G, new_G_sem = composite_simulation(new_theta.squeeze())

        return new_theta, new_G, new_G_sem

    # return functions
    return (
        objective,
        generate_initial_observations,
        initialize_model,
        optimize_acqf_and_get_observation,
        case_diff,
        unnormalize_theta,
        header,
    )
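# Hedged standalone sketch of the Sobol initialization used above:
# `sobol_seq.i4_sobol_generate(dim, n)` yields an (n, dim) array of
# quasi-random points in the unit cube, which `generate_initial_observations`
# feeds through `composite_simulation` one row at a time. The default
# argument values here are illustrative only.
def _demo_sobol_proposals(n_params=3, n=5):
    import torch
    import sobol_seq

    thetas = torch.tensor(sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)
    assert thetas.shape == (n, n_params)
    assert thetas.min() >= 0.0 and thetas.max() <= 1.0  # unit cube
    return thetas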
def make_bayes_opt_functions(args):
    '''
    Generates and returns functions used to run Bayesian optimization

    Argument:
        args:                   Keyword arguments specifying exact settings for optimization

    Returns:
        objective :                         objective maximized for BO
        generate_initial_observations :     function to generate initial observations
        initialize_model :                  function to initialize GP
        optimize_acqf_and_get_observation : function to optimize acquisition function based on model
        case_diff :                         computes case difference between prediction array and ground truth at t=T
        unnormalize_theta :                 converts BO params to simulation params (unit cube to real parameters)
        header :                            header lines to be printed to log file
    '''
    header = []

    # set parameter bounds based on calibration mode (single beta vs multiple betas)
    multi_beta_calibration = args.multi_beta_calibration
    if multi_beta_calibration:
        param_bounds = calibration_model_param_bounds_multi
    else:
        param_bounds = calibration_model_param_bounds_single

    # remember line executed
    header.append('=' * 100)
    header.append(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    header.append('python ' + ' '.join(sys.argv))
    header.append('=' * 100)

    data_country = args.country
    data_area = args.area
    mob_settings = args.mob or calibration_mob_paths[data_country][data_area][0]  # 0: downscaled, 1: full scale

    # initialize mobility object to obtain information (no trace generation yet)
    with open(mob_settings, 'rb') as fp:
        mob_kwargs = pickle.load(fp)
    mob = MobilitySimulator(**mob_kwargs)

    # data settings
    verbose = not args.not_verbose
    use_households = not args.no_households
    data_start_date = args.start or calibration_start_dates[data_country][data_area]
    data_end_date = args.end or calibration_lockdown_dates[args.country]['end']
    per_age_group_objective = args.per_age_group_objective

    # simulation settings
    n_init_samples = args.ninit
    n_iterations = args.niters
    simulation_roll_outs = args.rollouts
    cpu_count = args.cpu_count
    lazy_contacts = not args.no_lazy_contacts
    load_observations = args.load

    # set testing parameters
    testing_params = calibration_testing_params

    # BO acquisition function optimization (Knowledge gradient)
    acqf_opt_num_fantasies = args.acqf_opt_num_fantasies
    acqf_opt_num_restarts = args.acqf_opt_num_restarts
    acqf_opt_raw_samples = args.acqf_opt_raw_samples
    acqf_opt_batch_limit = args.acqf_opt_batch_limit
    acqf_opt_maxiter = args.acqf_opt_maxiter

    """
    Bayesian optimization pipeline
    """

    # Import Covid19 data
    # Shape (max_days, num_age_groups)
    unscaled_area_cases = collect_data_from_df(country=data_country, area=data_area, datatype='new',
                                               start_date_string=data_start_date, end_date_string=data_end_date)
    assert len(unscaled_area_cases.shape) == 2

    # Scale down cases based on number of people in town and region
    sim_cases = downsample_cases(unscaled_area_cases, mob_kwargs)

    # Generate initial seeds based on unscaled case numbers in town
    initial_seeds = gen_initial_seeds(sim_cases, day=0)

    if sum(initial_seeds.values()) == 0:
        print('No states seeded at start time; cannot start simulation.\n'
              'Consider setting a later start date for calibration using the "--start" flag.')
        exit(0)

    num_age_groups = sim_cases.shape[1]

    header.append('Downsampling : {}'.format(mob.downsample))
    header.append('Simulation population: {}'.format(mob.num_people))
    header.append('Simulation population (unscaled): {}'.format(mob.num_people_unscaled))
    header.append('Area population : {}'.format(mob.region_population))
    header.append('Initial seed counts : {}'.format(initial_seeds))

    scaled_test_capacity = get_test_capacity(
        country=data_country, area=data_area,
        mob_settings=mob_kwargs, end_date_string=data_end_date)

    testing_params['tests_per_batch'] = scaled_test_capacity

    test_lag_days = int(testing_params['test_reporting_lag'] / TO_HOURS)
    assert int(testing_params['test_reporting_lag']) % 24 == 0

    # Maximum time is fixed by the real data; init mobility simulator simulation
    # maximum time to simulate, in hours
    max_time = int(sim_cases.shape[0] * TO_HOURS)
    max_time += TO_HOURS * test_lag_days  # simulate longer due to test lag in simulations
    testing_params['testing_t_window'] = [0.0, max_time]
    mob.simulate(max_time=max_time, lazy_contacts=True)

    header.append(
        'Target cases per age group at t=0: {} {}'.format(sim_cases[0].sum().item(), list(sim_cases[0].tolist())))
    header.append(
        'Target cases per age group at t=T: {} {}'.format(sim_cases[-1].sum().item(), list(sim_cases[-1].tolist())))
    header.append(
        'Daily test capacity in sim.: {}'.format(testing_params['tests_per_batch']))

    # instantiate correct distributions
    distributions = CovidDistributions(country=args.country)

    # set Bayesian optimization target as positive cases
    n_days, n_age = sim_cases.shape

    sim_bounds = pdict_to_parr(
        pdict=param_bounds,
        multi_beta_calibration=multi_beta_calibration
    ).T

    n_params = sim_bounds.shape[1]

    header.append(f'Parameters : {n_params}')
    header.append('Parameter bounds: {}'.format(
        parr_to_pdict(parr=sim_bounds.T, multi_beta_calibration=multi_beta_calibration)))

    # extract lockdown period
    sim_start_date = pd.to_datetime(data_start_date)
    sim_end_date = sim_start_date + timedelta(days=int(max_time / TO_HOURS))

    lockdown_start_date = pd.to_datetime(calibration_lockdown_dates[args.country]['start'])
    lockdown_end_date = pd.to_datetime(calibration_lockdown_dates[args.country]['end'])

    days_until_lockdown_start = (lockdown_start_date - sim_start_date).days
    days_until_lockdown_end = (lockdown_end_date - sim_start_date).days

    header.append(f'Simulation starts at : {sim_start_date}')
    header.append(f'           ends at : {sim_end_date}')
    header.append(f'Lockdown starts at : {lockdown_start_date}')
    header.append(f'         ends at : {lockdown_end_date}')
    header.append(f'Cases compared until : {pd.to_datetime(data_end_date)}')
    header.append(f'           for days : {sim_cases.shape[0]}')

    # create settings dictionary for simulations
    launch_kwargs = dict(
        mob_settings=mob_settings,
        distributions=distributions,
        random_repeats=simulation_roll_outs,
        cpu_count=cpu_count,
        initial_seeds=initial_seeds,
        testing_params=testing_params,
        max_time=max_time,
        num_people=mob.num_people,
        num_sites=mob.num_sites,
        home_loc=mob.home_loc,
        site_loc=mob.site_loc,
        lazy_contacts=lazy_contacts,
        verbose=False)

    '''
    Define central functions for optimization
    '''

    G_obs = torch.tensor(sim_cases).reshape(1, n_days * n_age)
    G_obs_aggregate = torch.tensor(sim_cases).sum(dim=-1)

    '''
    Objective function
    Note: in BO and botorch, objectives are maximized
    '''
    if per_age_group_objective:
        def composite_squared_loss(G):
            return - (G - G_obs).pow(2).sum(dim=-1) / n_days
    else:
        def composite_squared_loss(G):
            return - (G - G_obs_aggregate).pow(2).sum(dim=-1) / n_days

    # select objective function
    objective = GenericMCObjective(composite_squared_loss)

    def case_diff(preds):
        '''
        Computes aggregate case difference of predictions and ground truth at t=T
        '''
        if per_age_group_objective:
            return preds.reshape(n_days, n_age)[-1].sum(dim=-1) - G_obs_aggregate[-1]
        else:
            return preds[-1] - G_obs_aggregate[-1]

    def unnormalize_theta(theta):
        '''
        Computes unnormalized parameters
        '''
        return transforms.unnormalize(theta, sim_bounds)

    def composite_simulation(norm_params):
        """
        Takes a set of normalized (unit cube) BO parameters
        and returns simulator output means and standard errors based on
        multiple random restarts. This corresponds to the black-box function.
        """
        # un-normalize normalized params to obtain simulation parameters
        params = transforms.unnormalize(norm_params, sim_bounds)

        # finalize model parameters based on given parameters and calibration mode
        kwargs = copy.deepcopy(launch_kwargs)
        all_params = parr_to_pdict(parr=params, multi_beta_calibration=multi_beta_calibration)

        if multi_beta_calibration:
            betas = all_params['betas']
        else:
            betas = {
                'education': all_params['beta_site'],
                'social': all_params['beta_site'],
                'bus_stop': all_params['beta_site'],
                'office': all_params['beta_site'],
                'supermarket': all_params['beta_site'],
            }

        model_params = {
            'betas': betas,
            'beta_household': all_params['beta_household'],
        }

        # set exposure parameters
        kwargs['params'] = model_params

        # set measure parameters
        kwargs['measure_list'] = MeasureList([
            # standard behavior of positively tested: full isolation
            SocialDistancingForPositiveMeasure(
                t_window=Interval(0.0, max_time), p_stay_home=1.0),
            SocialDistancingForPositiveMeasureHousehold(
                t_window=Interval(0.0, max_time), p_isolate=1.0),

            # social distancing factor during lockdown: calibrated
            SocialDistancingForAllMeasure(
                t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                  TO_HOURS * days_until_lockdown_end),
                p_stay_home=all_params['p_stay_home']),

            # site-specific measures: fixed in advance, outside of calibration
            BetaMultiplierMeasureByType(
                t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                  TO_HOURS * days_until_lockdown_end),
                beta_multiplier=calibration_lockdown_beta_multipliers)
        ])

        # run simulation in parallel
        summary = launch_parallel_simulations(**kwargs)

        # (random_repeats, n_people)
        posi_started = torch.tensor(summary.state_started_at['posi'])
        posi_started -= test_lag_days * TO_HOURS  # account for test lag in objective computation

        # (random_repeats, n_days)
        age_groups = torch.tensor(summary.people_age)

        # (random_repeats, n_days, n_age_groups)
        posi_cumulative = convert_timings_to_cumulative_daily(
            timings=posi_started, age_groups=age_groups, time_horizon=n_days * TO_HOURS)

        if posi_cumulative.shape[0] <= 1:
            raise ValueError('Must run at least 2 random restarts per setting to get estimate of noise in observation.')

        # compute aggregate if not using per-age-group objective
        if not per_age_group_objective:
            posi_cumulative = posi_cumulative.sum(dim=-1)

        # compute mean and standard error of means
        G = torch.mean(posi_cumulative, dim=0)
        G_sem = torch.std(posi_cumulative, dim=0) / math.sqrt(posi_cumulative.shape[0])

        # make sure noise is not zero, for non-degeneracy
        G_sem = torch.max(G_sem, MIN_NOISE)

        # flatten
        if per_age_group_objective:
            G = G.reshape(n_days * n_age)
            G_sem = G_sem.reshape(n_days * n_age)

        return G, G_sem

    def generate_initial_observations(n, logger, loaded_init_theta=None,
                                      loaded_init_G=None, loaded_init_G_sem=None):
        """
        Takes an integer `n` and generates `n` initial observations
        from the black box function using Sobol random parameter settings
        in the unit cube. Returns parameter settings and black box function outputs.
        If `loaded_init_theta/G/G_sem` are specified, the initialization is loaded
        (possibly partially, in which case the initialization using the Sobol
        random sequence is continued where it left off).
        """
        if n <= 0:
            raise ValueError(
                'qKnowledgeGradient and GP need at least one observation to be defined properly.')

        # sobol sequence proposal points
        # new_thetas: [n, n_params]
        new_thetas = torch.tensor(
            sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

        # check whether initial observations are loaded
        loaded = (loaded_init_theta is not None
                  and loaded_init_G is not None
                  and loaded_init_G_sem is not None)
        if loaded:
            n_loaded = loaded_init_theta.shape[0]  # loaded no. of observations total
            n_loaded_init = min(n_loaded, n)       # loaded no. of quasi-random initialization observations
            n_init = max(n_loaded, n)              # final no. of observations returned, at least quasi-random initializations

            # check whether loaded proposal points are the same as without loading observations
            try:
                assert np.allclose(loaded_init_theta[:n_loaded_init], new_thetas[:n_loaded_init])
            except AssertionError:
                print(
                    '\n\n\n===> Warning: parameters of loaded initial observations '
                    'do not coincide with the initialization that would have been done. '
                    'Double check simulation, ninit, and parameter bounds, which could change '
                    'the initial random Sobol sequence. \nThe loaded parameter settings are used. \n\n\n'
                )

            if n_init > n:
                new_thetas = loaded_init_theta  # size of tensor increased to `n_init`, as more than Sobol init points loaded
        else:
            n_loaded = 0       # loaded no. of observations total
            n_loaded_init = 0  # loaded no. of quasi-random initialization observations
            n_init = n         # final no. of observations returned, at least quasi-random initializations

        # instantiate simulator observation tensors
        if per_age_group_objective:
            # new_G, new_G_sem: [n_init, n_days * n_age] (flattened outputs)
            new_G = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
            new_G_sem = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
        else:
            # new_G, new_G_sem: [n_init, n_days]
            new_G = torch.zeros((n_init, n_days), dtype=torch.float)
            new_G_sem = torch.zeros((n_init, n_days), dtype=torch.float)

        # generate `n` initial evaluations at quasi-random settings; if applicable, skip and load expensive evaluation result
        for i in range(n_init):

            # if loaded, use initial observation for these parameter settings
            if loaded and i <= n_loaded - 1:
                new_thetas[i] = loaded_init_theta[i]
                G, G_sem = loaded_init_G[i], loaded_init_G_sem[i]
                walltime = 0.0

            # if not loaded, evaluate as usual
            else:
                t0 = time.time()
                G, G_sem = composite_simulation(new_thetas[i])
                walltime = time.time() - t0

            new_G[i] = G
            new_G_sem[i] = G_sem

            # log
            G_objectives = objective(new_G[:i+1])
            best_idx = G_objectives.argmax()
            best = G_objectives[best_idx].item()
            current = objective(G).item()

            if per_age_group_objective:
                case_diff = G.reshape(n_days, n_age)[-1].sum() - G_obs_aggregate[-1]
            else:
                case_diff = G[-1] - G_obs_aggregate[-1]

            logger.log(
                i=i - n,
                time=walltime,
                best=best,
                objective=current,
                case_diff=case_diff,
                theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
            )

            # save state
            state = {
                'train_theta': new_thetas[:i+1],
                'train_G': new_G[:i+1],
                'train_G_sem': new_G_sem[:i+1],
                'best_observed_obj': best,
                'best_observed_idx': best_idx,
            }
            save_state(state, logger.filename)

        # compute best objective from simulations
        f = objective(new_G)
        best_f_idx = f.argmax()
        best_f = f[best_f_idx].item()

        return new_thetas, new_G, new_G_sem, best_f, best_f_idx

    def initialize_model(train_x, train_y, train_y_sem):
        """
        Defines a GP given X, Y, and noise observations (standard error of mean)
        """
        train_ynoise = train_y_sem.pow(2.0)  # noise is in variance units

        # standardize outputs to zero mean, unit variance for good hyperparameter tuning
        outcome_transform = Standardize(m=n_days * n_age if per_age_group_objective else n_days)
        model = FixedNoiseGP(train_x, train_y, train_ynoise,
                             outcome_transform=outcome_transform)

        # "Loss" for GPs - the marginal log likelihood
        mll = ExactMarginalLogLikelihood(model.likelihood, model)

        return mll, model

    # Model initialization
    # parameters used in BO are always in unit cube for optimal hyperparameter tuning of GPs
    bo_bounds = torch.stack([torch.zeros(n_params), torch.ones(n_params)])

    def optimize_acqf_and_get_observation(acq_func, args):
        """
        Optimizes the acquisition function, and returns a new candidate and a noisy observation.
        botorch defaults: num_restarts=10, raw_samples=256, batch_limit=5, maxiter=200
        """
        batch_initial_conditions = gen_one_shot_kg_initial_conditions(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
        )

        # optimize acquisition function
        candidates, _ = optimize_acqf(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,  # used for initialization heuristic
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
            batch_initial_conditions=batch_initial_conditions
        )

        # proposed evaluation
        new_theta = candidates.detach().squeeze()

        # observe new noisy function evaluation
        new_G, new_G_sem = composite_simulation(new_theta)

        return new_theta, new_G, new_G_sem

    # return functions
    return (
        objective,
        generate_initial_observations,
        initialize_model,
        optimize_acqf_and_get_observation,
        case_diff,
        unnormalize_theta,
        header,
    )
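# Hedged sketch of an outer BO loop wiring the returned closures together.
# The project's actual driver lives elsewhere; this sketch assumes `args`
# carries `ninit`, `niters`, and `acqf_opt_num_fantasies`, a `logger` object
# with a `.log(...)` method, and the older botorch API the code above targets
# (`fit_gpytorch_model`, `qKnowledgeGradient` with an `objective` argument).
def _demo_bo_loop(args, logger):
    import torch
    from botorch.fit import fit_gpytorch_model
    from botorch.acquisition import qKnowledgeGradient

    (objective, generate_initial_observations, initialize_model,
     optimize_acqf_and_get_observation, case_diff, unnormalize_theta,
     header) = make_bayes_opt_functions(args)

    # initial quasi-random evaluations of the black-box simulator
    train_theta, train_G, train_G_sem, best_f, _ = \
        generate_initial_observations(args.ninit, logger)

    for it in range(args.niters):
        # re-fit the GP on all data observed so far
        mll, model = initialize_model(train_theta, train_G, train_G_sem)
        fit_gpytorch_model(mll)

        # build and optimize the knowledge-gradient acquisition function
        acqf = qKnowledgeGradient(
            model, objective=objective,
            num_fantasies=args.acqf_opt_num_fantasies)
        new_theta, new_G, new_G_sem = \
            optimize_acqf_and_get_observation(acqf, args)

        # append the new observation to the training data
        train_theta = torch.cat([train_theta, new_theta.unsqueeze(0)], dim=0)
        train_G = torch.cat([train_G, new_G.unsqueeze(0)], dim=0)
        train_G_sem = torch.cat([train_G_sem, new_G_sem.unsqueeze(0)], dim=0)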
def get_unique_calibration_params(*, country, area, multi_beta_calibration, maxiters=None):
    """
    Returns all unique parameter settings that **improved** the objective
    during calibration for a `country` and an `area`
    """
    param_bounds = (
        calibration_model_param_bounds_multi
        if multi_beta_calibration else
        calibration_model_param_bounds_single)

    sim_bounds = pdict_to_parr(
        pdict=param_bounds,
        multi_beta_calibration=multi_beta_calibration
    ).T

    state = load_state(calibration_states[country][area])
    train_theta = state['train_theta']
    train_G = state['train_G']

    mob_settings = calibration_mob_paths[country][area][0]
    with open(mob_settings, 'rb') as fp:
        mob_kwargs = pickle.load(fp)
    mob = MobilitySimulator(**mob_kwargs)

    data_start_date = calibration_start_dates[country][area]
    data_end_date = calibration_lockdown_dates[country]['end']

    unscaled_area_cases = collect_data_from_df(
        country=country, area=area, datatype='new',
        start_date_string=data_start_date, end_date_string=data_end_date)
    assert len(unscaled_area_cases.shape) == 2

    # Scale down cases based on number of people in town and region
    sim_cases = downsample_cases(unscaled_area_cases, mob_kwargs)

    n_days, n_age = sim_cases.shape

    G_obs = torch.tensor(sim_cases).reshape(1, n_days * n_age)
    G_obs_aggregate = torch.tensor(sim_cases).sum(dim=-1)

    def objective(G):
        return - (G - G_obs_aggregate).pow(2).sum(dim=-1) / n_days

    # if maxiters provided, select submatrix of state
    if maxiters:
        train_theta = train_theta[:min(maxiters, train_theta.shape[0])]
        train_G = train_G[:min(maxiters, train_G.shape[0])]

    # extract all parameter settings that improved
    best = -99999999999999
    t = 0
    all_params = []
    while t < train_theta.shape[0]:
        theta = train_theta[t]
        G = train_G[t]
        obj = objective(G).item()
        if obj > best:
            best = obj
            calibrated_params = transforms.unnormalize(theta, sim_bounds)
            all_params.append(
                (t, parr_to_pdict(parr=calibrated_params,
                                  multi_beta_calibration=multi_beta_calibration)))
        t += 1

    return all_params
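# Hedged usage sketch: enumerate the sequence of parameter settings that
# strictly improved the calibration objective; the country/area keys are
# placeholders for entries of `calibration_states`.
def _demo_improving_params():
    improving = get_unique_calibration_params(
        country='GER', area='TU', multi_beta_calibration=False, maxiters=None)
    for t, pdict in improving:
        print(f'BO iteration {t}: {pdict}')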
def compute_mob_statistics(loc_tup, days, max_people, verbose=False):
    '''Computes all MobilitySimulator statistics for given `country` and `area`'''
    country, area = loc_tup
    if verbose:
        print(country, area)

    # get mobility simulator settings
    statistics = dict()
    mob_settings_downsampled, mob_settings_full = calibration_mob_paths[country][area]

    # downsampled
    with open(mob_settings_downsampled, 'rb') as fp:
        obj = pickle.load(fp)
    mob_downsampled = MobilitySimulator(**obj)
    mob_downsampled.verbose = verbose
    mob_downsampled.simulate(max_time=days * TO_HOURS, lazy_contacts=True)

    # full
    with open(mob_settings_full, 'rb') as fp:
        obj = pickle.load(fp)
    mob_full = MobilitySimulator(**obj)
    mob_full.verbose = verbose
    mob_full.simulate(max_time=days * TO_HOURS, lazy_contacts=True)

    # compute contact information
    contact_info_downsampled = get_stats(mob_downsampled, max_people, verbose=verbose)
    del mob_downsampled
    contact_info_full = get_stats(mob_full, max_people, verbose=verbose)
    del mob_full

    # summarize
    for s in contact_info_downsampled.keys():
        fig = plt.figure(figsize=(4, 7))

        ax0 = fig.add_subplot(211)
        ax0.hist(contact_info_downsampled[s])
        ax0.set_title('downsampled')
        xlim0 = ax0.get_xlim()

        ax1 = fig.add_subplot(212)
        ax1.hist(contact_info_full[s])
        ax1.set_title('full')
        xlim1 = ax1.get_xlim()

        ax0.set_xlim((min(xlim0[0], xlim1[0]), max(xlim0[1], xlim1[1])))
        ax1.set_xlim((min(xlim0[0], xlim1[0]), max(xlim0[1], xlim1[1])))

        fig.suptitle(s)
        plt.savefig('plots/betaScaling-' + loc_tup[0] + '-' + loc_tup[1] + '-' + s + '.png',
                    format='png', facecolor=None, dpi=200, bbox_inches='tight')
        plt.close('all')

        d = comp_stats(contact_info_downsampled[s], contact_info_full[s])
        for k, v in d.items():
            statistics['ratio-' + k + '-' + s] = v

    # print always
    print(country, area)
    pprint(statistics)

    return statistics
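# Hedged usage sketch: compare downsampled vs. full-scale mobility statistics
# for a few regions. The location tuples and the `max_people` value are
# placeholders; valid locations are the keys of `calibration_mob_paths`, and
# the exact meaning of `max_people` is defined by `get_stats`.
def _demo_mob_statistics():
    for loc in [('GER', 'TU'), ('CH', 'TI')]:
        stats = compute_mob_statistics(loc, days=7, max_people=10000, verbose=True)
        print(loc, stats)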