def test_point_estimate_local_death():
    """Fit ``recover_frac`` to simulated cumulative deaths from a local model file.

    Two copies of the same model file are opened: ``sim_2`` generates the
    data and ``ref_2`` is fitted to it.  Every parameter of ``ref_2`` is
    fixed except ``recover_frac``.

    This is a smoke test: it contains no assertions and passes as long as
    the fit runs without raising.
    """
    # NOTE: machine-specific absolute path; the test can only run where this
    # file exists.
    model_path = '/Users/karlen/pypm-local/models/covid19/USA/ny_4_2_0109.pypm'
    start_day = 80
    end_day = 100

    ref_2 = Model.open_file(model_path)
    sim_2 = Model.open_file(model_path)

    # fix everything, then release only recover_frac for the fit
    for par_name in ref_2.parameters:
        ref_2.parameters[par_name].set_fixed()
    ref_2.parameters['recover_frac'].set_variable(None, None)

    sim_2.reset()
    sim_2.generate_data(end_day)

    optimizer = Optimizer(ref_2, 'total deaths',
                          sim_2.populations['deaths'].history,
                          [start_day, end_day], cumul_reset=True)
    optimizer.reset_variables()
    # the fit result is not inspected; only successful completion matters here
    optimizer.fit()
def test_point_estimates_repeated():
    """Repeat a fit of alpha_0, alpha_1 and cont_0 on freshly simulated data.

    For each of ``n_trials`` repetitions ``sim_2`` generates new data and
    ``ref_2`` is fitted to its 'reported' history.  The test then checks,
    per parameter, the spread of the fitted values and the offset of their
    mean from the parameter's ``initial_value``, followed by the mean chi2
    (against ndof) and the mean 'acor' fit statistic.
    """
    first_day, last_day = 12, 60
    n_trials = 10
    fitted_names = ['alpha_0', 'alpha_1', 'cont_0']
    # largest accepted standard deviation of the fitted values
    max_spread = {'alpha_0': 0.03, 'alpha_1': 0.01, 'cont_0': 10.}

    ref_2 = Model.open_file(path_model_2_2)
    sim_2 = Model.open_file(path_model_2_2)

    for name in fitted_names:
        ref_2.parameters[name].set_variable(None, None)

    # running sums of the fitted values and of their squares
    total = dict.fromkeys(fitted_names, 0.)
    total_sq = dict.fromkeys(fitted_names, 0.)
    trial_stats = []

    for _ in range(n_trials):
        sim_2.reset()
        sim_2.generate_data(last_day)

        optimizer = Optimizer(ref_2, 'total reported',
                              sim_2.populations['reported'].history,
                              [first_day, last_day])
        optimizer.reset_variables()
        optimizer.fit()
        trial_stats.append(optimizer.fit_statistics)

        for name in fitted_names:
            fitted = ref_2.parameters[name].get_value()
            total[name] += fitted
            total_sq[name] += fitted**2

    for name in fitted_names:
        mean = total[name] / n_trials
        spread = np.sqrt(total_sq[name] / n_trials - mean**2)
        assert spread < max_spread[name]
        truth = ref_2.parameters[name].initial_value
        # NOTE(review): the offset is divided by spread AND by sqrt(n_trials);
        # a pull on the mean would multiply by sqrt(n_trials) - confirm the
        # looser form is intended.
        assert np.abs((mean - truth) / spread / np.sqrt(1. * n_trials)) < 3.

    ndof = trial_stats[0]['ndof']
    chi2_mean = np.mean([stats['chi2'] for stats in trial_stats])
    assert np.abs(chi2_mean - ndof) < 8.

    acor_mean = np.mean([stats['acor'] for stats in trial_stats])
    assert np.abs(acor_mean) < 0.2
def fit_deaths(self, model, data, n_rep, verbose, start_deaths):
    """Fit ``recover_frac`` to the cumulative deaths series.

    alpha_0, alpha_1 and cont_0 are fixed first, so that only
    ``recover_frac`` varies.  The fit covers ``data['deaths']`` from day
    ``start_deaths`` to the last index of the series.  Afterwards
    ``self.set_std_estimators`` is called with the model, the optimizer,
    ``n_rep`` and ``verbose`` to assign uncertainties.
    """
    # hold the case-fit parameters constant
    for fixed_name in ('alpha_0', 'alpha_1', 'cont_0'):
        model.parameters[fixed_name].set_fixed()

    # the only free parameter of this fit
    model.parameters['recover_frac'].set_variable(None, None)

    last_day = len(data['deaths']) - 1
    optimizer = Optimizer(model, 'total deaths', data['deaths'],
                          [start_deaths, last_day], cumul_reset=True)
    optimizer.fit()

    # find and assign uncertainties
    self.set_std_estimators(model, optimizer, n_rep, verbose)
def test_sim_gof_local():
    """Fit alpha_1 only, then check simulated goodness-of-fit statistics.

    One simulated 'reported' history is generated and an ``inf`` is written
    at index 47 before the fit.  After the point fit,
    ``optimizer.calc_sim_gof`` is run for ``n_trials`` repetitions and the
    mean chi2 (against ndof) and mean 'acor' of ``optimizer.fit_stat_list``
    are checked.
    """
    first_day, last_day = 12, 60
    n_trials = 10

    ref_2 = Model.open_file(path_model_2_2)
    sim_2 = Model.open_file(path_model_2_2)

    # alpha_1 is the only parameter left free in this test
    ref_2.parameters['alpha_1'].set_variable(None, None)

    sim_2.reset()
    sim_2.generate_data(last_day)
    # corrupt one point; presumably skip_data='42,45:48' excludes it from the
    # fit - confirm against the Optimizer implementation
    sim_2.populations['reported'].history[47] = np.inf

    optimizer = Optimizer(ref_2, 'total reported',
                          sim_2.populations['reported'].history,
                          [first_day, last_day],
                          cumul_reset=True, skip_data='42,45:48')
    optimizer.reset_variables()
    optimizer.fit()
    # statistics of the point fit; read but not used by the assertions below
    point_fit_stats = optimizer.fit_statistics

    optimizer.calc_chi2s = False
    optimizer.calc_chi2f = True
    optimizer.calc_sim_gof(n_trials)

    sim_stats = optimizer.fit_stat_list
    ndof = sim_stats[0]['ndof']

    chi2_mean = np.mean([sim_stats[k]['chi2'] for k in range(n_trials)])
    assert np.abs(chi2_mean - ndof) < 8.E6

    acor_mean = np.mean([sim_stats[k]['acor'] for k in range(n_trials)])
    assert np.abs(acor_mean) < 0.2
def test_point_estimate_daily():
    """Fit daily reported counts, scanning the transition day first.

    The daily series is built by differencing the simulated cumulative
    'reported' history.  ``i_fit`` is run with ``trans_rate_1_time``
    allowed between 13 and 19; the test asserts the chosen value is 15, 16
    or 17.  That parameter is then fixed and a final ``fit`` must bring
    alpha_0, alpha_1 and cont_0 within fixed tolerances of their initial
    values.
    """
    def delta(cumul):
        # day-to-day differences of a cumulative series
        steps = [cumul[k] - cumul[k - 1] for k in range(1, len(cumul))]
        # first daily value is repeated since val(t0-1) is unknown
        return [steps[0]] + steps

    first_day, last_day = 12, 60
    # accepted absolute deviation from each parameter's initial value
    tolerance = {'alpha_0': 0.06, 'alpha_1': 0.02, 'cont_0': 20.}

    ref_2 = Model.open_file(path_model_2_2)
    sim_2 = Model.open_file(path_model_2_2)

    # free alpha_0, alpha_1, cont_0 and the (bounded) transition time
    for name in ['alpha_0', 'alpha_1', 'cont_0']:
        ref_2.parameters[name].set_variable(None, None)
    trans_time = ref_2.parameters['trans_rate_1_time']
    trans_time.set_variable(None, None)
    trans_time.set_min(13)
    trans_time.set_max(19)

    sim_2.reset()
    sim_2.generate_data(last_day)

    daily_data = delta(sim_2.populations['reported'].history)
    # corrupt one point; presumably skip_data='42,45:48' excludes it from the
    # fit - confirm against the Optimizer implementation
    daily_data[47] = np.inf

    optimizer = Optimizer(ref_2, 'daily reported', daily_data,
                          [first_day, last_day], skip_data='42,45:48')
    optimizer.reset_variables()

    optimizer.i_fit()
    assert ref_2.parameters['trans_rate_1_time'].get_value() in [15, 16, 17]

    ref_2.parameters['trans_rate_1_time'].set_fixed()
    optimizer.fit()

    for name, limit in tolerance.items():
        fitted = ref_2.parameters[name].get_value()
        assert np.abs(fitted - ref_2.parameters[name].initial_value) < limit
def test_point_estimate_skip_zeros():
    """Fit alpha_1 and the transition day with ``skip_zeros=True``.

    The simulation's 'report_days' report-noise setting is set to 7 before
    data are generated, and an ``inf`` is written at index 47 of the
    'reported' history.  ``i_fit`` is run with ``trans_rate_1_time``
    allowed between 13 and 19; the test asserts the chosen value is 15, 16
    or 17.  That parameter is then fixed and a final ``fit`` must bring
    alpha_1 within 0.02 of its initial value.
    """
    first_day, last_day = 12, 60

    ref_2 = Model.open_file(path_model_2_2)
    sim_2 = Model.open_file(path_model_2_2)

    # free alpha_1 and the (bounded) transition time
    ref_2.parameters['alpha_1'].set_variable(None, None)
    trans_time = ref_2.parameters['trans_rate_1_time']
    trans_time.set_variable(None, None)
    trans_time.set_min(13)
    trans_time.set_max(19)

    sim_2.reset()
    report_noise = sim_2.populations['reported'].get_report_noise()
    report_noise['report_days'].set_value(7)
    sim_2.generate_data(last_day)
    # corrupt one point; presumably skip_data='42,45:48' excludes it from the
    # fit - confirm against the Optimizer implementation
    sim_2.populations['reported'].history[47] = np.inf

    optimizer = Optimizer(ref_2, 'total reported',
                          sim_2.populations['reported'].history,
                          [first_day, last_day],
                          cumul_reset=True, skip_data='42,45:48',
                          skip_zeros=True)
    optimizer.reset_variables()

    optimizer.i_fit()
    assert ref_2.parameters['trans_rate_1_time'].get_value() in [15, 16, 17]

    ref_2.parameters['trans_rate_1_time'].set_fixed()
    optimizer.fit()

    alpha_1 = ref_2.parameters['alpha_1']
    assert np.abs(alpha_1.get_value() - alpha_1.initial_value) < 0.02
def test_report_noise_days():
    """Repeat fits on data generated with report noise and restricted report days.

    Runs once with ``report_noise_weekly`` False and once with True on the
    simulation's 'reported' population.  In each case alpha_0, alpha_1 and
    cont_0 are fitted ``n_trials`` times on fresh data; the spread and mean
    offset of the fitted values are checked, the mean chi2 per degree of
    freedom must exceed 2.5, and - for the non-weekly case only - the mean
    'acor' statistic must be below -0.2.
    """
    first_day, last_day = 12, 80
    n_trials = 10
    fitted_names = ['alpha_0', 'alpha_1', 'cont_0']
    # largest accepted standard deviation of the fitted values
    max_spread = {'alpha_0': 0.05, 'alpha_1': 0.01, 'cont_0': 10.}

    for report_noise_weekly in [False, True]:
        ref_2 = Model.open_file(path_model_2_2)
        ref_2.parameters['report_noise'].set_value(0.1)
        # BC: no reporting on Sundays
        ref_2.parameters['report_days'].set_value(63)

        sim_2 = copy.deepcopy(ref_2)
        sim_2.populations['reported'].report_noise_weekly = report_noise_weekly

        for name in fitted_names:
            ref_2.parameters[name].set_variable(None, None)

        # running sums of the fitted values and of their squares
        total = dict.fromkeys(fitted_names, 0.)
        total_sq = dict.fromkeys(fitted_names, 0.)
        trial_stats = []

        for _ in range(n_trials):
            sim_2.reset()
            sim_2.generate_data(last_day)

            optimizer = Optimizer(ref_2, 'total reported',
                                  sim_2.populations['reported'].history,
                                  [first_day, last_day])
            optimizer.reset_variables()
            optimizer.fit()
            trial_stats.append(optimizer.fit_statistics)

            for name in fitted_names:
                fitted = ref_2.parameters[name].get_value()
                total[name] += fitted
                total_sq[name] += fitted**2

        means = {name: total[name] / n_trials for name in fitted_names}
        spreads = {
            name: np.sqrt(total_sq[name] / n_trials - means[name]**2)
            for name in fitted_names
        }

        for name in fitted_names:
            assert spreads[name] < max_spread[name]
            truth = ref_2.parameters[name].initial_value
            offset = (means[name] - truth) / spreads[name]
            assert np.abs(offset / np.sqrt(1. * n_trials)) < 3.

        ndof = trial_stats[0]['ndof']
        chi2_mean = np.mean([stats['chi2'] for stats in trial_stats])
        assert chi2_mean / ndof > 2.5

        if not report_noise_weekly:
            acor_mean = np.mean([stats['acor'] for stats in trial_stats])
            assert acor_mean < -0.2
if 'cases' in model_choices.FitVariable.lower(): fitting_string += 'reported' elif 'infected' in model_choices.FitVariable.lower(): fitting_string += 'infected' cumulative_reset_from_zero = True if model_choices.CumulReset == True else False # days in the time series on which to start and end data fitting start_fitting_day = 1 # since the data is already filtered by start_date end_fitting_day = min(filtered_data.shape[0], (projection_start_date - start_date).days + 1) # fitting using least squares (detailed by Karlen in the pypmca code) myOptimiser = Optimizer(the_model, fitting_string, filtered_data.Value.values, [start_fitting_day, end_fitting_day], cumulative_reset_from_zero, str(model_choices.SkipDatesText[0])) popt, pcov = myOptimiser.fit() # fetch the names and values of the posteriors, reparameterise the model and write them to datasheet fitted_variables = datasheet(myScenario, "modelKarlenPypm_FitVariables", empty=True) for index in range(len(popt)): name = myOptimiser.variable_names[index] value = popt[index] fitted_variables = fitted_variables.append( { 'Variable': name, 'Value': value
def fit_reported(self, model, data, trans_date_guess, n_rep, verbose):
    """Fit alpha_0, alpha_1, cont_0 and the transition day to reported cases.

    Steps, in order:

    1. Set the model's t0 from ``self.t_0`` and seed ``trans_rate_1_time``
       with the day offset of ``trans_date_guess`` from ``self.t_0``.
    2. Seed ``cont_0`` from the change in ``data['reported']`` between
       index 0 and index 7 (so at least 8 points are required).
    3. Fit alpha_0, alpha_1 and cont_0, then adopt the fitted values as
       new initial values.
    4. Walk the transition day one day at a time, away from the guess, for
       as long as the chi2 returned by ``i_fit`` keeps decreasing.
    5. Fix the transition day, refit, and call ``self.set_std_estimators``.
    6. Refit copies of the model with the transition shifted by -2 and +2
       days and add the resulting alpha_1 difference in quadrature to
       alpha_1's ``std_estimator``.

    When ``verbose`` is true the alpha values and the added error are
    printed.
    """
    free_names = ('alpha_0', 'alpha_1', 'cont_0')
    reported = data['reported']

    model.set_t0(self.t_0.year, self.t_0.month, self.t_0.day)
    trans_day_guess = (trans_date_guess - self.t_0).days
    model.parameters['trans_rate_1_time'].set_value(trans_day_guess)

    # estimate the number who are contagious using the first 8 days of data
    cont_8 = reported[7] - reported[0]
    model.parameters['cont_0'].set_value(cont_8)
    model.parameters['cont_0'].set_max(4 * cont_8)
    model.boot_pars['boot_value'] = cont_8 / 50.

    for name in free_names:
        model.parameters[name].set_variable(None, None)

    # find reasonable values for the parameters
    first_day = 0
    last_day = len(reported) - 1
    optimizer = Optimizer(model, 'total reported', reported,
                          [first_day, last_day], cumul_reset=True)
    optimizer.fit()

    for name in free_names:
        model.parameters[name].new_initial_value()

    # scan the guessed day and the day after it to pick a search direction
    trans_par = model.parameters['trans_rate_1_time']
    trans_par.set_variable(None, None)
    trans_par.set_min(trans_day_guess)
    trans_par.set_max(trans_day_guess + 1)

    first_scan = optimizer.i_fit()['chi2_list']
    if first_scan[0] < first_scan[1]:
        step = -1
        best_chi2 = first_scan[0]
        trans_day = trans_day_guess
    else:
        step = +1
        best_chi2 = first_scan[1]
        trans_day = trans_day_guess + 1

    # keep stepping one day at a time until chi2 stops improving
    while True:
        candidate = trans_day + step
        trans_par.set_min(candidate)
        trans_par.set_max(candidate)
        candidate_chi2 = optimizer.i_fit()['chi2_list'][0]
        if not candidate_chi2 < best_chi2:
            break
        best_chi2 = candidate_chi2
        trans_day = candidate

    model.parameters['trans_rate_1_time'].set_value(trans_day)
    model.parameters['trans_rate_1_time'].set_fixed()

    optimizer.fit()

    # find and assign uncertainties
    self.set_std_estimators(model, optimizer, n_rep, verbose)

    # Include the uncertainty from unknown transition day
    delta_days = [-2, +2]
    mod_alphas = []
    for delta_day in delta_days:
        shifted_model = copy.deepcopy(model)
        shifted_time = shifted_model.transitions['trans_rate_1'].transition_time
        shifted_time.set_value(shifted_time.get_value() + delta_day)

        shifted_optimizer = Optimizer(shifted_model, 'total reported', reported,
                                      [first_day, last_day], cumul_reset=True)
        shifted_optimizer.fit()
        mod_alphas.append(shifted_model.parameters['alpha_1'].get_value())

    if verbose:
        print('Transition: trans_rate_1 on day',
              model.transitions['trans_rate_1'].transition_time.get_value())
        print(
            'alpha values: \n nom = {0:0.4f} +/- {1:0.4f} \n {2:+d} days = {3:0.4f} \n {4:+d} days = {5:0.4f}'
            .format(model.parameters['alpha_1'].get_value(),
                    model.parameters['alpha_1'].std_estimator,
                    delta_days[0], mod_alphas[0],
                    delta_days[1], mod_alphas[1]))

    # while the following should be divided by 2, leave as is to account for
    # larger delta_day possibility
    mod_alphas_std = np.abs(mod_alphas[0] - mod_alphas[1])
    alpha_1 = model.parameters['alpha_1']
    alpha_1.std_estimator = np.sqrt(alpha_1.std_estimator**2 + mod_alphas_std**2)

    if verbose:
        print('Additional error included in final alpha: {0:0.4f}'.format(
            mod_alphas_std))