def run_everything():
    """Run the moving-window fit using the module-level configuration.

    Nothing is computed here: every argument forwarded to
    ``run_everything_imported`` is read from a module-level name, except
    ``opt_statsmodels`` which is always passed as ``True``.

    Returns:
        Whatever ``run_everything_imported`` returns.
    """
    # Keyword options, listed in the order they are handed to the callee.
    keyword_options = dict(
        state_models_filename=state_models_filename,
        state_report_filename=state_report_filename,
        moving_window_size=moving_window_size,
        n_bootstraps=n_bootstraps,
        n_likelihood_samples=n_likelihood_samples,
        load_data_obj=load_data,
        sorted_param_names=sorted_param_names,
        sorted_init_condit_names=sorted_init_condit_names,
        curve_fit_bounds=curve_fit_bounds,
        priors=priors,
        test_params=test_params,
        static_params=static_params,
        # NOTE(review): sibling runners in this file pass ``opt_force_calc``;
        # confirm ``opt_calc`` is the keyword this callee version expects.
        opt_calc=opt_calc,
        opt_force_plot=opt_force_plot,
        logarithmic_params=logarithmic_params,
        extra_params=extra_params,
        plot_param_names=plot_param_names,
        opt_statsmodels=True,
        opt_simplified=opt_simplified,
    )
    return run_everything_imported(
        run_states,
        MovingWindowModel,
        max_date_str,
        load_data,
        **keyword_options,
    )
def _run_everything_sub(region=Region.US_states, override_run_states=None):
    """Fit the moving-window model for the locations of one region.

    The function first cleans the ranked location lists stored on the
    module-level ``load_data`` object (``load_data`` is mutated), then picks
    the locations to run, builds the model hyperparameters and hands
    everything to ``run_everything_imported`` with ``MovingWindowModel``.

    Besides its arguments it reads these module-level names: ``load_data``,
    ``moving_window_size``, ``n_bootstraps``, ``n_likelihood_samples``,
    ``override_max_date_str``, ``opt_simplified``, ``opt_force_calc`` and
    ``opt_force_plot``.

    Args:
        region: ``Region`` member selecting which ranked list on
            ``load_data`` is used when ``override_run_states`` is None.
        override_run_states: Optional explicit list of location names. When
            given, ``region`` is only used for naming the outputs.

    Returns:
        The value returned by ``run_everything_imported``.

    Raises:
        ValueError: If ``override_run_states`` is None and ``region`` is not
            one of the four handled ``Region`` members.
    """
    # If a ranked list is wrapped in a tuple, keep only its first element.
    if isinstance(load_data.current_cases_ranked_us_counties, tuple):
        load_data.current_cases_ranked_us_counties = \
            load_data.current_cases_ranked_us_counties[0]
    if isinstance(load_data.current_cases_ranked_non_us_provinces, tuple):
        load_data.current_cases_ranked_non_us_provinces = \
            load_data.current_cases_ranked_non_us_provinces[0]

    load_data.current_cases_ranked_non_us_provinces = [
        x for x in load_data.current_cases_ranked_non_us_provinces
        if not x.startswith('US:')
    ]

    # Remove provinces without enough data: keep a province only when
    # shape[1] >= 3, shape[0] >= 30 and its name does not start with 'China:'.
    new_provinces = []
    for province in load_data.current_cases_ranked_non_us_provinces:
        series_shape = load_data.get_state_data(province)['series_data'].shape
        too_small = series_shape[1] < 3 or series_shape[0] < 30
        if too_small or province.startswith('China:'):
            print(f'Removing province {province}')
            if series_shape[1] >= 3:
                print(f' with tmp_dict["series_data"].shape = {series_shape}')
        else:
            print(f'Keeping province {province}')
            print(f' with tmp_dict["series_data"].shape = {series_shape}')
            new_provinces.append(province)
    load_data.current_cases_ranked_non_us_provinces = new_provinces

    print('load_data.current_cases_ranked_non_us_provinces')
    for name in sorted(load_data.current_cases_ranked_non_us_provinces):
        print(name)

    if override_run_states is None:
        if region == Region.US_states:
            override_run_states = load_data.current_cases_ranked_us_states
        elif region == Region.US_counties:
            override_run_states = load_data.current_cases_ranked_us_counties
        elif region == Region.countries:
            override_run_states = load_data.current_cases_ranked_non_us_states[:100]
        elif region == Region.provinces:
            override_run_states = load_data.current_cases_ranked_non_us_provinces[:200]
        else:
            # Previously this fell through and failed later with an opaque
            # "'NoneType' object is not iterable".
            raise ValueError(f'Unsupported region: {region!r}')

    # Drop names that start with a space.
    override_run_states = [x for x in override_run_states if not x.startswith(' ')]

    print('Gonna run these states:')
    for name in sorted(override_run_states):
        print(name)

    model_type_name = f'moving_window_{moving_window_size}_days_{region}_region'

    if override_max_date_str is None:
        hyperparameter_max_date_str = datetime.datetime.today().strftime('%Y-%m-%d')
    else:
        hyperparameter_max_date_str = override_max_date_str
    date_tag = hyperparameter_max_date_str.replace("-", "_")

    state_models_filename = f'state_models_smoothed_moving_window_{region}_{n_bootstraps}_bootstraps_{n_likelihood_samples}_likelihood_samples_{date_tag}_max_date.joblib'
    state_report_filename = f'state_report_smoothed_moving_window_{region}_{n_bootstraps}_bootstraps_{n_likelihood_samples}_likelihood_samples_{date_tag}_max_date.joblib'

    # fixing parameters I don't want to train for saves a lot of computer power
    extra_params = {}
    static_params = {'day0_positive_multiplier': 1,
                     'day0_deceased_multiplier': 1}

    # The day0 multipliers are held fixed through static_params, so only the
    # day1..day6 multipliers are fitted (positive ones first, then deceased).
    day_multiplier_names = [
        f'day{day}_{series}_multiplier'
        for series in ('positive', 'deceased')
        for day in range(1, 7)
    ]

    logarithmic_params = [
        'positive_intercept',
        'deceased_intercept',
        'sigma_positive',
        'sigma_deceased',
    ] + day_multiplier_names

    if opt_simplified:
        plot_param_names = [
            'positive_slope',
            'deceased_slope',
            'positive_intercept',
            'deceased_intercept',
        ]
    else:
        plot_param_names = [
            'positive_slope',
            'positive_intercept',
            'deceased_slope',
            'deceased_intercept',
            'sigma_positive',
            'sigma_deceased',
        ]

    sorted_init_condit_names = []
    sorted_param_names = [
        'positive_slope',
        'positive_intercept',
        'deceased_slope',
        'deceased_intercept',
        'sigma_positive',
        'sigma_deceased',
    ] + day_multiplier_names

    curve_fit_bounds = {
        'positive_slope': (-10, 10),
        'positive_intercept': (0, 1000000),
        'deceased_slope': (-10, 10),
        'deceased_intercept': (0, 1000000),
        'sigma_positive': (0, 100),
        'sigma_deceased': (0, 100),
    }
    curve_fit_bounds.update((name, (0, 10)) for name in day_multiplier_names)

    test_params = {
        'positive_slope': 0,
        'positive_intercept': 2500,
        'deceased_slope': 0,
        'deceased_intercept': 250,
        'sigma_positive': 0.05,
        'sigma_deceased': 0.1,
    }
    test_params.update((name, 1) for name in day_multiplier_names)

    # uniform priors with bounds (same dict object as curve_fit_bounds):
    priors = curve_fit_bounds

    return run_everything_imported(
        override_run_states,
        MovingWindowModel,
        load_data,
        model_type_name=model_type_name,
        state_models_filename=state_models_filename,
        state_report_filename=state_report_filename,
        moving_window_size=moving_window_size,
        n_bootstraps=n_bootstraps,
        n_likelihood_samples=n_likelihood_samples,
        load_data_obj=load_data,
        sorted_param_names=sorted_param_names,
        sorted_init_condit_names=sorted_init_condit_names,
        curve_fit_bounds=curve_fit_bounds,
        priors=priors,
        test_params=test_params,
        static_params=static_params,
        opt_force_calc=opt_force_calc,
        opt_force_plot=opt_force_plot,
        logarithmic_params=logarithmic_params,
        extra_params=extra_params,
        plot_param_names=plot_param_names,
        opt_statsmodels=True,
        opt_simplified=opt_simplified,
        override_max_date_str=override_max_date_str,
    )
def run_everything():
    """Fit the convolution model using the module-level configuration.

    Builds the output filenames, the parameter lists, bounds, starting values
    and derived-parameter callbacks, chooses the locations to run, and passes
    everything to ``run_everything_imported`` with ``ConvolutionModel``.

    Reads these module-level names: ``override_max_date_str``,
    ``n_bootstraps``, ``n_likelihood_samples``, ``load_data``,
    ``override_run_states``, ``max_date_str``, ``opt_force_calc`` and
    ``opt_force_plot``.

    Returns:
        Whatever ``run_everything_imported`` returns.
    """
    # Use today's date for the filenames unless an override is configured.
    if override_max_date_str is not None:
        hyperparameter_max_date_str = override_max_date_str
    else:
        hyperparameter_max_date_str = datetime.datetime.today().strftime('%Y-%m-%d')
    date_tag = hyperparameter_max_date_str.replace("-", "_")

    state_models_filename = f'state_models_smoothed_convolution_{n_bootstraps}_bootstraps_{n_likelihood_samples}_likelihood_samples_{date_tag}_max_date.joblib'
    state_report_filename = f'state_report_smoothed_convolution_{n_bootstraps}_bootstraps_{n_likelihood_samples}_likelihood_samples_{date_tag}_max_date.joblib'

    # fixing parameters I don't want to train for saves a lot of computer power
    static_params = {
        'contagious_to_positive_width': 7,
        'contagious_to_deceased_width': 7,
        'contagious_to_positive_mult': 0.1,
    }
    logarithmic_params = [
        'I_0',
        'contagious_to_deceased_mult',
        'sigma_positive',
        'sigma_deceased',
    ]
    sorted_init_condit_names = ['I_0']
    sorted_param_names = [
        'alpha_1',
        'alpha_2',
        'contagious_to_positive_delay',
        'contagious_to_deceased_delay',
        'contagious_to_deceased_mult',
        'sigma_positive',
        'sigma_deceased',
    ]
    plot_param_names = [
        'alpha_1',
        'alpha_2',
        'contagious_to_positive_delay',
        'positive_to_deceased_delay',
        'positive_to_deceased_mult',
    ]

    def get_positive_to_deceased_delay(x, map_name_to_sorted_ind=None):
        # Difference between the deceased delay and the positive delay.
        deceased_ind = map_name_to_sorted_ind['contagious_to_deceased_delay']
        positive_ind = map_name_to_sorted_ind['contagious_to_positive_delay']
        return x[deceased_ind] - x[positive_ind]

    def get_positive_to_deceased_mult(x, map_name_to_sorted_ind=None):
        # NOTE(review): 0.1 equals the static 'contagious_to_positive_mult'
        # above; presumably the two are meant to stay in sync -- confirm.
        deceased_mult_ind = map_name_to_sorted_ind['contagious_to_deceased_mult']
        return x[deceased_mult_ind] / 0.1

    # Derived quantities, each computed from a parameter vector by a callback.
    extra_params = {
        'positive_to_deceased_delay': get_positive_to_deceased_delay,
        'positive_to_deceased_mult': get_positive_to_deceased_mult,
    }

    # The *_width parameters are held fixed in static_params, so they have
    # neither bounds nor starting values here.
    curve_fit_bounds = {
        'I_0': (1e-12, 100.0),  # starting infections
        'alpha_1': (-1, 2),
        'alpha_2': (-1, 2),
        'sigma_positive': (0, 100),
        'sigma_deceased': (0, 100),
        'contagious_to_positive_delay': (-14, 21),
        'contagious_to_deceased_delay': (-14, 42),
        'contagious_to_deceased_mult': (1e-12, 1),
    }
    test_params = {
        'I_0': 2e-3,  # starting infections
        'alpha_1': 0.23,
        'alpha_2': 0.01,
        'sigma_positive': 0.01,
        'sigma_deceased': 0.2,
        'contagious_to_positive_delay': 9,
        'contagious_to_deceased_delay': 15,
        'contagious_to_deceased_mult': 0.01,
    }

    # uniform priors with bounds (same dict object as curve_fit_bounds):
    priors = curve_fit_bounds

    # Default run order: locations sorted by descending current case count.
    case_counts = load_data.map_state_to_current_case_cnt
    population_ranked_state_names = sorted(
        case_counts.keys(), key=lambda name: -case_counts[name])
    if override_run_states is None:
        run_states = population_ranked_state_names
    else:
        run_states = override_run_states

    keyword_options = dict(
        state_models_filename=state_models_filename,
        state_report_filename=state_report_filename,
        n_bootstraps=n_bootstraps,
        n_likelihood_samples=n_likelihood_samples,
        load_data_obj=load_data,
        sorted_param_names=sorted_param_names,
        sorted_init_condit_names=sorted_init_condit_names,
        curve_fit_bounds=curve_fit_bounds,
        priors=priors,
        test_params=test_params,
        static_params=static_params,
        opt_force_calc=opt_force_calc,
        opt_force_plot=opt_force_plot,
        logarithmic_params=logarithmic_params,
        extra_params=extra_params,
        plot_param_names=plot_param_names,
    )
    return run_everything_imported(
        run_states,
        ConvolutionModel,
        max_date_str,
        load_data,
        **keyword_options,
    )
def _run_everything_sub(region=Region.US_states, override_run_states=None):
    """Fit the Frechet model for the locations of one region.

    The function first cleans the ranked location lists stored on the
    module-level ``load_data`` object (``load_data`` is mutated), then picks
    the locations to run, builds the model hyperparameters and hands
    everything to ``run_everything_imported`` with ``FrechetModel``.

    Besides its arguments it reads these module-level names: ``load_data``,
    ``n_bootstraps``, ``n_likelihood_samples``, ``override_max_date_str``,
    ``opt_force_calc`` and ``opt_force_plot``.

    Args:
        region: ``Region`` member selecting which ranked list on
            ``load_data`` is used when ``override_run_states`` is None.
        override_run_states: Optional explicit list of location names. When
            given, ``region`` is only used for naming the model type.

    Returns:
        The value returned by ``run_everything_imported``.

    Raises:
        ValueError: If ``override_run_states`` is None and ``region`` is not
            one of the four handled ``Region`` members.
    """
    # If a ranked list is wrapped in a tuple, keep only its first element.
    if isinstance(load_data.current_cases_ranked_us_counties, tuple):
        load_data.current_cases_ranked_us_counties = \
            load_data.current_cases_ranked_us_counties[0]
    if isinstance(load_data.current_cases_ranked_non_us_provinces, tuple):
        load_data.current_cases_ranked_non_us_provinces = \
            load_data.current_cases_ranked_non_us_provinces[0]

    load_data.current_cases_ranked_non_us_provinces = [
        x for x in load_data.current_cases_ranked_non_us_provinces
        if not x.startswith('US:')
    ]

    # Remove provinces without enough data: keep a province only when
    # shape[1] >= 3, shape[0] >= 30 and its name does not start with 'China:'.
    new_provinces = []
    for province in load_data.current_cases_ranked_non_us_provinces:
        series_shape = load_data.get_state_data(province)['series_data'].shape
        too_small = series_shape[1] < 3 or series_shape[0] < 30
        if too_small or province.startswith('China:'):
            print(f'Removing province {province}')
            if series_shape[1] >= 3:
                print(f' with tmp_dict["series_data"].shape = {series_shape}')
        else:
            print(f'Keeping province {province}')
            print(f' with tmp_dict["series_data"].shape = {series_shape}')
            new_provinces.append(province)
    load_data.current_cases_ranked_non_us_provinces = new_provinces

    print('load_data.current_cases_ranked_non_us_provinces')
    for name in sorted(load_data.current_cases_ranked_non_us_provinces):
        print(name)

    if override_run_states is None:
        if region == Region.US_states:
            override_run_states = load_data.current_cases_ranked_us_states
        elif region == Region.US_counties:
            override_run_states = load_data.current_cases_ranked_us_counties[:70]
        elif region == Region.countries:
            override_run_states = load_data.current_cases_ranked_non_us_states[:70]
        elif region == Region.provinces:
            override_run_states = load_data.current_cases_ranked_non_us_provinces[:70]
        else:
            # Previously this fell through and failed later with an opaque
            # "'NoneType' object is not iterable".
            raise ValueError(f'Unsupported region: {region!r}')

    # Drop names that start with a space.
    override_run_states = [x for x in override_run_states if not x.startswith(' ')]

    print('Gonna run these states:')
    for name in sorted(override_run_states):
        print(name)

    model_type_name = f'frechet_{region}_region'

    if override_max_date_str is None:
        hyperparameter_max_date_str = datetime.datetime.today().strftime('%Y-%m-%d')
    else:
        hyperparameter_max_date_str = override_max_date_str
    date_tag = hyperparameter_max_date_str.replace("-", "_")

    state_models_filename = f'state_models_smoothed_frechet_{n_bootstraps}_bootstraps_{n_likelihood_samples}_likelihood_samples_{date_tag}_max_date.joblib'
    state_report_filename = f'state_report_smoothed_frechet_{n_bootstraps}_bootstraps_{n_likelihood_samples}_likelihood_samples_{date_tag}_max_date.joblib'

    # fixing parameters I don't want to train for saves a lot of computer
    # power; no parameters are held fixed for this model at the moment.
    static_params = {}

    logarithmic_params = [
        'alpha_positive',
        's_positive',
        'alpha_deceased',
        's_deceased',
        'sigma_positive',
        'sigma_deceased',
    ]

    sorted_init_condit_names = []
    sorted_param_names = [
        'alpha_positive',
        's_positive',
        'm_positive',
        'mult_positive',
        'alpha_deceased',
        's_deceased',
        'm_deceased',
        'mult_deceased',
        'sigma_positive',
        'sigma_deceased',
    ]
    # Every fitted parameter is plotted; kept as a separate list object.
    plot_param_names = list(sorted_param_names)

    extra_params = {}

    curve_fit_bounds = {
        'alpha_positive': (1e-8, 1000),
        's_positive': (1e-3, 1000),
        'm_positive': (-1000, 1000),
        'mult_positive': (1e-8, 1e12),
        'alpha_deceased': (1e-8, 1000),
        's_deceased': (1e-3, 1000),
        'm_deceased': (-1000, 1000),
        'mult_deceased': (1e-8, 1e12),
        'sigma_positive': (0, 100),
        'sigma_deceased': (0, 100),
    }

    test_params = {
        'alpha_positive': 8,
        's_positive': 200,
        'm_positive': -50,
        'mult_positive': 2e6,
        'alpha_deceased': 8,
        's_deceased': 140,
        'm_deceased': -50,
        'mult_deceased': 1e5,
        'sigma_positive': 0.5,
        'sigma_deceased': 0.3,
    }

    # uniform priors with bounds (same dict object as curve_fit_bounds):
    priors = curve_fit_bounds

    return run_everything_imported(
        override_run_states,
        FrechetModel,
        load_data,
        model_type_name=model_type_name,
        state_models_filename=state_models_filename,
        state_report_filename=state_report_filename,
        n_bootstraps=n_bootstraps,
        n_likelihood_samples=n_likelihood_samples,
        load_data_obj=load_data,
        sorted_param_names=sorted_param_names,
        sorted_init_condit_names=sorted_init_condit_names,
        curve_fit_bounds=curve_fit_bounds,
        priors=priors,
        test_params=test_params,
        static_params=static_params,
        opt_force_calc=opt_force_calc,
        opt_force_plot=opt_force_plot,
        logarithmic_params=logarithmic_params,
        extra_params=extra_params,
        plot_param_names=plot_param_names,
        opt_statsmodels=True,
        override_max_date_str=override_max_date_str,
    )