Beispiel #1
0
def run_everything():
    """Run the moving-window model pipeline using the surrounding configuration.

    Every value passed on is read from a name defined outside this function
    and forwarded unchanged to ``run_everything_imported``.

    Returns:
        Whatever ``run_everything_imported`` returns.
    """
    # Positional part of the call: what to run, the model class, the date
    # limit and the data source.
    positional_args = (run_states, MovingWindowModel, max_date_str, load_data)

    # Keyword part of the call, grouped here so the call itself stays short.
    keyword_args = dict(
        state_models_filename=state_models_filename,
        state_report_filename=state_report_filename,
        moving_window_size=moving_window_size,
        n_bootstraps=n_bootstraps,
        n_likelihood_samples=n_likelihood_samples,
        load_data_obj=load_data,
        sorted_param_names=sorted_param_names,
        sorted_init_condit_names=sorted_init_condit_names,
        curve_fit_bounds=curve_fit_bounds,
        priors=priors,
        test_params=test_params,
        static_params=static_params,
        opt_calc=opt_calc,
        opt_force_plot=opt_force_plot,
        logarithmic_params=logarithmic_params,
        extra_params=extra_params,
        plot_param_names=plot_param_names,
        opt_statsmodels=True,
        opt_simplified=opt_simplified,
    )

    return run_everything_imported(*positional_args, **keyword_args)
Beispiel #2
0
def _run_everything_sub(region=Region.US_states, override_run_states=None):
    """Run the moving-window model pipeline for the locations of one region.

    Besides running the pipeline, this function rewrites
    ``load_data.current_cases_ranked_us_counties`` and
    ``load_data.current_cases_ranked_non_us_provinces`` on the shared
    ``load_data`` object (tuple unwrapping and province filtering) and prints
    progress information to stdout.

    Args:
        region: ``Region`` member that selects which ranked list of locations
            is run when ``override_run_states`` is not given.
        override_run_states: Explicit iterable of location names to run. When
            given it is used as-is (no truncation, no filtering).

    Returns:
        The value returned by ``run_everything_imported``.

    Raises:
        ValueError: If ``override_run_states`` is None and ``region`` is not
            one of the four handled ``Region`` members.
    """
    # The ranked lists may be wrapped in a tuple; keep only the first element.
    if isinstance(load_data.current_cases_ranked_us_counties, tuple):
        load_data.current_cases_ranked_us_counties = load_data.current_cases_ranked_us_counties[0]
    if isinstance(load_data.current_cases_ranked_non_us_provinces, tuple):
        load_data.current_cases_ranked_non_us_provinces = load_data.current_cases_ranked_non_us_provinces[0]

    # Entries prefixed with 'US:' do not belong in the non-US province list.
    load_data.current_cases_ranked_non_us_provinces = [
        x for x in load_data.current_cases_ranked_non_us_provinces
        if not x.startswith('US:')
    ]

    # Remove provinces without enough data
    new_provinces = []
    for province in load_data.current_cases_ranked_non_us_provinces:
        series_shape = load_data.get_state_data(province)['series_data'].shape
        has_enough_data = series_shape[1] >= 3 and series_shape[0] >= 30
        if has_enough_data and not province.startswith('China:'):
            print(f'Keeping province {province}')
            print(f'  with tmp_dict["series_data"].shape = {series_shape}')
            new_provinces.append(province)
        else:
            print(f'Removing province {province}')
            if series_shape[1] >= 3:
                print(f'  with tmp_dict["series_data"].shape = {series_shape}')
    load_data.current_cases_ranked_non_us_provinces = new_provinces

    print('load_data.current_cases_ranked_non_us_provinces')
    for x in sorted(load_data.current_cases_ranked_non_us_provinces):
        print(x)

    if override_run_states is None:
        # Attributes are looked up lazily so that only the list needed for the
        # requested region has to exist on load_data.
        if region == Region.US_states:
            candidates = load_data.current_cases_ranked_us_states
        elif region == Region.US_counties:
            candidates = load_data.current_cases_ranked_us_counties
        elif region == Region.countries:
            candidates = load_data.current_cases_ranked_non_us_states[:100]
        elif region == Region.provinces:
            candidates = load_data.current_cases_ranked_non_us_provinces[:200]
        else:
            # Fail with a clear message instead of "NoneType is not iterable".
            raise ValueError(f'Unsupported region: {region!r}')

        # Names that start with a space are skipped.
        override_run_states = [x for x in candidates if not x.startswith(' ')]

    print('Gonna run these states:')
    for x in sorted(override_run_states):
        print(x)

    model_type_name = f'moving_window_{moving_window_size}_days_{region}_region'

    if override_max_date_str is None:
        hyperparameter_max_date_str = datetime.datetime.today().strftime('%Y-%m-%d')
    else:
        hyperparameter_max_date_str = override_max_date_str

    # Both output files share everything except their leading prefix.
    date_tag = hyperparameter_max_date_str.replace("-", "_")
    filename_suffix = (f'smoothed_moving_window_{region}_{n_bootstraps}_bootstraps_'
                       f'{n_likelihood_samples}_likelihood_samples_{date_tag}_max_date.joblib')
    state_models_filename = f'state_models_{filename_suffix}'
    state_report_filename = f'state_report_{filename_suffix}'

    # fixing parameters I don't want to train for saves a lot of computer power
    extra_params = dict()
    static_params = {'day0_positive_multiplier': 1,
                     'day0_deceased_multiplier': 1}

    # Day 0 multipliers are fixed in static_params above, so only days 1-6 are
    # fitted: six positive multipliers followed by six deceased multipliers.
    day_multiplier_names = [f'day{day}_{series}_multiplier'
                            for series in ('positive', 'deceased')
                            for day in range(1, 7)]

    logarithmic_params = ['positive_intercept',
                          'deceased_intercept',
                          'sigma_positive',
                          'sigma_deceased',
                          ] + day_multiplier_names

    if opt_simplified:
        plot_param_names = ['positive_slope',
                            'deceased_slope',
                            'positive_intercept',
                            'deceased_intercept', ]
    else:
        plot_param_names = ['positive_slope',
                            'positive_intercept',
                            'deceased_slope',
                            'deceased_intercept',
                            'sigma_positive',
                            'sigma_deceased'
                            ]

    sorted_init_condit_names = list()
    sorted_param_names = ['positive_slope',
                          'positive_intercept',
                          'deceased_slope',
                          'deceased_intercept',
                          'sigma_positive',
                          'sigma_deceased',
                          ] + day_multiplier_names

    curve_fit_bounds = {'positive_slope': (-10, 10),
                        'positive_intercept': (0, 1000000),
                        'deceased_slope': (-10, 10),
                        'deceased_intercept': (0, 1000000),
                        'sigma_positive': (0, 100),
                        'sigma_deceased': (0, 100),
                        }
    curve_fit_bounds.update({name: (0, 10) for name in day_multiplier_names})

    test_params = {'positive_slope': 0,
                   'positive_intercept': 2500,
                   'deceased_slope': 0,
                   'deceased_intercept': 250,
                   'sigma_positive': 0.05,
                   'sigma_deceased': 0.1,
                   }
    test_params.update({name: 1 for name in day_multiplier_names})

    # uniform priors with bounds:
    priors = curve_fit_bounds

    plot_subfolder = run_everything_imported(override_run_states,
                                             MovingWindowModel,
                                             load_data,
                                             model_type_name=model_type_name,
                                             state_models_filename=state_models_filename,
                                             state_report_filename=state_report_filename,
                                             moving_window_size=moving_window_size,
                                             n_bootstraps=n_bootstraps,
                                             n_likelihood_samples=n_likelihood_samples,
                                             load_data_obj=load_data,
                                             sorted_param_names=sorted_param_names,
                                             sorted_init_condit_names=sorted_init_condit_names,
                                             curve_fit_bounds=curve_fit_bounds,
                                             priors=priors,
                                             test_params=test_params,
                                             static_params=static_params,
                                             opt_force_calc=opt_force_calc,
                                             opt_force_plot=opt_force_plot,
                                             logarithmic_params=logarithmic_params,
                                             extra_params=extra_params,
                                             plot_param_names=plot_param_names,
                                             opt_statsmodels=True,
                                             opt_simplified=opt_simplified,
                                             override_max_date_str=override_max_date_str,
                                             )

    return plot_subfolder
Beispiel #3
0
def run_everything():
    """Run the convolution model pipeline using the surrounding configuration.

    Builds the output filenames, the parameter names, bounds, priors and test
    values, picks the locations to run, and forwards everything to
    ``run_everything_imported`` together with ``ConvolutionModel``.

    Returns:
        Whatever ``run_everything_imported`` returns.
    """
    if override_max_date_str is not None:
        hyperparameter_max_date_str = override_max_date_str
    else:
        hyperparameter_max_date_str = datetime.datetime.today().strftime('%Y-%m-%d')

    # The two output files differ only in their leading prefix.
    date_tag = hyperparameter_max_date_str.replace("-", "_")
    filename_tail = (f'smoothed_convolution_{n_bootstraps}_bootstraps_'
                     f'{n_likelihood_samples}_likelihood_samples_{date_tag}_max_date.joblib')
    state_models_filename = f'state_models_{filename_tail}'
    state_report_filename = f'state_report_{filename_tail}'

    # fixing parameters I don't want to train for saves a lot of computer power
    contagious_to_positive_mult = 0.1
    static_params = {
        'contagious_to_positive_width': 7,
        'contagious_to_deceased_width': 7,
        'contagious_to_positive_mult': contagious_to_positive_mult,
    }
    logarithmic_params = [
        'I_0',
        'contagious_to_deceased_mult',
        'sigma_positive',
        'sigma_deceased',
    ]
    sorted_init_condit_names = ['I_0']
    sorted_param_names = [
        'alpha_1',
        'alpha_2',
        'contagious_to_positive_delay',
        'contagious_to_deceased_delay',
        'contagious_to_deceased_mult',
        'sigma_positive',
        'sigma_deceased',
    ]
    plot_param_names = [
        'alpha_1',
        'alpha_2',
        'contagious_to_positive_delay',
        'positive_to_deceased_delay',
        'positive_to_deceased_mult',
    ]

    def get_positive_to_deceased_delay(x, map_name_to_sorted_ind=None):
        # Difference between the two fitted delays stored in x.
        deceased_delay = x[map_name_to_sorted_ind['contagious_to_deceased_delay']]
        positive_delay = x[map_name_to_sorted_ind['contagious_to_positive_delay']]
        return deceased_delay - positive_delay

    def get_positive_to_deceased_mult(x, map_name_to_sorted_ind=None):
        # Fitted deceased multiplier divided by the fixed positive multiplier.
        deceased_mult = x[map_name_to_sorted_ind['contagious_to_deceased_mult']]
        return deceased_mult / contagious_to_positive_mult

    # Derived quantities, keyed by the names used in plot_param_names.
    extra_params = {
        'positive_to_deceased_delay': get_positive_to_deceased_delay,
        'positive_to_deceased_mult': get_positive_to_deceased_mult,
    }

    # The *_width parameters are fixed in static_params, so they get no bounds
    # or test values here.
    curve_fit_bounds = {
        'I_0': (1e-12, 100.0),  # starting infections
        'alpha_1': (-1, 2),
        'alpha_2': (-1, 2),
        'sigma_positive': (0, 100),
        'sigma_deceased': (0, 100),
        'contagious_to_positive_delay': (-14, 21),
        'contagious_to_deceased_delay': (-14, 42),
        'contagious_to_deceased_mult': (1e-12, 1),
    }

    test_params = {
        'I_0': 2e-3,  # starting infections
        'alpha_1': 0.23,
        'alpha_2': 0.01,
        'sigma_positive': 0.01,
        'sigma_deceased': 0.2,
        'contagious_to_positive_delay': 9,
        'contagious_to_deceased_delay': 15,
        'contagious_to_deceased_mult': 0.01,
    }

    # uniform priors with bounds:
    priors = curve_fit_bounds

    # Default order: largest value in map_state_to_current_case_cnt first.
    case_counts = load_data.map_state_to_current_case_cnt
    population_ranked_state_names = sorted(case_counts.keys(),
                                           key=lambda name: -case_counts[name])
    if override_run_states is None:
        run_states = population_ranked_state_names
    else:
        run_states = override_run_states

    return run_everything_imported(
        run_states,
        ConvolutionModel,
        max_date_str,
        load_data,
        state_models_filename=state_models_filename,
        state_report_filename=state_report_filename,
        n_bootstraps=n_bootstraps,
        n_likelihood_samples=n_likelihood_samples,
        load_data_obj=load_data,
        sorted_param_names=sorted_param_names,
        sorted_init_condit_names=sorted_init_condit_names,
        curve_fit_bounds=curve_fit_bounds,
        priors=priors,
        test_params=test_params,
        static_params=static_params,
        opt_force_calc=opt_force_calc,
        opt_force_plot=opt_force_plot,
        logarithmic_params=logarithmic_params,
        extra_params=extra_params,
        plot_param_names=plot_param_names,
    )
def _run_everything_sub(region=Region.US_states, override_run_states=None):
    """Run the Frechet model pipeline for the locations of one region.

    Besides running the pipeline, this function rewrites
    ``load_data.current_cases_ranked_us_counties`` and
    ``load_data.current_cases_ranked_non_us_provinces`` on the shared
    ``load_data`` object (tuple unwrapping and province filtering) and prints
    progress information to stdout.

    Args:
        region: ``Region`` member that selects which ranked list of locations
            is run when ``override_run_states`` is not given.
        override_run_states: Explicit iterable of location names to run. When
            given it is used as-is (no truncation, no filtering).

    Returns:
        The value returned by ``run_everything_imported``.

    Raises:
        ValueError: If ``override_run_states`` is None and ``region`` is not
            one of the four handled ``Region`` members.
    """
    # The ranked lists may be wrapped in a tuple; keep only the first element.
    if isinstance(load_data.current_cases_ranked_us_counties, tuple):
        load_data.current_cases_ranked_us_counties = load_data.current_cases_ranked_us_counties[0]
    if isinstance(load_data.current_cases_ranked_non_us_provinces, tuple):
        load_data.current_cases_ranked_non_us_provinces = load_data.current_cases_ranked_non_us_provinces[0]

    # Entries prefixed with 'US:' do not belong in the non-US province list.
    load_data.current_cases_ranked_non_us_provinces = [
        x for x in load_data.current_cases_ranked_non_us_provinces
        if not x.startswith('US:')
    ]

    # Remove provinces without enough data
    new_provinces = []
    for province in load_data.current_cases_ranked_non_us_provinces:
        series_shape = load_data.get_state_data(province)['series_data'].shape
        has_enough_data = series_shape[1] >= 3 and series_shape[0] >= 30
        if has_enough_data and not province.startswith('China:'):
            print(f'Keeping province {province}')
            print(f'  with tmp_dict["series_data"].shape = {series_shape}')
            new_provinces.append(province)
        else:
            print(f'Removing province {province}')
            if series_shape[1] >= 3:
                print(f'  with tmp_dict["series_data"].shape = {series_shape}')
    load_data.current_cases_ranked_non_us_provinces = new_provinces

    print('load_data.current_cases_ranked_non_us_provinces')
    for x in sorted(load_data.current_cases_ranked_non_us_provinces):
        print(x)

    if override_run_states is None:
        # Attributes are looked up lazily so that only the list needed for the
        # requested region has to exist on load_data.
        if region == Region.US_states:
            candidates = load_data.current_cases_ranked_us_states
        elif region == Region.US_counties:
            candidates = load_data.current_cases_ranked_us_counties[:70]
        elif region == Region.countries:
            candidates = load_data.current_cases_ranked_non_us_states[:70]
        elif region == Region.provinces:
            candidates = load_data.current_cases_ranked_non_us_provinces[:70]
        else:
            # Fail with a clear message instead of "NoneType is not iterable".
            raise ValueError(f'Unsupported region: {region!r}')

        # Names that start with a space are skipped.
        override_run_states = [x for x in candidates if not x.startswith(' ')]

    print('Gonna run these states:')
    for x in sorted(override_run_states):
        print(x)

    model_type_name = f'frechet_{region}_region'

    if override_max_date_str is None:
        hyperparameter_max_date_str = datetime.datetime.today().strftime('%Y-%m-%d')
    else:
        hyperparameter_max_date_str = override_max_date_str

    # Both output files share everything except their leading prefix.
    date_tag = hyperparameter_max_date_str.replace("-", "_")
    filename_suffix = (f'smoothed_frechet_{n_bootstraps}_bootstraps_'
                       f'{n_likelihood_samples}_likelihood_samples_{date_tag}_max_date.joblib')
    state_models_filename = f'state_models_{filename_suffix}'
    state_report_filename = f'state_report_{filename_suffix}'

    # fixing parameters I don't want to train for saves a lot of computer power
    # (currently nothing is fixed: every parameter below is fitted)
    static_params = dict()
    logarithmic_params = [
        'alpha_positive',
        's_positive',
        'alpha_deceased',
        's_deceased',
        'sigma_positive',
        'sigma_deceased',
    ]

    sorted_init_condit_names = list()
    sorted_param_names = [
        'alpha_positive',
        's_positive',
        'm_positive',
        'mult_positive',
        'alpha_deceased',
        's_deceased',
        'm_deceased',
        'mult_deceased',
        'sigma_positive',
        'sigma_deceased',
    ]

    # Every fitted parameter is plotted; a copy keeps the two lists independent.
    plot_param_names = list(sorted_param_names)

    extra_params = dict()

    curve_fit_bounds = {
        'alpha_positive': (1e-8, 1000),
        's_positive': (1e-3, 1000),
        'm_positive': (-1000, 1000),
        'mult_positive': (1e-8, 1e12),
        'alpha_deceased': (1e-8, 1000),
        's_deceased': (1e-3, 1000),
        'm_deceased': (-1000, 1000),
        'mult_deceased': (1e-8, 1e12),
        'sigma_positive': (0, 100),
        'sigma_deceased': (0, 100),
    }

    test_params = {
        'alpha_positive': 8,
        's_positive': 200,
        'm_positive': -50,
        'mult_positive': 2e6,
        'alpha_deceased': 8,
        's_deceased': 140,
        'm_deceased': -50,
        'mult_deceased': 1e5,
        'sigma_positive': 0.5,
        'sigma_deceased': 0.3,
    }

    # uniform priors with bounds:
    priors = curve_fit_bounds

    plot_subfolder = run_everything_imported(
        override_run_states,
        FrechetModel,
        load_data,
        model_type_name=model_type_name,
        state_models_filename=state_models_filename,
        state_report_filename=state_report_filename,
        n_bootstraps=n_bootstraps,
        n_likelihood_samples=n_likelihood_samples,
        load_data_obj=load_data,
        sorted_param_names=sorted_param_names,
        sorted_init_condit_names=sorted_init_condit_names,
        curve_fit_bounds=curve_fit_bounds,
        priors=priors,
        test_params=test_params,
        static_params=static_params,
        opt_force_calc=opt_force_calc,
        opt_force_plot=opt_force_plot,
        logarithmic_params=logarithmic_params,
        extra_params=extra_params,
        plot_param_names=plot_param_names,
        opt_statsmodels=True,
        override_max_date_str=override_max_date_str,
    )

    return plot_subfolder