from matplotlib import pyplot as plt

# --- Ad-hoc debugging script: fit one region and plot model vs. observed data. ---
case_counts = parse_tsv()
scenario_data = load_population_data()
age_distributions = load_distribution()

# Pick the region to fit; exactly one assignment should be active.
# (The previously active alternatives are kept commented out for quick toggling;
# they used to be live dead-store assignments immediately overwritten below.)
# region = 'JPN-Kagawa'
# region = 'United States of America'
# region = 'Germany'
# region = 'Switzerland'
region = 'USA-Texas'

age_dis = age_distributions[scenario_data[region]['ages']]
# fit_population takes a single packed tuple; last element True requests the
# raw fit result object in addition to the serializable params dict.
region, p, fit_params = fit_population(
    (region, case_counts[region], scenario_data[region], age_dis, True))
model_data = generate_data(fit_params)
# 7-day differences turn cumulative model output into weekly incidence.
model_cases = model_data['cases'][7:] - model_data['cases'][:-7]
model_deaths = model_data['deaths'][7:] - model_data['deaths'][:-7]
model_time = fit_params.time[7:]
cases = cumulative_to_rolling_average(
    convert_to_vectors(case_counts[region]))
print(fit_params.reported, np.exp(fit_params.logInitial))
print(get_IFR(age_dis))
plt.plot(model_time, model_cases)
plt.plot(model_time, model_deaths)
plt.plot(cases['time'], cases['cases'])
plt.plot(cases['time'], cases['deaths'])
plt.yscale('log')
def fit_population(args):
    """Fit epidemic-model parameters to one region's case/death counts.

    Parameters
    ----------
    args : tuple
        ``(region, json_case_data, pop_params, age_distribution,
        return_fit_res)`` packed into a single tuple (multiprocessing-friendly).

    Returns
    -------
    tuple
        ``(region, params)`` or, when ``return_fit_res`` is true,
        ``(region, params, fit_result)``.  ``params`` is ``None`` when the
        weekly case or death series is unavailable.
    """
    tp_to_eval_seroprevalence = 30
    from scripts.estimate_R0 import estimate_Re
    from scripts.load_utils import cumulative_to_rolling_average
    region, json_case_data, pop_params, age_distribution, return_fit_res = args
    print(f"starting fit for {region}")
    time_range_fit = 60
    data = convert_to_vectors(json_case_data)
    weekly_data = cumulative_to_rolling_average(data)
    if weekly_data['cases'] is None or weekly_data['deaths'] is None:
        return (region, None, None) if return_fit_res else (region, None)
    # Keep an extra week in front of the fit window for differencing.
    weekly_data_for_fit = {
        k: v[-time_range_fit + 7:]
        for k, v in weekly_data.items()
    }
    # estimate Re and bound from above (usually a problem at the very beginning of a wave)
    res = estimate_Re(weekly_data['cases'], imports=1, cutoff=5, chop=3)
    Re = np.minimum(pop_params['r0'], res['Re'])
    Refit = fit_REblocks(weekly_data['time'][:-8], Re)
    piecewise_constant_Re = Refit.predict(weekly_data['time'].reshape(-1, 1))
    # Indices where the piecewise-constant Re changes value, bracketed by the
    # series start and end.
    change_points = np.concatenate(
        [[-1], np.where(np.diff(piecewise_constant_Re) != 0)[0],
         [len(Re) - 1]]) + 1
    total_deaths_at_start = data['deaths'][-tp_to_eval_seroprevalence]
    IFR = get_IFR(age_distribution)
    reporting_fraction = get_reporting_fraction(data['cases'], data['deaths'],
                                                IFR)
    # Back-calculate total infections from deaths and the IFR.
    n_cases_at_start = total_deaths_at_start / IFR
    seroprevalence = n_cases_at_start / pop_params['size']
    # Guard against NaN or a numpy masked value (missing death data).
    if np.isnan(seroprevalence) or (isinstance(
            seroprevalence,
            np.ma.core.MaskedConstant) and bool(seroprevalence.mask)):
        seroprevalence = 0
    mitigations = []
    for ci, cp in enumerate(change_points[:-1]):
        mitigations.append({
            'tMin': int(weekly_data['time'][cp]),
            'tMax': int(weekly_data['time'][change_points[ci + 1] - 1]),
            # Mitigation strength implied by Re relative to r0, corrected for
            # the already-immune fraction; clamped to [0, 1].
            'value': max(
                0,
                min(
                    1,
                    float(1 - piecewise_constant_Re[cp] /
                          (1 - seroprevalence) / pop_params['r0'])))
        })
    tmin = data['time'][-time_range_fit]
    average_Re = np.mean(Re[-30:])
    fixed_params = {
        'logR0': np.log(pop_params['r0']),
        'efficacy': 1 - average_Re / pop_params['r0'],
        'containment_start': tmin,
        'seroprevalence': seroprevalence
    }
    # Initial guess for the optimizer, from cases if available, else deaths.
    # NOTE(review): the first branch divides by reporting_fraction without the
    # reporting_fraction > 0 guard the second branch has — confirm it can
    # never be zero when cases are present.
    if (weekly_data_for_fit['cases'][0]):
        guess = {
            'logInitial': np.log(
                (0.5 + weekly_data_for_fit['cases'][0]) / reporting_fraction),
            'reported': reporting_fraction
        }
    elif (weekly_data_for_fit['deaths'][0]) and reporting_fraction > 0:
        guess = {
            'logInitial': np.log((0.5 + weekly_data_for_fit['deaths'][0]) /
                                 reporting_fraction / IFR),
            'reported': reporting_fraction
        }
    elif (weekly_data_for_fit['deaths'][0]):
        # Fall back to an assumed 30% reporting when none could be estimated.
        guess = {
            'logInitial': np.log((0.5 + weekly_data_for_fit['deaths'][0]) /
                                 0.3 / IFR),
            'reported': reporting_fraction
        }
    else:
        guess = {'logInitial': np.log(1), 'reported': reporting_fraction}
    fit_result, success = fit_params(data['time'][-time_range_fit:],
                                     weekly_data_for_fit,
                                     guess,
                                     age_distribution,
                                     pop_params['size'],
                                     fixed_params=fixed_params)
    # Assemble a plain-float, JSON-serializable parameter dict.
    params = {}
    params.update(fixed_params)
    for p in guess:
        params[p] = getattr(fit_result, p)
    for p in params:
        params[p] = float(params[p])
    params['mitigations'] = mitigations
    params['tMin'] = datetime.fromordinal(tmin).date().strftime('%Y-%m-%d')
    params['tMax'] = datetime.fromordinal(tmin +
                                          90).date().strftime('%Y-%m-%d')
    if return_fit_res:
        return (region, params, fit_result)
    else:
        return (region, params)
def generate(output_json, num_procs=1, recalculate=False):
    """Build scenario JSON for all regions from (possibly cached) fit results.

    Reads two fit-parameter files (current and first-wave), converts each
    region's fitted parameters into an ``AllParams`` scenario with mitigation
    intervals, and marshals the collected scenarios to ``output_json``.

    :param output_json: path of the scenario JSON file to write
    :param num_procs: worker count passed to ``fit_all_case_data`` when refitting
    :param recalculate: force refitting instead of loading the cached file
    """
    import re
    scenarios = []
    case_counts = parse_tsv()
    scenario_data = load_population_data()
    time_range_fit = 60
    for fname in (FIT_PARAMETERS, 'fit_parameters_1stwave.json'):
        first_wave = '1stwave' in fname
        print("reading file", fname)
        fit_fname = os.path.join(BASE_PATH, fname)
        # Refit only the current-wave file; the first-wave file is always
        # loaded from disk.
        if (recalculate or
            (not os.path.isfile(fit_fname))) and (not first_wave):
            results = fit_all_case_data(num_procs)
            with open(fit_fname, 'w') as fh:
                json.dump(results, fh)
        else:
            with open(fit_fname, 'r') as fh:
                results = json.load(fh)
        for region in scenario_data:
            # Skip regions without a fit; French sub-regions are excluded.
            if region not in results or results[
                    region] is None or region.startswith('FRA-'):
                continue
            if first_wave:
                # First-wave files carry initialCases instead of logInitial;
                # normalize in place and pin the wave end date.
                results[region]['logInitial'] = np.log(
                    results[region]['initialCases'])
                results[region]['tMax'] = '2020-08-31'
                results[region]['seroprevalence'] = 0.0
                # skip if a small region or not fit to case data (no 'containment_start')
                if (re.match('[A-Z][A-Z][A-Z]-', region)
                        and results[region]['initialCases'] < 100) or (
                            'containment_start' not in results[region]):
                    continue
            elif np.isnan(results[region]['logInitial']) or np.isinf(
                    results[region]['logInitial']):
                continue
            scenario = AllParams(
                **scenario_data[region],
                tMin=results[region]['tMin'],
                tMax=results[region]['tMax'],
                cases_key=region if region in case_counts else 'None')
            if first_wave:
                # Single fixed mitigation covering containment_start to the
                # end of the simulation range.
                scenario.mitigation.mitigation_intervals = [
                    MitigationInterval(
                        name="Intervention 1",
                        tMin=datetime.strptime(
                            results[region]['containment_start'],
                            '%Y-%m-%d').date(),
                        id=uuid4(),
                        tMax=scenario.simulation.simulation_time_range.end +
                        timedelta(1),
                        color=mitigation_colors.get("Intervention 1",
                                                    "#cccccc"),
                        mitigationValue=round(100 *
                                              results[region]['efficacy']))
                ]
            elif region in case_counts:
                set_mitigation(scenario, results[region].get('mitigations',
                                                             []))
                data = convert_to_vectors(case_counts[region])
                weekly_data = cumulative_to_rolling_average(data)
                # Weekly case count at the start of the fit window.
                tmp_initial = weekly_data['cases'][-time_range_fit]
            else:
                scenario.mitigation.mitigation_intervals = []
            # Extend the last mitigation interval one day past tMax.
            if len(scenario.mitigation.mitigation_intervals):
                scenario.mitigation.mitigation_intervals[
                    -1].time_range.end = datetime.strptime(
                        results[region]['tMax'],
                        '%Y-%m-%d').date() + timedelta(1)
            scenario.population.seroprevalence = round(
                100 * results[region]['seroprevalence'], 2)
            # NOTE(review): tmp_initial is only assigned in the
            # `elif region in case_counts` branch above — on the first_wave
            # and else paths this either raises NameError (first use) or
            # silently reuses the previous region's value. Confirm intended
            # source of initial cases for those paths.
            print(region, tmp_initial)
            # Factor 5 scales reported weekly cases up to an initial-cases
            # estimate — presumably an under-reporting correction; verify.
            scenario.population.initial_number_of_cases = int(
                round(tmp_initial * 5))
            if first_wave:
                scenario_name = f"[1st wave] {region}"
            else:
                scenario_name = region
            scenarios.append(ScenarioData(scenario, scenario_name))
    with open(output_json, "w+") as fd:
        output = ScenarioArray(scenarios)
        output.marshalJSON(fd)