예제 #1
0
    # Ad-hoc driver: fit one hard-coded region and plot the model output
    # against the observed case data.
    from matplotlib import pyplot as plt

    case_counts = parse_tsv()
    scenario_data = load_population_data()
    age_distributions = load_distribution()
    # Region selection: the assignments below overwrite each other, so only
    # the last one ('USA-Texas') takes effect. The earlier ones appear to be
    # left over from manual switching between regions.
    # region = 'JPN-Kagawa'
    region = 'United States of America'
    # region = 'Germany'
    region = 'Switzerland'
    region = 'USA-Texas'
    # The region's 'ages' entry is the key into the age-distribution table.
    age_dis = age_distributions[scenario_data[region]['ages']]
    # The trailing True is `return_fit_res`, so fit_population returns a
    # 3-tuple (region, params, fit_result).
    # NOTE(review): this rebinds the name `fit_params` to the fit result; if
    # this fragment runs at module scope it shadows the `fit_params` function
    # that fit_population calls -- confirm no later call depends on it.
    region, p, fit_params = fit_population(
        (region, case_counts[region], scenario_data[region], age_dis, True))

    model_data = generate_data(fit_params)
    # Differences between entries 7 positions apart; presumably the model
    # series are cumulative daily values, making these weekly increments --
    # TODO confirm against generate_data.
    model_cases = model_data['cases'][7:] - model_data['cases'][:-7]
    model_deaths = model_data['deaths'][7:] - model_data['deaths'][:-7]
    # Drop the first 7 time points so the time axis matches the differenced
    # series above.
    model_time = fit_params.time[7:]

    cases = cumulative_to_rolling_average(
        convert_to_vectors(case_counts[region]))

    print(fit_params.reported, np.exp(fit_params.logInitial))
    print(get_IFR(age_dis))

    # Model curves first, then the observed rolling averages, on a log y-axis.
    plt.plot(model_time, model_cases)
    plt.plot(model_time, model_deaths)
    plt.plot(cases['time'], cases['cases'])
    plt.plot(cases['time'], cases['deaths'])
    plt.yscale('log')
예제 #2
0
def fit_population(args):
    """Fit the model to the most recent case data of a single region.

    All inputs arrive packed in one tuple so the function can be called with
    a single argument:

        (region, json_case_data, pop_params, age_distribution, return_fit_res)

    Args:
        args: tuple as described above, where
            region is the region name (used for logging and echoed back),
            json_case_data is the raw case data handed to
            ``convert_to_vectors``,
            pop_params is a mapping that must provide ``'r0'`` and ``'size'``,
            age_distribution is passed to ``get_IFR`` and ``fit_params``,
            return_fit_res selects whether the raw fit result is returned.

    Returns:
        ``(region, params)``, or ``(region, params, fit_result)`` when
        ``return_fit_res`` is true. ``params`` (and ``fit_result``) are
        ``None`` when the rolling averages contain no case or no death series.
        Otherwise ``params`` holds the fixed and fitted parameters as floats
        plus ``'mitigations'``, ``'tMin'`` and ``'tMax'`` (``%Y-%m-%d``).
    """
    # Project-local imports stay function-scoped, as in the original.
    from scripts.estimate_R0 import estimate_Re
    from scripts.load_utils import cumulative_to_rolling_average

    tp_to_eval_seroprevalence = 30  # offset from the end of the death series
    time_range_fit = 60  # number of trailing time points used for the fit
    region, json_case_data, pop_params, age_distribution, return_fit_res = args

    print(f"starting fit for {region}")
    data = convert_to_vectors(json_case_data)
    weekly_data = cumulative_to_rolling_average(data)
    if weekly_data['cases'] is None or weekly_data['deaths'] is None:
        return (region, None, None) if return_fit_res else (region, None)

    # Keep the last `time_range_fit - 7` entries of every series.
    weekly_data_for_fit = {
        key: series[-time_range_fit + 7:]
        for key, series in weekly_data.items()
    }

    # estimate Re and bound from above (usually a problem at the very
    # beginning of a wave)
    res = estimate_Re(weekly_data['cases'], imports=1, cutoff=5, chop=3)
    Re = np.minimum(pop_params['r0'], res['Re'])
    Refit = fit_REblocks(weekly_data['time'][:-8], Re)
    piecewise_constant_Re = Refit.predict(weekly_data['time'].reshape(-1, 1))
    # Indices where the piecewise-constant Re starts a new block, bracketed
    # by 0 and len(Re).
    change_points = np.concatenate(
        [[-1],
         np.where(np.diff(piecewise_constant_Re) != 0)[0], [len(Re) - 1]]) + 1

    total_deaths_at_start = data['deaths'][-tp_to_eval_seroprevalence]
    IFR = get_IFR(age_distribution)
    reporting_fraction = get_reporting_fraction(data['cases'], data['deaths'],
                                                IFR)
    n_cases_at_start = total_deaths_at_start / IFR
    seroprevalence = n_cases_at_start / pop_params['size']
    # A masked or NaN estimate means the death count was unavailable; fall
    # back to zero seroprevalence.
    if np.ma.is_masked(seroprevalence) or np.isnan(seroprevalence):
        seroprevalence = 0

    # One mitigation interval per block of constant Re.
    mitigations = []
    for block_start, next_start in zip(change_points[:-1], change_points[1:]):
        reduction = float(1 - piecewise_constant_Re[block_start] /
                          (1 - seroprevalence) / pop_params['r0'])
        mitigations.append({
            'tMin': int(weekly_data['time'][block_start]),
            'tMax': int(weekly_data['time'][next_start - 1]),
            # clamp to [0, 1]
            'value': max(0, min(1, reduction)),
        })

    tmin = data['time'][-time_range_fit]
    average_Re = np.mean(Re[-30:])
    fixed_params = {
        'logR0': np.log(pop_params['r0']),
        'efficacy': 1 - average_Re / pop_params['r0'],
        'containment_start': tmin,
        'seroprevalence': seroprevalence
    }

    # Initial guess for the number of cases at the start of the fit window:
    # prefer the first case count, then the first death count scaled by the
    # IFR, then 1.
    # NOTE(review): the first branch divides by reporting_fraction without
    # the `> 0` check the death-based branch has -- confirm that
    # get_reporting_fraction never returns 0 here.
    cases_for_fit = weekly_data_for_fit['cases']
    deaths_for_fit = weekly_data_for_fit['deaths']
    if cases_for_fit[0]:
        initial = (0.5 + cases_for_fit[0]) / reporting_fraction
    elif deaths_for_fit[0] and reporting_fraction > 0:
        initial = (0.5 + deaths_for_fit[0]) / reporting_fraction / IFR
    elif deaths_for_fit[0]:
        # No usable reporting fraction: fall back to a fixed 0.3.
        initial = (0.5 + deaths_for_fit[0]) / 0.3 / IFR
    else:
        initial = 1
    guess = {'logInitial': np.log(initial), 'reported': reporting_fraction}

    fit_result, _success = fit_params(data['time'][-time_range_fit:],
                                      weekly_data_for_fit,
                                      guess,
                                      age_distribution,
                                      pop_params['size'],
                                      fixed_params=fixed_params)

    # Fixed parameters first, then the fitted value of every guessed
    # parameter; everything is converted to a plain float.
    params = dict(fixed_params)
    for name in guess:
        params[name] = getattr(fit_result, name)
    params = {name: float(value) for name, value in params.items()}

    params['mitigations'] = mitigations
    # `tmin` is interpreted as a proleptic Gregorian ordinal; the simulated
    # range spans 90 days from it.
    params['tMin'] = datetime.fromordinal(tmin).date().strftime('%Y-%m-%d')
    params['tMax'] = datetime.fromordinal(tmin +
                                          90).date().strftime('%Y-%m-%d')
    if return_fit_res:
        return (region, params, fit_result)
    return (region, params)
예제 #3
0
def generate(output_json, num_procs=1, recalculate=False):
    """Build scenarios for all fitted regions and write them to a JSON file.

    Two sets of fit results are processed in turn: the current fit
    (``FIT_PARAMETERS``) and the first-wave fit
    (``'fit_parameters_1stwave.json'``). The current fit is recomputed with
    ``fit_all_case_data`` when ``recalculate`` is set or its file is missing;
    the first-wave file is only ever read.

    Args:
        output_json: path of the file the scenario array is written to.
        num_procs: forwarded to ``fit_all_case_data``.
        recalculate: force a refit of the current parameters even if a cached
            result file exists.
    """
    import re
    # Matches region names starting with three capitals and a dash
    # (e.g. 'USA-Texas').
    subregion_prefix = re.compile('[A-Z][A-Z][A-Z]-')

    scenarios = []
    case_counts = parse_tsv()
    scenario_data = load_population_data()

    time_range_fit = 60
    for fname in (FIT_PARAMETERS, 'fit_parameters_1stwave.json'):
        first_wave = '1stwave' in fname
        print("reading file", fname)
        fit_fname = os.path.join(BASE_PATH, fname)
        if (recalculate or
            (not os.path.isfile(fit_fname))) and (not first_wave):
            results = fit_all_case_data(num_procs)
            with open(fit_fname, 'w') as fh:
                json.dump(results, fh)
        else:
            with open(fit_fname, 'r') as fh:
                results = json.load(fh)

        for region in scenario_data:
            if region not in results or results[
                    region] is None or region.startswith('FRA-'):
                continue
            fit = results[region]
            if first_wave:  # skip if a small region or not fit to case data (no 'containment_start')
                fit['logInitial'] = np.log(fit['initialCases'])
                fit['tMax'] = '2020-08-31'
                fit['seroprevalence'] = 0.0
                if (subregion_prefix.match(region)
                        and fit['initialCases'] < 100) or (
                            'containment_start' not in fit):
                    continue
            elif np.isnan(fit['logInitial']) or np.isinf(fit['logInitial']):
                continue

            scenario = AllParams(**scenario_data[region],
                                 tMin=fit['tMin'],
                                 tMax=fit['tMax'],
                                 cases_key=region
                                 if region in case_counts else 'None')

            # Reset per region. Previously this was only assigned in the
            # case-count branch, so other regions either hit a NameError or
            # silently inherited the previous region's value.
            tmp_initial = None
            if first_wave:
                # A single intervention from the containment start until one
                # day past the end of the simulated range.
                scenario.mitigation.mitigation_intervals = [
                    MitigationInterval(
                        name="Intervention 1",
                        tMin=datetime.strptime(
                            fit['containment_start'],
                            '%Y-%m-%d').date(),
                        id=uuid4(),
                        tMax=scenario.simulation.simulation_time_range.end +
                        timedelta(1),
                        color=mitigation_colors.get("Intervention 1",
                                                    "#cccccc"),
                        mitigationValue=round(100 * fit['efficacy']))
                ]
            elif region in case_counts:
                set_mitigation(scenario, fit.get('mitigations', []))
                data = convert_to_vectors(case_counts[region])
                weekly_data = cumulative_to_rolling_average(data)
                # The rolling average may carry no case series at all.
                if weekly_data['cases'] is not None:
                    tmp_initial = weekly_data['cases'][-time_range_fit]
            else:
                scenario.mitigation.mitigation_intervals = []

            if len(scenario.mitigation.mitigation_intervals):
                # Extend the last interval to one day past the fit's tMax.
                scenario.mitigation.mitigation_intervals[
                    -1].time_range.end = datetime.strptime(
                        fit['tMax'], '%Y-%m-%d').date() + timedelta(1)
            scenario.population.seroprevalence = round(
                100 * fit['seroprevalence'], 2)
            print(region, tmp_initial)
            if tmp_initial is not None:
                # NOTE(review): the factor 5 is unexplained in the source;
                # presumably an under-reporting correction -- confirm.
                scenario.population.initial_number_of_cases = int(
                    round(tmp_initial * 5))
            # Otherwise the value set by AllParams is kept.
            # NOTE(review): first-wave fits carry 'initialCases'; confirm
            # whether that should be used here instead.

            scenario_name = f"[1st wave] {region}" if first_wave else region
            scenarios.append(ScenarioData(scenario, scenario_name))

    with open(output_json, "w+") as fd:
        output = ScenarioArray(scenarios)
        output.marshalJSON(fd)