Beispiel #1
0
def estimate(time_series: pd.Series) -> pd.DataFrame:
    """Run ``analytical_MPVS`` on *time_series* and tabulate selected outputs.

    The estimator is called with the module-level ``CI`` and ``smooth``
    settings and ``totals=True``. Columns are picked from its result by
    position: items 0-3 become ``date``, ``Rt``, ``Rt_upper`` and
    ``Rt_lower``; the fourth-from-last item (minus its first two entries)
    becomes ``total_cases`` and the third-from-last becomes ``new_cases``.
    """
    result = analytical_MPVS(time_series, CI=CI, smoothing=smooth, totals=True)

    # Insertion order of this dict fixes the column order of the frame.
    columns = {}
    columns["date"] = result[0]
    columns["Rt"] = result[1]
    columns["Rt_upper"] = result[2]
    columns["Rt_lower"] = result[3]
    # The first two entries are dropped so the totals line up with the
    # other columns (presumably the estimator's outputs start two steps in
    # -- verify against analytical_MPVS).
    columns["total_cases"] = result[-4][2:]
    columns["new_cases"] = result[-3]
    return pd.DataFrame(data=columns)
def run_adaptive_model(df: pd.DataFrame, CI: float, smoothing: Callable,
                       filepath: Path) -> None:
    '''
    Runs the adaptive control model of Rt and smoothed case counts, as
    implemented by analytical_MPVS, once per state.

    Args:
        df: case data; the code reads the 'state', 'date' and 'positive'
            columns.
        CI: confidence level forwarded to analytical_MPVS.
        smoothing: smoothing callable forwarded to analytical_MPVS.
        filepath: directory in which "adaptive_estimates.csv" is written.

    Returns:
        None. The per-state results are outer-merged back onto ``df`` on
        ['state', 'date'] and saved to ``filepath / "adaptive_estimates.csv"``.
    '''
    # Collect one frame per state and concatenate once at the end. Growing a
    # frame with pd.concat inside the loop re-copies all earlier rows on
    # every iteration, and seeding it with an empty DataFrame is deprecated.
    state_results = []

    print("Estimating state Rt values...")
    for state in tqdm(df['state'].unique()):

        # Only days with a strictly positive count are fed to the estimator.
        state_df = df[df['state'] == state].set_index('date')
        positive = state_df[state_df['positive'] > 0]['positive']
        (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper,
         T_CI_lower, total_cases, new_cases_ts, _,
         anomaly_dates) = analytical_MPVS(positive,
                                          CI=CI,
                                          smoothing=smoothing)
        # Internal sanity check on the estimator output.
        assert len(dates) == len(RR_pred), \
            f"dates/RR_pred length mismatch for {state}"

        state_results.append(
            pd.DataFrame({
                'state': state,
                'date': dates,
                'RR_pred': RR_pred,
                'RR_CI_upper': RR_CI_upper,
                'RR_CI_lower': RR_CI_lower,
                'T_pred': T_pred,
                'T_CI_upper': T_CI_upper,
                'T_CI_lower': T_CI_lower,
                'new_cases_ts': new_cases_ts,
                # First two totals dropped to match the other columns.
                'total_cases': total_cases[2:],
                # Column name (including its spelling) is kept as-is because
                # it is part of the CSV written below.
                'anamoly': dates.isin(set(anomaly_dates))
            }))

    # With no states at all this falls back to the same empty frame the
    # loop used to start from, so downstream behaviour is unchanged.
    res_full = (pd.concat(state_results, axis=0)
                if state_results else pd.DataFrame())

    # Merge results back onto input df and save to disk
    merged_df = df.merge(res_full, how='outer', on=['state', 'date'])
    merged_df.to_csv(filepath / "adaptive_estimates.csv")
Beispiel #3
0
# Load one case time series per province, keyed by province name.
province_cases = {
    province: load_province_timeseries(data, province)
    for province in provinces
}
# Build a shared daily index running from the earliest to the latest date
# found in any province.
bgn = min(cases.index.min() for cases in province_cases.values())
end = max(cases.index.max() for cases in province_cases.values())
idx = pd.date_range(bgn, end)
# Align every province to that index: gaps are forward-filled ("pad") and
# whatever is still missing afterwards (days before the first observation)
# becomes 0.
province_cases = {
    province: cases.reindex(idx, method="pad").fillna(0)
    for (province, cases) in province_cases.items()
}
# Adding the aligned provincial series together gives the national series.
natl_cases = sum(province_cases.values())

logger.info("running national-level Rt estimate")
# NOTE(review): the names below are this script's reading of the values
# returned by analytical_MPVS; the function itself is defined elsewhere.
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
     = analytical_MPVS(natl_cases, CI = CI, smoothing = smoothing)

# Plot the estimate with its interval bounds; `window` (defined elsewhere)
# is only used in the annotation text.
plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI, ymin=0, ymax=4)\
    .title("\nIndonesia: Reproductive Number Estimate")\
    .xlabel("\ndate")\
    .ylabel("$R_t$", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()

logger.info("running case-forward prediction")
# Seed the SIR model with the last T_pred / Rt_pred values and call run(14).
# The second positional argument is presumably the population -- confirm
# against the SIR constructor.
IDN = SIR("IDN",
          267.7e6,
          dT0=T_pred[-1],
          Rt0=Rt_pred[-1],
          mobility=0,
          random_seed=0).run(14)
Beispiel #4
0
def _estimate_Rt_frame(confirmed):
    """Run ``analytical_MPVS`` on *confirmed* and tabulate its leading outputs.

    Uses the module-level ``CI`` and ``smoothing`` settings (the latter as
    the window of ``notched_smoothing``) with ``totals=True``. ``dates`` is
    shifted by one entry and ``total_cases`` by two so that they line up
    with the other columns.
    """
    (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper,
     T_CI_lower, total_cases, new_cases_ts,
     *_) = analytical_MPVS(confirmed,
                           CI=CI,
                           smoothing=notched_smoothing(window=smoothing),
                           totals=True)
    return pd.DataFrame(
        data={
            "dates": dates[1:],
            "Rt_pred": Rt_pred,
            "Rt_CI_upper": Rt_CI_upper,
            "Rt_CI_lower": Rt_CI_lower,
            "T_pred": T_pred,
            "T_CI_upper": T_CI_upper,
            "T_CI_lower": T_CI_lower,
            "total_cases": total_cases[2:],
            "new_cases_ts": new_cases_ts,
        })


def run_estimates(request):
    """Estimate state- and district-level Rt for one state and upload the CSVs.

    The state code is read from *request* via ``get``. Input files are
    downloaded from the configured bucket into ``/tmp``, and results are
    uploaded under ``pipeline/est/``. Estimation failures are logged and
    skipped, so one bad state or district does not abort the run.

    Returns:
        The string ``"OK!"``.
    """
    state_code = get(request, 'state_code')
    state = state_code_lookup[state_code]

    print(f"Rt estimation for {state} ({state_code}) started")

    bucket = storage.Client().bucket(bucket_name)
    bucket.blob("pipeline/commons/refs/all_crosswalk.dta")\
        .download_to_filename("/tmp/all_crosswalk.dta")

    bucket.blob("pipeline/raw/states.csv")\
        .download_to_filename("/tmp/states.csv")

    bucket.blob("pipeline/raw/districts.csv")\
        .download_to_filename("/tmp/districts.csv")

    crosswalk = pd.read_stata("/tmp/all_crosswalk.dta")
    # State names in the raw files spell "and" out; the lookup uses "&".
    district_cases = pd.read_csv("/tmp/districts.csv")\
        .rename(columns = str.lower)\
        .set_index(["state", "district", "date"])\
        .sort_index()\
        .rename(index = lambda s: s.replace(" and ", " & "), level = 0)\
        .loc[state]
    state_cases = pd.read_csv("/tmp/states.csv")\
        .rename(columns = str.lower)\
        .set_index(["state", "date"])\
        .sort_index()\
        .rename(index = lambda s: s.replace(" and ", " & "), level = 0)\
        .loc[state]
    print(f"Estimating state-level Rt for {state_code}")
    # The crosswalk spells the conjunction " And ".
    normalized_state = state.replace(" and ", " And ").replace(" & ", " And ")
    lgd_state_name, lgd_state_id = crosswalk.query(
        "state_api == @normalized_state").filter(
            like="lgd_state").drop_duplicates().iloc[0]
    try:
        _estimate_Rt_frame(state_cases.iloc[-lookback:-cutoff].confirmed)\
            .assign(state = state, lgd_state_name = lgd_state_name, lgd_state_id = lgd_state_id)\
            .to_csv("/tmp/state_Rt.csv")

        # upload to cloud
        bucket.blob(
            f"pipeline/est/{state_code}_state_Rt.csv").upload_from_filename(
                "/tmp/state_Rt.csv", content_type="text/csv")
    except Exception as e:
        print(f"ERROR when estimating Rt for {state_code}", e)
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print().
        traceback.print_exc()

    if normalized_state in dissolved_states:
        print(f"Skipping district-level Rt for {state_code}")
    else:
        print(f"Estimating district-level Rt for {state} ({state_code})")
        estimates = []
        for district in district_cases.index.get_level_values(0).unique():
            if district.strip() in excluded:
                continue
            print(f"running estimation for [{district}]")
            lgd_district_data = crosswalk.query(
                "state_api == @normalized_state & district_api == @district"
            ).filter(like="lgd_district").drop_duplicates()
            if not lgd_district_data.empty:
                lgd_district_name, lgd_district_id = lgd_district_data.iloc[0]
            else:
                # No crosswalk entry: fall back to the state-level identifiers.
                lgd_district_name, lgd_district_id = lgd_state_name, lgd_state_id
            try:
                estimates.append(
                    _estimate_Rt_frame(
                        district_cases.loc[district]
                        .iloc[-lookback:-cutoff].confirmed
                    ).assign(state=state,
                             lgd_state_name=lgd_state_name,
                             lgd_state_id=lgd_state_id,
                             district=district,
                             lgd_district_name=lgd_district_name,
                             lgd_district_id=lgd_district_id))
            except Exception as e:
                print(f"ERROR when estimating Rt for {district}, {state_code}",
                      e)
                traceback.print_exc()

        if estimates:
            pd.concat(estimates).to_csv("/tmp/district_Rt.csv")

            # upload to cloud
            bucket.blob(
                f"pipeline/est/{state_code}_district_Rt.csv").upload_from_filename(
                    "/tmp/district_Rt.csv", content_type="text/csv")
        else:
            # pd.concat([]) raises; with every district excluded or failed
            # there is nothing to upload, so report and carry on.
            print(f"No district-level Rt estimates produced for {state_code}; "
                  "skipping upload")

    return "OK!"
Beispiel #5
0
def assemble_TN_simulation_initial_conditions():
    """Build per-district initial conditions and write them to CSV.

    For every district listed in ``district_codes`` that is present in
    ``district_age_estimates_padded.csv``, confirmed daily counts
    (``dT``, ``dR``, ``dD``) are smoothed and accumulated. They are then
    scaled by the ratio of the survey-implied totals to the confirmed totals
    at ``survey_date``, and evaluated at ``simulation_start``. An Rt estimate
    for the scaled case series is taken from ``analytical_MPVS``.

    Returns:
        None. The table is written to
        ``data / "simulation_initial_conditions.csv"``.
    """
    def smoothed_daily(daily):
        # Fill missing calendar days with 0, then smooth, floor at 0 and
        # truncate to whole counts.
        daily = daily.reindex(pd.date_range(daily.index.min(),
                                            daily.index.max()),
                              fill_value=0)
        return pd.Series(smooth(daily), index=daily.index).clip(0).astype(int)

    ts = get_state_timeseries()
    rows = []
    district_age_pop = pd.read_csv(
        data /
        "district_age_estimates_padded.csv").dropna().set_index("district")
    for (district, sero_0, N_0, sero_1, N_1, sero_2, N_2, sero_3, N_3, sero_4,
         N_4, sero_5, N_5, sero_6, N_6,
         N_tot) in district_age_pop.filter(items=list(district_codes),
                                           axis=0).itertuples():
        district_ts = ts.loc[district]
        dT_conf_smooth = smoothed_daily(district_ts.dT)
        dR_conf_smooth = smoothed_daily(district_ts.dR)
        dD_conf_smooth = smoothed_daily(district_ts.dD)

        T_conf_smooth = dT_conf_smooth.cumsum().astype(int)
        R_conf_smooth = dR_conf_smooth.cumsum().astype(int)
        D_conf_smooth = dD_conf_smooth.cumsum().astype(int)

        T_conf = T_conf_smooth[survey_date]
        R_conf = R_conf_smooth[survey_date]

        # Survey-implied count: per-bin rate times per-bin population, summed.
        R_sero = (sero_0 * N_0 + sero_1 * N_1 + sero_2 * N_2 + sero_3 * N_3 +
                  sero_4 * N_4 + sero_5 * N_5 + sero_6 * N_6)
        # A district with no confirmed recoveries at the survey date would
        # otherwise produce inf/nan here; fall back to a ratio of 1, as the
        # all-India assembler does.
        R_ratio = R_sero / R_conf if R_conf != 0 else 1
        R0 = R_conf_smooth[simulation_start] * R_ratio

        D0 = D_conf_smooth[simulation_start]

        T_sero = R_sero + D0
        T_ratio = T_sero / T_conf if T_conf != 0 else 1
        T0 = T_conf_smooth[simulation_start] * T_ratio

        S0 = N_tot - T0
        dD0 = dD_conf_smooth[simulation_start]
        dT0 = dT_conf_smooth[simulation_start] * T_ratio
        I0 = max(0, (T0 - R0 - D0))

        # The series is already smoothed, so the estimator gets an identity
        # smoother.
        (Rt_dates, Rt_est, *_) = analytical_MPVS(T_ratio * dT_conf_smooth,
                                                 CI=CI,
                                                 smoothing=lambda _: _,
                                                 totals=False)
        Rt = dict(zip(Rt_dates, Rt_est))

        rows.append((district, sero_0, N_0, sero_1, N_1, sero_2, N_2, sero_3,
                     N_3, sero_4, N_4, sero_5, N_5, sero_6, N_6, N_tot,
                     Rt[simulation_start], S0, I0, R0, D0, dT0, dD0))

    pd.DataFrame(rows,
                 columns=[
                     "district", "sero_0", "N_0", "sero_1", "N_1", "sero_2",
                     "N_2", "sero_3", "N_3", "sero_4", "N_4", "sero_5", "N_5",
                     "sero_6", "N_6", "N_tot", "Rt", "S0", "I0", "R0", "D0",
                     "dT0", "dD0"
                 ]).to_csv(data / "simulation_initial_conditions.csv")
    if vax_pct_annual_goal == 0 and vax_effectiveness != 1.00:
        continue
    # grab time series
    D, R = ts.loc[district][["dD", "dR"]].sum()

    dT_conf_district = ts.loc[district].dT
    dT_conf_district = dT_conf_district.reindex(pd.date_range(
        dT_conf_district.index.min(), dT_conf_district.index.max()),
                                                fill_value=0)
    dT_conf_district_smooth = pd.Series(
        smooth(dT_conf_district),
        index=dT_conf_district.index).clip(0).astype(int)

    # run Rt estimation on scaled timeseries
    (Rt_dates, Rt_est, *_) = analytical_MPVS(T_ratio * dT_conf_district_smooth,
                                             CI=CI,
                                             smoothing=lambda _: _,
                                             totals=False)
    Rt = dict(zip(Rt_dates, Rt_est))

    daily_rate = vax_pct_annual_goal / 365
    daily_vax_doses = int(vax_effectiveness * daily_rate * N_district)

    T_scaled = dT_conf_district_smooth.cumsum()[simulation_start] * T_ratio

    model = SIR(
        name=state,
        population=N_district,
        dT0=np.ones(num_sims) *
        (dT_conf_district_smooth[simulation_start] * T_ratio).astype(int),
        Rt0=Rt[simulation_start] * N_district / (N_district - T_scaled),
        I0=np.ones(num_sims) * (T_scaled - R - D),
Beispiel #7
0
         .query("province == 'DKI JAKARTA'")\
         .drop(columns=dkij_drop_cols + ["province"])
dkij["district"] = dkij.district.str.title()

gdf = gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")\
         .query("NAME_1 == 'Jakarta Raya'")\
         .drop(columns=shp_drop_cols)
bbox = shapely.geometry.box(minx = 106.65, maxx = 107.00, miny = -6.40, maxy=-6.05)
gdf = gdf[gdf.intersects(bbox)]

jakarta_districts = dkij.district.str.title().unique()
jakarta_cases = dkij.groupby("date_positiveresult")["id"].count().rename("cases")

logger.info("running province-level Rt estimate")
(dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(jakarta_cases, CI = CI, smoothing = smoothing, totals=False) 

plt.Rt(dates, RR_pred[1:], RR_CI_upper[1:], RR_CI_lower[1:], CI)\
    .title("\nDKI Jakarta: Reproductive Number Estimate")\
    .xlabel("\ndate")\
    .ylabel("$R_t$\n", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()


logger.info("running case-forward prediction")
prediction_period = 14*days
IDN = SIR(name = "IDN", population = 267.7e6, dT0 = T_pred[-1], Rt0 = RR_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0)\
           .run(prediction_period)
 
plt.daily_cases(dates, T_pred[1:], T_CI_upper[1:], T_CI_lower[1:], new_cases_ts[1:], anomaly_dates, anomalies, CI, 
Beispiel #8
0
    # download data from india covid 19 api
    for target in paths['v3'] + paths['v4']:
        download_data(data, target)

    df = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']],
                       v4_paths=[data / filepath for filepath in paths['v4']])
    data_recency = str(df["date_announced"].max()).split()[0]
    run_date = str(pd.Timestamp.now()).split()[0]

    ts = get_time_series(df[df.detected_state == "Delhi"])

    (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower,
     total_cases, new_cases_ts, anomalies,
     anomaly_dates) = analytical_MPVS(ts.delta[ts.delta > 0],
                                      CI=CI,
                                      smoothing=convolution(window=smoothing))
    #= analytical_MPVS(ts.Hospitalized[ts.Hospitalized > 0], CI = CI, smoothing = lambda ts: box_filter(ts, smoothing, 10))

    np.random.seed(33)
    delhi = Model([
        ModelUnit("Delhi",
                  18_000_000,
                  I0=T_pred[-1],
                  RR0=RR_pred[-1],
                  mobility=0)
    ])
    delhi.run(14, np.zeros((1, 1)))

    t_pred = [
        dates[-1] + pd.Timedelta(days=i) for i in range(len(delhi[0].delta_T))
Beispiel #9
0
# Quick look at the simulated daily case series.
plt.plot(sir_model.dT)
plt.show()
# Overlay the R0 input series and the Rt series recorded on the model.
plt.plot(R0_timeseries, "-", color="black", label="$R_0$")
plt.plot(sir_model.Rt, "-", color="dodgerblue", label="$R_t$")
plt.legend(framealpha=1, handlelength=1, loc="best")
plt.PlotDevice().xlabel("time").ylabel("reproductive rate").adjust(left=0.10,
                                                                   bottom=0.15,
                                                                   right=0.99,
                                                                   top=0.99)
plt.ylim(0.5, 1.5)
plt.show()

# 1: parametric scheme:
# NOTE(review): other call sites in this file unpack the 3rd and 4th return
# values as (upper, lower); here they are named (Rt_lb, Rt_ub) and then
# passed to plt.Rt as (Rt_ub, Rt_lb). Confirm the intended order of the
# bounds against analytical_MPVS and plt.Rt.
dates, Rt, Rt_lb, Rt_ub, *_, anomalies, anomaly_dates = analytical_MPVS(
    pd.DataFrame(sir_model.dT),
    smoothing=convolution("uniform", 2),
    CI=0.99,
    totals=False)
# NOTE(review): this assignment rebinds the name `pd`, which the call above
# used as the pandas module; any later `pd.<pandas function>` in the same
# scope will hit the plot object instead.
pd = plt.Rt(dates, Rt, Rt_ub, Rt_lb, ymin = 0.5, ymax = 2.5, CI = 0.99, yaxis_colors = False, format_dates = False, critical_threshold = False)\
    .xlabel("time")\
    .ylabel("reproductive rate")\
    .adjust(left = 0.11, bottom = 0.15, right = 0.98, top = 0.98)
# Draw the simulated Rt twice: a wider white line underneath (zorder 10) and
# the coloured line on top (zorder 11).
plt.plot(sir_model.Rt, "-", color="white", linewidth=3, zorder=10)
sim_rt, = plt.plot(sir_model.Rt,
                   "-",
                   color="dodgerblue",
                   linewidth=2,
                   zorder=11)
# Mark each reported anomaly date with a vertical red line.
anoms = plt.vlines(anomaly_dates, 0, 4, colors="red", linewidth=2, alpha=0.5)
plt.legend([pd.markers["Rt"], sim_rt, anoms],
           ["Estimated $R_t$ (99% CI)", "simulated $R_t$", "anomalies"],
# Load the Jakarta line list, keep only the DKI JAKARTA rows and drop the
# columns that are not needed.
cases = pd.read_stata(data/"coviddkijakarta_290920.dta")\
        .query("province == 'DKI JAKARTA'")\
        .drop(columns = dkij_drop_cols + ["province"])
# Lower-case the column names, title-case district names and map each
# subdistrict onto its canonical key in `replacements` (unchanged when no
# entry lists it). `axis` is passed by keyword: pandas 2.0 made it
# keyword-only in set_axis.
cases = cases\
    .set_axis(cases.columns.str.lower(), axis = 1)\
    .assign(
        district    = cases.district.str.title(),
        subdistrict = cases.subdistrict.apply(lambda name: next((k for (k, v) in replacements.items() if name in v), name)),
    )

# Age bins: (0, 20], then 10-year bins up to 70, then (70, 100].
cases["age_bin"] = pd.cut(cases.age,
                          bins=[0] + list(range(20, 80, 10)) + [100])
# Number of positive results per (age bin, date).
age_ts = cases[["age_bin", "date_positiveresult"
                ]].groupby(["age_bin",
                            "date_positiveresult"]).size().sort_index()
dkij_max_rts = {}

# All ages together: sum over age bins for each date. Series.sum(level=...)
# was removed in pandas 2.0; groupby(level=...).sum() is its documented
# replacement and returns the dates in sorted order.
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(age_ts.groupby(level = 1).sum(), CI = CI, smoothing = smoothing, totals = False)
r = pd.Series(Rt_pred, index=dates)
# Largest estimate among dates falling in April.
dkij_max_rts["all"] = r[r.index.month_name() == "April"].max()

# Same calculation for each age bin separately.
for age_bin in age_ts.index.get_level_values(0).categories:
    print(age_bin)
    (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(age_ts.loc[age_bin], CI = CI, smoothing = smoothing, totals = False)
    r = pd.Series(Rt_pred, index=dates)
    dkij_max_rts[age_bin] = r[r.index.month_name() == "April"].max()

print(dkij_max_rts)
Beispiel #11
0
# Day-over-day change in 'Hospitalized', computed within each group of the
# first index level, plus the day of week of the second index level.
time_series["delta_I"] = time_series.groupby(level=0)['Hospitalized'].diff()
time_series["dow"] = time_series.index.get_level_values(1).dayofweek
# One figure for the whole table, then one per first-level key.
plot_average_change(time_series,
                    "(All India)",
                    filename=figs / "avg_delta_I_DoW_India.png")
for state in tqdm(time_series.index.get_level_values(0).unique()):
    plot_average_change(time_series.loc[state],
                        f"({state})",
                        filename=figs / f"avg_delta_I_DoW_{state}.png")

# are anomalies falling on certain days?
print("checking anomalies...")
smoothing = 5
# Only the last value returned by analytical_MPVS (unpacked as
# anomaly_dates) is used; the final observation of the series is left out
# via iloc[:-1].
(*_,
 anomaly_dates) = analytical_MPVS(natl_time_series["Hospitalized"].iloc[:-1],
                                  CI=0.95,
                                  smoothing=convolution(window=smoothing))
anomaly_histogram(anomaly_dates,
                  "(All India)",
                  filename=figs / "anomaly_DoW_hist_India.png")
# Repeat the anomaly histogram for each first-level key.
for state in tqdm(time_series.index.get_level_values(0).unique()):
    (*_, anomaly_dates) = analytical_MPVS(
        time_series.loc[state]["Hospitalized"].iloc[:-1],
        CI=0.95,
        smoothing=convolution(window=smoothing))
    anomaly_histogram(anomaly_dates,
                      f"({state})",
                      filename=figs / f"anomaly_DoW_hist_{state}.png")

print("estimating spectral densities...")
# what does the aggregate spectral density look like?
Beispiel #12
0
#     .apply(get_generation_interval, axis = 1)\
#     .dropna()\
#     .value_counts()\
#     .sort_index()
# generation_interval =  generation_interval[(generation_interval.index >= 0) & (generation_interval.index <= 60)]
# generation_interval /= generation_interval.sum()

# Count rows per distinct value of `confirmed` (presumably a confirmation
# date -- verify) and order the counts by that value.
new_cases = cases.confirmed.value_counts().sort_index()
new_cases_smoothed = smoothing(new_cases)
# Raw counts as dots, smoothed counts as a line.
plt.plot(new_cases, '.', color="blue")
plt.plot(new_cases.index, new_cases_smoothed, '-', color="black")
plt.show()

logger.info("running province-level Rt estimate")
# NOTE(review): the names below are this script's reading of the values
# returned by analytical_MPVS; the function itself is defined elsewhere.
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(new_cases, CI = CI, smoothing = smoothing, totals = False)

# `window` (defined elsewhere) is only used in the annotation text.
plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)\
    .title("\nSouth Sulawesi: Reproductive Number Estimate")\
    .xlabel("\ndate")\
    .ylabel("$R_t$\n", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()

logger.info("running case-forward prediction")
prediction_period = 14 * days
# Rows with a `confirmed` value, minus rows with a `recovered` value, minus
# rows with a `died` value.
I0 = (~cases.confirmed.isna()).sum() - (~cases.recovered.isna()).sum() - (
    ~cases.died.isna()).sum()
# Seed the SIR model with the last predicted values and their bounds, then
# run it for the prediction period.
IDN = SIR(name = "IDN", population = 8_819_500, dT0 = T_pred[-1], Rt0 = Rt_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0, I0 = I0)\
           .run(prediction_period)
Beispiel #13
0
# Finish the case time-series figure: titles, legend, layout, then save it
# as fig_1.svg and display it.
plt.PlotDevice()\
    .l_title("case timeseries for Mumbai")\
    .axis_labels(x = "date", y = "daily cases")\
    .legend()\
    .adjust(bottom = 0.15, left = 0.15)\
    .format_xaxis()\
    .size(9.5, 6)\
    .save(figs / "fig_1.svg")\
    .show()

# estimate Rt
from epimargin.estimators import analytical_MPVS

# Only the first four returned values are used here. `smoother` is passed
# positionally as the second argument (presumably the smoothing function --
# confirm against the analytical_MPVS signature).
(dates, Rt, Rt_CI_upper, Rt_CI_lower,
 *_) = analytical_MPVS(training_cases,
                       smoother,
                       infectious_period=10,
                       totals=False)
# The first entry of every series is left out of the plot.
plt.Rt(dates[1:], Rt[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], 0.95, legend_loc = "upper left")\
    .l_title("$R_t$ over time for Mumbai")\
    .axis_labels(x = "date", y = "reproductive rate")\
    .adjust(bottom = 0.15, left = 0.15)\
    .size(9.5, 6)\
    .save(figs / "fig_2.svg")\
    .show()

# set up model
from epimargin.models import SIR

num_sims = 100
N0 = 12.48e6
# Recovered and deceased values from the daily report row labelled `end`.
R0, D0 = daily_reports.loc[end][["recovered", "deceased"]]
Beispiel #14
0
CI        = 0.95

# private data
# Dates in the file are day-first; one row per reported case is assumed, so
# counting rows per date gives a daily series -- TODO confirm.
state_cases = pd.read_csv(data/"Bihar_cases_data_Jul23.csv", parse_dates=["date_reported"], dayfirst=True)
state_ts = state_cases["date_reported"].value_counts().sort_index()
# The third value returned by district_migration_matrix is not used here.
district_names, population_counts, _ = etl.district_migration_matrix(data/"Migration Matrix - District.csv")
populations = dict(zip(district_names, population_counts))

# first, look at state level predictions
(
    dates,
    RR_pred, RR_CI_upper, RR_CI_lower,
    T_pred, T_CI_upper, T_CI_lower,
    total_cases, new_cases_ts,
    anomalies, anomaly_dates
) = analytical_MPVS(state_ts, CI = CI, smoothing = convolution(window = smoothing)) 

plot_RR_est(dates, RR_pred, RR_CI_upper, RR_CI_lower, CI, ymin=0, ymax=4)\
    .title("Bihar: Reproductive Number Estimate Comparisons")\
    .xlabel("Date")\
    .ylabel("Rt", rotation=0, labelpad=20)
plt.ylim(0, 4)

# public data 
# File paths grouped by API version: files 1-2 are "v3", files 3-12 are "v4".
paths = { 
    "v3": [data_path(_) for _ in (1, 2)],
    "v4": [data_path(_) for _ in range(3, 13)]
}

# Fetch every listed file into the data directory.
for target in paths['v3'] + paths['v4']:
    download_data(data, target)
Beispiel #15
0
def assemble_initial_conditions(states = "*", coalesce_states = coalesce_states, simulation_start = simulation_start, survey_date = survey_date, download = False):
    """Assemble per-district initial conditions for the simulation.

    Args:
        states: ``"*"`` to run every state in ``all_india_sero_pop.csv``, or
            a collection of state names to restrict to.
        coalesce_states: states whose districts are collapsed into a single
            pseudo-district named after the state.
        simulation_start: date at which the compartment values are read; when
            it is missing from a series the last available value is used.
        survey_date: date at which confirmed totals are compared with the
            survey-implied totals to obtain the scaling ratios.
        download: forwarded to the data loaders.

    Returns:
        ``(ts, out)``: the case time series used (after any coalescing) and
        a DataFrame with one row per district.
    """
    def smoothed_daily(daily):
        # Fill missing calendar days with 0; smooth only when the series is
        # longer than the smoothing window, otherwise use it unsmoothed.
        daily = daily.reindex(pd.date_range(daily.index.min(), daily.index.max()), fill_value = 0)
        if len(daily) >= window + 1:
            return pd.Series(smooth(daily), index = daily.index).clip(0).astype(int)
        return daily

    def value_at(series, date):
        # Value on `date`, or the last value when `date` is not in the index.
        # .iloc makes the positional fallback explicit instead of relying on
        # the deprecated integer fallback of Series.__getitem__.
        return series[date] if date in series.index else series.iloc[-1]

    def value_at_start(dates, values):
        # Estimate on simulation_start, else the one for the latest date,
        # else 0 when the estimator returned nothing.
        lookup = dict(zip(dates, values))
        if not lookup:
            return 0
        return lookup.get(simulation_start, lookup[max(lookup.keys())])

    rows = []
    district_age_pop = pd.read_csv(data/"all_india_sero_pop.csv").set_index(["state", "district"])
    if states == "*":
        districts_to_run = district_age_pop
    else:
        districts_to_run = district_age_pop[district_age_pop.index.isin(states, level = 0)]

    progress = tqdm(total = 4 * len(districts_to_run) + 11)
    progress.set_description(f"{'loading case data':<20}")
    
    ts  = get_state_timeseries(states, download)
    included_coalesce_states = coalesce_states if states == "*" else list(set(states) & set(coalesce_states))
    if included_coalesce_states:
        # sum data for states to coalesce across districts
        coalesce_ts = get_state_timeseries(included_coalesce_states, download = download, aggregation_cols = ["detected_state"])\
            .reset_index()\
            .assign(detected_district = lambda _:_["detected_state"])\
            .set_index(["detected_state", "detected_district", "status_change_date"])
        
        # replace original entries
        ts = pd.concat([
            ts.drop(labels = included_coalesce_states, axis = 0, level = 0),
            coalesce_ts
        ]).sort_index()

        # sum up seroprevalence in coalesced states: convert each bin's rate
        # to a count, sum counts and populations over districts, then turn
        # the totals back into rates
        districts_to_run = pd.concat(
            [districts_to_run.drop(labels = included_coalesce_states, axis = 0, level = 0)] + 
            [districts_to_run.loc[state]\
                .assign(**{f"infected_{i}": (lambda i: lambda _: _[f"sero_{i}"] * _[f"N_{i}"])(i) for i in range(7)})\
                .drop(columns = [f"sero_{i}" for i in range(7)])\
                .sum(axis = 0)\
                .to_frame().T\
                .assign(**{f"sero_{i}": (lambda i: lambda _: _[f"infected_{i}"] / _[f"N_{i}"])(i) for i in range(7)})\
                [districts_to_run.columns]\
                .assign(state = state, district = state)\
                .set_index(["state", "district"])
            for state in included_coalesce_states]
        ).sort_index()

    vax = load_vax_data(download)
    progress.update(10)
    for ((state, district), 
        sero_0, sero_1, sero_2, sero_3, sero_4, sero_5, sero_6, 
        N_0, N_1, N_2, N_3, N_4, N_5, N_6, N_tot
    ) in districts_to_run.dropna().itertuples():
        progress.set_description(f"{state[:20]:<20}")
        district_ts = ts.loc[state, district]
        
        # recoveries
        dR_conf_smooth = smoothed_daily(district_ts.dR)
        R_conf_smooth  = dR_conf_smooth.cumsum().astype(int)
        R_conf = value_at(R_conf_smooth, survey_date)
        R_sero = (sero_0*N_0 + sero_1*N_1 + sero_2*N_2 + sero_3*N_3 + sero_4*N_4 + sero_5*N_5 + sero_6*N_6)
        R_ratio = R_sero/R_conf if R_conf != 0 else 1 
        R0 = value_at(R_conf_smooth, simulation_start) * R_ratio
        progress.update(1)
        
        # state-level vaccination figure apportioned by population share
        V0 = vax.loc[simulation_start][state] * N_tot / districts_to_run.loc[state].N_tot.sum()
        
        # deaths
        dD_conf_smooth = smoothed_daily(district_ts.dD)
        D_conf_smooth  = dD_conf_smooth.cumsum().astype(int)
        D0 = value_at(D_conf_smooth, simulation_start)
        progress.update(1)
        
        # cases
        dT_conf_smooth = smoothed_daily(district_ts.dT)
        T_conf_smooth  = dT_conf_smooth.cumsum().astype(int)
        T_conf = value_at(T_conf_smooth, survey_date)
        T_sero = R_sero + D0 
        T_ratio = T_sero/T_conf if T_conf != 0 else 1 
        T0 = value_at(T_conf_smooth, simulation_start) * T_ratio
        progress.update(1)

        S0 = max(0, N_tot - T0 - V0)
        dD0 = value_at(dD_conf_smooth, simulation_start)
        dT0 = value_at(dT_conf_smooth, simulation_start) * T_ratio
        I0 = max(0, (T0 - R0 - D0))

        # The series is already smoothed, so the estimator gets an identity
        # smoother.
        (Rt_dates, Rt_est, Rt_CI_upper, Rt_CI_lower, *_) = analytical_MPVS(
            T_ratio * dT_conf_smooth, 
            CI = CI, 
            smoothing = lambda _:_, 
            infectious_period = infectious_period, 
            totals = False
        )
        Rt       = value_at_start(Rt_dates, Rt_est)
        Rt_upper = value_at_start(Rt_dates, Rt_CI_upper)
        Rt_lower = value_at_start(Rt_dates, Rt_CI_lower)

        # Store the computed Rt values; previously literal zeros were written
        # to the "Rt", "Rt_upper" and "Rt_lower" columns and the estimates
        # above were discarded.
        rows.append((state_name_lookup[state], state, district, 
            sero_0, N_0, sero_1, N_1, sero_2, N_2, sero_3, N_3, sero_4, N_4, sero_5, N_5, sero_6, N_6, N_tot, 
            Rt, Rt_upper, Rt_lower, S0, I0, R0, D0, dT0, dD0, V0, T_ratio, R_ratio
        ))
        progress.update(1)
    out = pd.DataFrame(rows, 
        columns = ["state_code", "state", "district", "sero_0", "N_0", "sero_1", "N_1", "sero_2", "N_2", "sero_3", "N_3", "sero_4", "N_4", "sero_5", "N_5", "sero_6", "N_6", "N_tot", "Rt", "Rt_upper", "Rt_lower", "S0", "I0", "R0", "D0", "dT0", "dD0", "V0", "T_ratio", "R_ratio"]
    )
    progress.update(1)
    return (ts, out)
Beispiel #16
0
focus = ts.loc[[
    "Maharashtra", "Madhya Pradesh", "Gujarat", "West Bengal", "Tamil Nadu"
]]
district_estimates = []

for (state, district) in focus.index.droplevel(-1).unique():
    if district in ["Unknown", "Other State"]:
        continue
    print(state, district)
    try:
        (dates, Rt_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper,
         T_CI_lower, total_cases, new_cases_ts, anomalies,
         anomaly_dates) = analytical_MPVS(
             focus.loc[state, district].Hospitalized,
             CI=CI,
             smoothing=notched_smoothing(window=smoothing),
             totals=False)
        district_estimates.append(
            pd.DataFrame(
                data={
                    "dates": dates,
                    "Rt_pred": Rt_pred,
                    "RR_CI_upper": RR_CI_upper,
                    "RR_CI_lower": RR_CI_lower,
                    "T_pred": T_pred,
                    "T_CI_upper": T_CI_upper,
                    "T_CI_lower": T_CI_lower,
                    "total_cases": total_cases[2:],
                    "new_cases_ts": new_cases_ts,
                }).assign(state=state, district=district))
                       for _ in top_level[timeseries]],
                      columns=["date", "total_cases"])
    df["date"] = (date_scale * df["date"]).apply(pd.Timestamp)
    return df.set_index("date")


logger.info("district-level projections")

pops = [sum([2_430_410, 910_381, 2_164_070, 2_817_994, 1_729_444, 23_011])]
dkij = load_province_timeseries(data, district)
R_mandatory = dict()
R_voluntary = dict()

(dates, Rt_pred,
 *_) = analytical_MPVS(dkij,
                       CI=CI,
                       smoothing=notched_smoothing(window=window),
                       totals=True)
Rt = pd.DataFrame(data={"Rt": Rt_pred}, index=dates)
R_mandatory[district] = np.mean(Rt[(Rt.index >= "Sept 21, 2020")])[0]
R_voluntary[district] = np.mean(Rt[(Rt.index < "April 1, 2020")])[0]

si, sf = 0, 10

simulation_results = [
    run_policies([dkij.iloc[-1][0] - dkij.iloc[-2][0]],
                 pops,
                 districts,
                 np.zeros((1, 1)),
                 gamma,
                 R_mandatory,
                 R_voluntary,
Beispiel #18
0
# model details
CI = 0.95
smoothing = 30
alpha = 3.8
beta = 2.25
vs = 0.999

# Each file holds a single column. The `squeeze=True` reader argument was
# removed in pandas 2.0; squeezing the one-column frame afterwards gives the
# same Series.
true_Rt = pd.read_table("./true_Rt.txt", dtype="float").squeeze("columns")
obs_cases = pd.read_table("./obs_cases.txt", dtype="float").squeeze("columns")

(dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
     obs_cases,
     CI=CI,
     alpha=alpha,
     beta=beta,
     variance_shift=vs,
     smoothing=lambda ts: box_filter(ts, smoothing, smoothing // 2))

print("Rt today:", RR_pred[-1])
# Sum of squared errors against the tail of the true series that matches the
# length of the estimate.
print("a, b, vs, MSE:", alpha, beta, vs,
      ((true_Rt.loc[len(true_Rt) - len(RR_pred):] - RR_pred)**2).sum())

# Bounds are passed as (upper, lower), matching the other plot_RR_est call
# in this file; they were previously given in the opposite order.
plot_RR_est(dates, RR_pred, RR_CI_upper, RR_CI_lower, CI)\
    .ylabel("Estimated $R_t$")\
    .title("Synthetic Data Estimation")\
    .size(11, 8)
plt.plot(true_Rt.index, true_Rt.values, 'k--', label="True $R_t$")
plt.xlim(0, 150)
plt.ylim(0, 2.5)
Beispiel #19
0
def estimate(ts, smoothing):
    """Return the R estimates for ``ts.Hospitalized`` as a date-indexed frame.

    ``analytical_MPVS`` is run on the ``Hospitalized`` column with the given
    ``smoothing`` callable; only its first two outputs (dates and R) are kept.
    When a returned date is a tuple, its second element is used as the date.
    """
    state_dates, R, *_ = analytical_MPVS(ts.Hospitalized, smoothing=smoothing)

    dates = []
    for entry in state_dates:
        # Tuple entries carry the date in position 1; plain entries are the date.
        dates.append(entry[1] if isinstance(entry, tuple) else entry)

    frame = pd.DataFrame({"date": dates, "R": R})
    return frame.set_index("date")
Beispiel #20
0
    for province in provinces
}
# Shared daily calendar running from the earliest to the latest date seen in
# any province's series.
bgn = min(series.index.min() for series in province_cases.values())
end = max(series.index.max() for series in province_cases.values())
idx = pd.date_range(bgn, end)

# Put every province on that calendar: forward-fill from the last available
# value, and set whatever is still missing to 0.
province_cases = {
    name: series.reindex(idx, method="pad").fillna(0)
    for name, series in province_cases.items()
}

prediction_period = 14 * days
for province in provinces:
    title = province.title().replace("Dki", "DKI")
    logger.info(title)

    # Rt and case-count estimates for this province.
    (
        dates,
        RR_pred, RR_CI_upper, RR_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        anomalies, anomaly_dates,
    ) = analytical_MPVS(province_cases[province], CI=CI, smoothing=smoothing)

    # Forward simulation seeded with the most recent estimates.
    IDN = Model.single_unit(
        name=province,
        population=priority_pops[province],
        I0=T_pred[-1],
        RR0=RR_pred[-1],
        upper_CI=T_CI_upper[-1],
        lower_CI=T_CI_lower[-1],
        mobility=0,
        random_seed=0,
    ).run(prediction_period)

    (
        plt.Rt(dates, RR_pred, RR_CI_upper, RR_CI_lower, CI, ymin=0.2, ymax=4.5)
        .title(f"{title}")
        .xlabel("\ndate")
        .ylabel("$R_t$", rotation=0, labelpad=30)
        .show()
    )

    # plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI, IDN[0].delta_T[:-1], IDN[0].lower_CI[1:], IDN[0].upper_CI[1:])\
    #     .title(f"\n{title}")\
    #     .xlabel("\ndate")\
    #     .ylabel("cases")\
    #     .show()
Beispiel #21
0
# Time series built from `df`, keyed by "detected_state".
ts = get_time_series(df, "detected_state")

# Candidate states for the estimation run.
states = [
    "Maharashtra", "Punjab", "West Bengal", "Bihar", "Delhi", "Andhra Pradesh",
    "Telangana", "Tamil Nadu", "Madhya Pradesh"
]

# NOTE: `states[:1]` restricts the run to the first entry of the list only.
for state in states[:1]:
    print(state)
    print("  + running estimation...")

    # Run the estimator on this state's `Hospitalized` series, passing an
    # explicit infectious period and totals=False.
    (inf_dates, inf_Rt_pred, inf_Rt_CI_upper, inf_Rt_CI_lower, inf_T_pred,
     inf_T_CI_upper, inf_T_CI_lower, inf_total_cases, inf_new_cases_ts,
     inf_anomalies, inf_anomaly_dates) = analytical_MPVS(
         ts.loc[state].Hospitalized,
         CI=CI,
         smoothing=notched_smoothing(window=smoothing),
         infectious_period=infectious_period,
         totals=False)
    # Collect the estimator outputs into one frame, one row per date.
    inf_estimates = pd.DataFrame(
        data={
            "dates": inf_dates,
            "Rt_pred": inf_Rt_pred,
            "Rt_CI_upper": inf_Rt_CI_upper,
            "Rt_CI_lower": inf_Rt_CI_lower,
            "T_pred": inf_T_pred,
            "T_CI_upper": inf_T_CI_upper,
            "T_CI_lower": inf_T_CI_lower,
            # First two entries are dropped -- presumably so the length
            # matches the other columns; TODO confirm against analytical_MPVS.
            "total_cases": inf_total_cases[2:],
            "new_cases_ts": inf_new_cases_ts,
        })
    inf_estimates["anomaly"] = inf_estimates["dates"].isin(
Beispiel #22
0
figs = root / "figs/comparison/kaggle"

# Only Maharashtra is active. Other candidates kept for reference:
# "Bihar", "Delhi", "Andhra Pradesh", "Telangana", "Tamil Nadu", "Madhya Pradesh"
states = ["Maharashtra"]

# Column 1 is parsed as a day-first date; the frame is indexed by "Date".
kaggle = pd.read_csv(
    data / "covid_19_india.csv",
    parse_dates=[1],
    dayfirst=True,
).set_index("Date")

for state in states:
    print(state)
    print("  + running estimation...")

    # Estimate Rt from this state's "Confirmed" series, smoothed with a box
    # filter of width `smoothing`.
    (
        dates,
        RR_pred, RR_CI_upper, RR_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        anomalies, anomaly_dates,
    ) = analytical_MPVS(
        kaggle.loc[kaggle["State/UnionTerritory"] == state, "Confirmed"],
        CI=CI,
        smoothing=lambda series: box_filter(series, smoothing, 3),
    )

    estimates = pd.DataFrame(
        data=dict(
            dates=dates,
            RR_pred=RR_pred,
            RR_CI_upper=RR_CI_upper,
            RR_CI_lower=RR_CI_lower,
            T_pred=T_pred,
            T_CI_upper=T_CI_upper,
            T_CI_lower=T_CI_lower,
            total_cases=total_cases[2:],
            new_cases_ts=new_cases_ts,
        )
    )
    print("  + Rt today:", RR_pred[-1])
    
    plot_RR_est(dates, RR_pred, RR_CI_lower, RR_CI_upper, CI)\
Beispiel #23
0
    state_cases["geo_reported"] = state_cases.geo_reported.str.strip()
    state_cases = state_cases[state_cases.date_reported <= "2020-09-30"]
    state_ts = state_cases["date_reported"].value_counts().sort_index()
    district_ts = state_cases.groupby(
        ["geo_reported",
         "date_reported"])["date_reported"].count().sort_index()
    districts, pops, migrations = etl.district_migration_matrix(
        data / "Migration Matrix - District.csv")
    districts = sorted([etl.replacements.get(dn, dn) for dn in districts])

    R_mandatory = dict()
    for district in districts:  #district_ts.index.get_level_values(0).unique():
        try:
            (_, Rt,
             *_) = analytical_MPVS(district_ts.loc[district],
                                   CI=CI,
                                   smoothing=notched_smoothing(window=10),
                                   totals=False)
            Rm = np.mean(Rt)
        except ValueError as v:
            Rm = 1.5
        R_mandatory[district] = Rm

    R_voluntary = {district: 1.2 * R for (district, R) in R_mandatory.items()}

    si, sf = 0, 10

    simulation_results = [
        run_policies(state_cases,
                     pops,
                     districts,
                     migrations,
Beispiel #24
0
# Date part (text before the first whitespace) of the newest announcement
# timestamp and of the current time.
data_recency = str(df["date_announced"].max()).split()[0]
run_date = str(pd.Timestamp.now()).split()[0]

ts = get_time_series(df, "detected_state")

# Active states. Other candidates kept for reference:
# ["Maharashtra", "Punjab", "West Bengal", "Bihar", "Delhi", "Andhra Pradesh", "Telangana", "Tamil Nadu", "Madhya Pradesh"]
states = ["Tamil Nadu", "Karnataka"]

for state in states:
    print(state)
    print("  + running estimation...")

    # Estimate Rt from this state's `Hospitalized` series.
    (
        dates,
        Rt_pred, RR_CI_upper, RR_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        anomalies, anomaly_dates,
    ) = analytical_MPVS(
        ts.loc[state].Hospitalized,
        CI=CI,
        smoothing=notched_smoothing(window=smoothing),
        totals=False,
    )

    estimates = pd.DataFrame(
        data=dict(
            dates=dates,
            Rt_pred=Rt_pred,
            RR_CI_upper=RR_CI_upper,
            RR_CI_lower=RR_CI_lower,
            T_pred=T_pred,
            T_CI_upper=T_CI_upper,
            T_CI_lower=T_CI_lower,
            total_cases=total_cases[2:],
            new_cases_ts=new_cases_ts,
        )
    )
    # Prints the last five Rt estimates.
    print("  + Rt today:", Rt_pred[-5:])
Beispiel #25
0
    #     plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI,
    #         prediction_ts = [
    #             (current[0].delta_T[1:], current[0].lower_CI[1:], current[0].upper_CI[1:], "orange", r"projection with current $R_t$"),
    #             (target[0].delta_T[1:],  target[0].lower_CI[1:],  target[0].upper_CI[1:],  "green",  r"projection with $R_t \rightarrow 0.9$")
    #         ])\
    #         .adjust(left = 0.10, right = 0.95, bottom = 0.15, top = 0.95)\
    #         .xlabel("date")\
    #         .ylabel("cases")\
    #         .show()

    # run Indian states
    for (state, pop) in [("Maharashtra", 112374333), ("Gujarat", 60439692),
                         ("Bihar", 104099452)]:
        print(state)
        (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates) \
            = analytical_MPVS(india[india.state == state][["date", "confirmed"]].set_index("date")  , CI = CI, smoothing = smoothing, totals=True)
        dates = [pd.Timestamp(_).to_pydatetime().date() for _ in dates]
        anomaly_dates = [
            pd.Timestamp(_).to_pydatetime().date() for _ in anomaly_dates
        ]
        model = lambda: Model.single_unit(name=state,
                                          RR0=Rt_pred[-1],
                                          population=pop,
                                          infectious_period=infectious_period,
                                          I0=T_pred[-1],
                                          lower_CI=T_CI_lower[-1],
                                          upper_CI=T_CI_upper[-1],
                                          random_seed=33)
        forward_pred_period = 9
        t_pred = [
            dates[-1] + pd.Timedelta(days=i)
gamma, smoothing, CI = 0.2, 10, 0.95

download_data(data, 'state_wise_daily.csv')

state_df = load_statewise_data(data / "state_wise_daily.csv")
country_time_series = get_time_series(state_df)

estimates = []
timeseries = []

# country level: the estimator is run on every observation except the last
(
    dates,
    RR_pred, RR_CI_upper, RR_CI_lower,
    T_pred, T_CI_upper, T_CI_lower,
    total_cases, new_cases_ts,
    anomalies, anomaly_dates,
) = analytical_MPVS(
    country_time_series["Hospitalized"].iloc[:-1],
    CI=CI,
    smoothing=notched_smoothing(window=smoothing),
)

# One (code, date, Rt, upper, lower) tuple per estimated date.
country_code = state_name_lookup["India"]
for row in zip(dates, RR_pred, RR_CI_upper, RR_CI_lower):
    timeseries.append((country_code,) + row)

# state level rt estimates
state_time_series = get_time_series(state_df, 'state')
state_names = list(state_time_series.index.get_level_values(0).unique())
# Longest state name plus one character.
max_len = max(len(name) for name in state_names) + 1
with tqdm(state_names) as states:
    for state in states:
        state_code = state_name_lookup[state]
        states.set_description(f"{state :<{max_len}}")
        try:
Beispiel #27
0
# plt.semilogy()
plt.show()

# supplement: Rt distribution (state)
# The `level=` keyword of DataFrame/Series reductions (`ts.sum(level=...)`)
# was deprecated in pandas 1.3 and removed in 2.0. The documented replacement
# is an explicit groupby on the same index levels. `sort=False` keeps the
# group order the removed keyword produced, so results are unchanged on older
# pandas versions.
state_ts = ts.groupby(level=[0, 2], sort=False).sum().sort_index().drop(labels=[
    "State Unassigned", "Lakshadweep", "Ladakh", "Andaman And Nicobar Islands",
    "Goa"
] + [
    "Sikkim", "Chandigarh", "Mizoram", "Puducherry", "Arunachal Pradesh",
    "Nagaland", "Manipur", "Meghalaya", "Tripura", "Himachal Pradesh"
] + ["Dadra And Nagar Haveli And Daman And Diu"])

# Totals over everything except the last index level.
india_ts = ts.groupby(level=-1, sort=False).sum()

# Rt for the "TT" series on the window from Jan 1, 2021 to simulation_start.
# The series is passed through unsmoothed (identity smoothing callable).
_, Rt_TT, Rt_CI_upper_TT, Rt_CI_lower_TT, *_ = analytical_MPVS(
    dT_conf_scaled_smooth_TT.loc["Jan 1, 2021":simulation_start],
    smoothing=lambda series: series,
    infectious_period=infectious_period,
    totals=False,
)
# Mean of the last seven values of the point estimate and both CI bounds.
Rt_TTn, Rt_CI_upper_TTn, Rt_CI_lower_TTn = [
    np.mean(values[-7:]) for values in (Rt_TT, Rt_CI_upper_TT, Rt_CI_lower_TT)
]

# state -> [mean Rt, mean upper CI, mean lower CI] over the last seven values
Rt_dist = {}
for state in state_ts.index.get_level_values(0).unique():
    # NOTE(review): dT_conf_scaled_smooth is computed here but the estimate
    # below reads state_ts instead -- confirm whether that is intended.
    *_, dT_conf_scaled_smooth = sero_scaling(
        district_age_pop.loc[state],
        ts.loc[state].groupby(level=-1, sort=False).sum(),
    )
    _, Rt, Rt_CI_upper, Rt_CI_lower, *_ = analytical_MPVS(
        state_ts.loc[state].loc["Jan 1, 2021":simulation_start].dT,
        smoothing=lambda series: series,
        infectious_period=infectious_period,
        totals=False,
    )
    Rt_dist[state] = [
        np.mean(values[-7:]) for values in (Rt, Rt_CI_upper, Rt_CI_lower)
    ]

Rt_dist = {
    k: v
    for (k, v) in sorted(Rt_dist.items(), key=lambda e: e[1][0], reverse=True)
Beispiel #28
0
def setup(district) -> Tuple[Callable[[str], SIR], pd.DataFrame]:
    """Prepare a SIR model factory and the Rt estimate table for ``district``.

    Initial compartment values are read at ``data_recency`` from the smoothed
    daily and cumulative recovered (``dR``), deceased (``dD``) and case
    (``dT``) counts of the district.

    Returns a pair ``(factory, Rt_estimates)``. ``factory(seed=0)`` builds a
    fresh ``SIR`` for the district with that random seed. ``Rt_estimates`` is
    the frame of ``analytical_MPVS`` outputs for the district's raw ``dT``.
    """

    def on_daily_calendar(series):
        # One row per calendar day between the first and last observation;
        # days without an observation are filled with 0.
        calendar = pd.date_range(series.index.min(), series.index.max())
        return series.reindex(calendar, fill_value=0)

    def smoothed_daily_and_cumulative(series):
        # Smooth, clip negatives to 0 and truncate to int; then accumulate.
        daily = pd.Series(smooth(series), index=series.index).clip(lower=0).astype(int)
        return daily, daily.cumsum().astype(int)

    demographics = simulation_initial_conditions.loc[district]
    district_ts = ts.loc[district]

    # recovered
    recovered_daily = on_daily_calendar(district_ts.dR)
    _, recovered_total = smoothed_daily_and_cumulative(recovered_daily)
    R0 = recovered_total[data_recency]

    # deceased
    deaths_daily = on_daily_calendar(district_ts.dD)
    _, deaths_total = smoothed_daily_and_cumulative(deaths_daily)
    D0 = deaths_total[data_recency]

    # cases: the reindexed copy is only used for smoothing further down; the
    # Rt estimate runs on the district's raw dT series.
    cases_daily = on_daily_calendar(district_ts.dT)

    (
        dates,
        Rt_pred, Rt_CI_upper, Rt_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        *_
    ) = analytical_MPVS(
        district_ts.dT,
        CI=CI,
        smoothing=notched_smoothing(window=smoothing),
        totals=False,
    )
    Rt_estimates = pd.DataFrame(
        data=dict(
            dates=dates,
            Rt_pred=Rt_pred,
            Rt_CI_upper=Rt_CI_upper,
            Rt_CI_lower=Rt_CI_lower,
            T_pred=T_pred,
            T_CI_upper=T_CI_upper,
            T_CI_lower=T_CI_lower,
            total_cases=total_cases[2:],
            new_cases_ts=new_cases_ts,
        )
    )

    cases_daily_smooth, cases_total = smoothed_daily_and_cumulative(cases_daily)
    T0 = cases_total[data_recency]
    dT0 = cases_daily_smooth[data_recency]

    # Neither compartment is allowed to go negative.
    S0 = max(0, demographics.N_tot - T0)
    I0 = max(0, T0 - R0 - D0)

    def build_model(seed=0):
        # Everything below is evaluated on each call, not once at setup time.
        age_bins = [f"N_{i}" for i in range(7)]
        ifrs = np.array(list(TN_IFRs.values()))
        return SIR(
            name=district,
            # IFR-weighted sum over the seven N_i bins, divided by N_tot.
            mortality=demographics[age_bins] @ ifrs / demographics.N_tot,
            population=demographics.N_tot,
            random_seed=seed,
            infectious_period=10,
            S0=S0,
            I0=I0,
            R0=R0,
            D0=D0,
            dT0=dT0,
            # Rt at data_recency multiplied by N_tot / S0.
            Rt0=Rt_estimates.set_index("dates").loc[data_recency].Rt_pred * demographics.N_tot / S0,
        )

    return build_model, Rt_estimates
Beispiel #29
0
smoothing = 10
CI = 0.95

state_cases = pd.read_csv(
    data / "Bihar_cases_data_Oct03.csv",
    parse_dates=["date_reported"],
    dayfirst=True,
)
# Number of rows per reporting date, in chronological order.
state_ts = state_cases["date_reported"].value_counts().sort_index()

district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv"
)
populations = {
    name: count for name, count in zip(district_names, population_counts)
}

# first, look at state level predictions
(
    dates,
    Rt_pred, Rt_CI_upper, Rt_CI_lower,
    T_pred, T_CI_upper, T_CI_lower,
    total_cases, new_cases_ts,
    anomalies, anomaly_dates,
) = analytical_MPVS(
    state_ts,
    CI=CI,
    smoothing=notched_smoothing(window=smoothing),
    totals=False,
)

# The Rt series are plotted without their first element.
(
    plt.Rt(dates, Rt_pred[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], CI, ymin=0, ymax=4)
    .title("\nBihar: Reproductive Number Estimate")
    .annotate(f"data from {str(dates[0]).split()[0]} to {str(dates[-1]).split()[0]}")
    .xlabel("date")
    .ylabel("$R_t$", rotation=0, labelpad=20)
    .show()
)

np.random.seed(33)
Bihar = SIR("Bihar",
            99_000_000,
            dT0=T_pred[-1],
            Rt0=Rt_pred[-1],
Beispiel #30
0
            .query("NAME_1.str.startswith('Jakarta')")\
            .drop(columns=shp_drop_cols)
    gdf.NAME_3 = gdf.NAME_3.str.upper()
    bbox = shapely.geometry.box(minx=106.65,
                                maxx=107.00,
                                miny=-6.40,
                                maxy=-6.05)
    gdf = gdf[gdf.intersects(bbox)]

    jakarta_districts = dkij.district.unique()
    jakarta_cases = dkij.groupby("date_positiveresult")["id"].count().rename(
        "cases")

    logger.info("running province-level Rt estimate")
    (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(jakarta_cases, CI = CI, smoothing = smoothing, totals=False)

    plt.Rt(dates, RR_pred[1:], RR_CI_upper[1:], RR_CI_lower[1:], CI)\
        .title("\nDKI Jakarta: Reproductive Number Estimate")\
        .xlabel("\ndate")\
        .ylabel("$R_t$\n", rotation=0, labelpad=30)\
        .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
        .show()

    logger.info("running case-forward prediction")
    prediction_period = 14 * days
    IDN = SIR(name = "IDN", population = 267.7e6, dT0 = T_pred[-1], Rt0 = RR_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0)\
            .run(prediction_period)

    plt.daily_cases(dates, T_pred[1:], T_CI_upper[1:], T_CI_lower[1:], new_cases_ts[1:], anomaly_dates, anomalies, CI, IDN[0].delta_T[:-1], IDN[0].lower_CI[1:], IDN[0].upper_CI[1:])\
        .title("\nDKI Jakarta: Daily Cases")\