Exemple #1
0
        ]
        model = lambda: Model.single_unit(name=state,
                                          RR0=Rt_pred[-1],
                                          population=pop,
                                          infectious_period=infectious_period,
                                          I0=T_pred[-1],
                                          lower_CI=T_CI_lower[-1],
                                          upper_CI=T_CI_upper[-1],
                                          random_seed=33)
        forward_pred_period = 9
        t_pred = [
            dates[-1] + pd.Timedelta(days=i)
            for i in range(forward_pred_period + 1)
        ]
        current = model().run(forward_pred_period)
        target = simulate_PID_controller(model(), 0, forward_pred_period)
        plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI, ymin = 0, ymax = 5, yaxis_colors = False)\
            .adjust(left = 0.10, right = 0.95, bottom = 0.15, top = 0.95)\
            .xlabel("date")\
            .ylabel("$R_t$")\
            .show()
        plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI,
            prediction_ts = [
                (current[0].delta_T[1:], current[0].lower_CI[1:], current[0].upper_CI[1:], "orange", r"projection with current $R_t$"),
                (target[0].delta_T[1:],  target[0].lower_CI[1:],  target[0].upper_CI[1:],  "green",  r"projection with $R_t \rightarrow 0.9$")
            ])\
            .adjust(left = 0.10, right = 0.95, bottom = 0.15, top = 0.95)\
            .xlabel("date")\
            .ylabel("cases")\
            .show()
Exemple #2
0
state_ts = state_cases["date_reported"].value_counts().sort_index()
district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv")
populations = dict(zip(district_names, population_counts))

# first, look at state level predictions
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
     state_ts,
     CI=CI,
     smoothing=notched_smoothing(window=smoothing),
     totals=False)

plt.Rt(dates, Rt_pred[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], CI, ymin=0, ymax=4)\
    .title("\nBihar: Reproductive Number Estimate")\
    .annotate(f"data from {str(dates[0]).split()[0]} to {str(dates[-1]).split()[0]}")\
    .xlabel("date")\
    .ylabel("$R_t$", rotation=0, labelpad=20)\
    .show()

np.random.seed(33)
Bihar = SIR("Bihar",
            99_000_000,
            dT0=T_pred[-1],
            Rt0=Rt_pred[-1],
            lower_CI=T_CI_lower[-1],
            upper_CI=T_CI_upper[-1],
            mobility=0)
Bihar.run(14)

t_pred = [dates[-1] + pd.Timedelta(days=i) for i in range(len(Bihar.dT))]
Exemple #3
0
            "Rt_pred": dth_Rt_pred,
            "Rt_CI_upper": dth_Rt_CI_upper,
            "Rt_CI_lower": dth_Rt_CI_lower,
            "T_pred": dth_T_pred,
            "T_CI_upper": dth_T_CI_upper,
            "T_CI_lower": dth_T_CI_lower,
            "total_cases": dth_total_cases[2:],
            "new_cases_ts": dth_new_cases_ts,
        })
    dth_estimates["anomaly"] = dth_estimates["dates"].isin(
        set(dth_anomaly_dates))
    print("  + Rt (dth) today:", inf_Rt_pred[-1])

    fig, axs = plt.subplots(1, 2, sharey=True)
    plt.sca(axs[0])
    plt.Rt(inf_dates, inf_Rt_pred, inf_Rt_CI_lower, inf_Rt_CI_upper, CI)\
        .axis_labels("date", "$R_t$")
    plt.title("estimated from infections",
              loc="left",
              fontdict=plt.theme.label)

    # fig, axs = plt.subplots(3, 1, sharex = True)
    # plt.sca(axs[0])
    # plt.plot(dth_dates, delhi_dD_smoothed[2:], color = "orange")
    # plt.title("d$D$/d$t$", loc = "left", fontdict = plt.theme.label)

    # plt.sca(axs[1])
    # plt.plot(dth_dates, np.diff(delhi_dD_smoothed)[1:], color = "red")
    # plt.title("d$^2D$/d$t^2$", loc = "left", fontdict = plt.theme.label)

    plt.sca(axs[1])
    plt.Rt(dth_dates, dth_Rt_pred, dth_Rt_CI_lower, dth_Rt_CI_upper, CI)\
Exemple #4
0
gdf = gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")\
         .query("NAME_1 == 'Jakarta Raya'")\
         .drop(columns=shp_drop_cols)
bbox = shapely.geometry.box(minx = 106.65, maxx = 107.00, miny = -6.40, maxy=-6.05)
gdf = gdf[gdf.intersects(bbox)]

jakarta_districts = dkij.district.str.title().unique()
jakarta_cases = dkij.groupby("date_positiveresult")["id"].count().rename("cases")

logger.info("running province-level Rt estimate")
(dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(jakarta_cases, CI = CI, smoothing = smoothing, totals=False) 

plt.Rt(dates, RR_pred[1:], RR_CI_upper[1:], RR_CI_lower[1:], CI)\
    .title("\nDKI Jakarta: Reproductive Number Estimate")\
    .xlabel("\ndate")\
    .ylabel("$R_t$\n", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()


logger.info("running case-forward prediction")
prediction_period = 14*days
IDN = SIR(name = "IDN", population = 267.7e6, dT0 = T_pred[-1], Rt0 = RR_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0)\
           .run(prediction_period)
 
plt.daily_cases(dates, T_pred[1:], T_CI_upper[1:], T_CI_lower[1:], new_cases_ts[1:], anomaly_dates, anomalies, CI, 
    prediction_ts = [
        (IDN.dT[:-1], IDN.lower_CI[1:], IDN.upper_CI[1:], None, "predicted cases")
    ])\
    .title("\nDKI Jakarta: Daily Cases")\
    .xlabel("\ndate")\
def generate_report(state_code: str):
    print(f"Received request for {state_code}.")
    state = state_code_lookup[state_code]
    normalized_state = state.replace(" and ", " And ").replace(" & ", " And ")
    blobs = {
        f"pipeline/est/{state_code}_state_Rt.csv":
        f"/tmp/state_Rt_{state_code}.csv",
        f"pipeline/est/{state_code}_district_Rt.csv":
        f"/tmp/district_Rt_{state_code}.csv",
        f"pipeline/commons/maps/{state_code}.json":
        f"/tmp/state_{state_code}.geojson"
    } if normalized_state not in dissolved_states else {
        f"pipeline/est/{state_code}_state_Rt.csv":
        f"/tmp/state_Rt_{state_code}.csv",
    }
    for (blob_name, filename) in blobs.items():
        bucket.blob(blob_name).download_to_filename(filename)
    print(f"Downloaded estimates for {state_code}.")

    state_Rt = pd.read_csv(f"/tmp/state_Rt_{state_code}.csv",
                           parse_dates=["dates"],
                           index_col=0)

    plt.close("all")
    dates = [pd.Timestamp(date).to_pydatetime() for date in state_Rt.dates]
    plt.Rt(dates, state_Rt.Rt_pred, state_Rt.Rt_CI_lower, state_Rt.Rt_CI_upper, CI)\
        .axis_labels("date", "$R_t$")\
        .title(f"{state}: $R_t$ over time", ha = "center", x = 0.5)\
        .adjust(left = 0.11, bottom = 0.16)
    plt.gcf().set_size_inches(3840 / 300, 1986 / 300)
    plt.savefig(f"/tmp/{state_code}_Rt_timeseries.png")
    plt.close()
    print(f"Generated timeseries plot for {state_code}.")

    # check output is at least 50 KB
    timeseries_size_kb = os.stat(
        f"/tmp/{state_code}_Rt_timeseries.png").st_size / 1000
    print(f"Timeseries artifact size: {timeseries_size_kb} kb")
    assert timeseries_size_kb > 50
    bucket.blob(
        f"pipeline/rpt/{state_code}_Rt_timeseries.png").upload_from_filename(
            f"/tmp/{state_code}_Rt_timeseries.png", content_type="image/png")

    if normalized_state not in (island_states + dissolved_states):
        district_Rt = pd.read_csv(f"/tmp/district_Rt_{state_code}.csv",
                                  parse_dates=["dates"],
                                  index_col=0)
        latest_Rt = district_Rt[district_Rt.dates == district_Rt.dates.max(
        )].set_index("district")["Rt_pred"].to_dict()
        top10 = [(k, "> 3.0" if v > 3 else f"{v:.2f}") for (k, v) in sorted(
            latest_Rt.items(), key=lambda t: t[1], reverse=True)[:10]]

        gdf = gpd.read_file(f"/tmp/state_{state_code}.geojson")
        gdf["Rt"] = gdf.district.map(latest_Rt)
        fig, ax = plt.subplots()
        fig.set_size_inches(3840 / 300, 1986 / 300)
        plt.choropleth(gdf, title = None, mappable = plt.get_cmap(0.75, 2.5), fig = fig, ax = ax)\
            .adjust(left = 0)
        plt.sca(fig.get_axes()[0])
        plt.PlotDevice(fig).title(f"{state}: $R_t$ by district",
                                  ha="center",
                                  x=0.5)
        plt.axis('off')
        plt.savefig(f"/tmp/{state_code}_Rt_choropleth.png", dpi=300)
        plt.close()
        print(f"Generated choropleth for {state_code}.")

        # check output is at least 100 KB
        choropleth_size_kb = os.stat(
            f"/tmp/{state_code}_Rt_choropleth.png").st_size / 1000
        print(f"Choropleth artifact size: {choropleth_size_kb} kb")
        assert choropleth_size_kb > 100
        bucket.blob(f"pipeline/rpt/{state_code}_Rt_choropleth.png"
                    ).upload_from_filename(
                        f"/tmp/{state_code}_Rt_choropleth.png",
                        content_type="image/png")
    else:
        print(f"Skipped choropleth for {state_code}.")

    if normalized_state not in dissolved_states:
        fig, ax = plt.subplots(1, 1)
        ax.axis('tight')
        ax.axis('off')
        table = ax.table(cellText=top10,
                         colLabels=["district", "$R_t$"],
                         loc='center',
                         cellLoc="center")
        table.scale(1, 2)
        for (row, col), cell in table.get_celld().items():
            if (row == 0):
                cell.set_text_props(fontfamily=plt.theme.label["family"],
                                    fontsize=plt.theme.label["size"],
                                    fontweight="semibold")
            else:
                cell.set_text_props(fontfamily=plt.theme.label["family"],
                                    fontsize=plt.theme.label["size"],
                                    fontweight="light")
        plt.PlotDevice().title(f"{state}: top districts by $R_t$",
                               ha="center",
                               x=0.5)
        plt.savefig(f"/tmp/{state_code}_Rt_top10.png", dpi=600)
        plt.close()
        print(f"Generated top 10 district listing for {state_code}.")

        # check output is at least 50 KB
        top10_size_kb = os.stat(
            f"/tmp/{state_code}_Rt_top10.png").st_size / 1000
        print(f"Top 10 listing artifact size: {top10_size_kb} kb")
        assert top10_size_kb > 50
        bucket.blob(
            f"pipeline/rpt/{state_code}_Rt_top10.png").upload_from_filename(
                f"/tmp/{state_code}_Rt_top10.png", content_type="image/png")
    else:
        print(f"Skipped top 10 district listing for {state_code}.")

    # sleep for 15 seconds to ensure the images finish saving
    time.sleep(15)

    print(f"Uploaded artifacts for {state_code}.")
    return "OK!"
Exemple #6
0
plt.legend(framealpha=1, handlelength=1, loc="best")
plt.PlotDevice().xlabel("time").ylabel("reproductive rate").adjust(left=0.10,
                                                                   bottom=0.15,
                                                                   right=0.99,
                                                                   top=0.99)
plt.ylim(0.5, 1.5)
plt.show()

# 1: parametric scheme:
dates, Rt, Rt_lb, Rt_ub, *_, anomalies, anomaly_dates = analytical_MPVS(
    pd.DataFrame(sir_model.dT),
    smoothing=convolution("uniform", 2),
    CI=0.99,
    totals=False)
pd = plt.Rt(dates, Rt, Rt_ub, Rt_lb, ymin = 0.5, ymax = 2.5, CI = 0.99, yaxis_colors = False, format_dates = False, critical_threshold = False)\
    .xlabel("time")\
    .ylabel("reproductive rate")\
    .adjust(left = 0.11, bottom = 0.15, right = 0.98, top = 0.98)
plt.plot(sir_model.Rt, "-", color="white", linewidth=3, zorder=10)
sim_rt, = plt.plot(sir_model.Rt,
                   "-",
                   color="dodgerblue",
                   linewidth=2,
                   zorder=11)
anoms = plt.vlines(anomaly_dates, 0, 4, colors="red", linewidth=2, alpha=0.5)
plt.legend([pd.markers["Rt"], sim_rt, anoms],
           ["Estimated $R_t$ (99% CI)", "simulated $R_t$", "anomalies"],
           **pd.legend_props)
plt.show()

# 2: naive MCMC
model, trace, summary = parametric_scheme_mcmc(sir_model.dT,
Exemple #7
0
            "dates": dates,
            "Rt_pred": Rt_pred,
            "RR_CI_upper": RR_CI_upper,
            "RR_CI_lower": RR_CI_lower,
            "T_pred": T_pred,
            "T_CI_upper": T_CI_upper,
            "T_CI_lower": T_CI_lower,
            "total_cases": total_cases[2:],
            "new_cases_ts": new_cases_ts,
        })
    print("  + Rt today:", Rt_pred[-5:])

    plt.Rt(dates, Rt_pred, RR_CI_lower, RR_CI_upper, CI)\
        .ylabel("Estimated $R_t$")\
        .xlabel("Date")\
        .title(state)\
        .size(11, 8)\
        .save(figs/f"Rt_est_{state}.png", dpi=600, bbox_inches="tight")\
        .show()

    estimates["anomaly"] = estimates["dates"].isin(set(anomaly_dates))
    estimates.to_csv(data /
                     f"india_rt_data_{state}_{data_recency}_run{run_date}.csv")

tn_ts = get_time_series(df.query("detected_state == 'Tamil Nadu'"),
                        "detected_district")
for district in tn_ts.index.get_level_values(0).unique()[19:]:
    print(district)
    print("  + running estimation...")
    (dates, Rt_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower,
     total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
    if (i + 1, j + 1) in coords.values():
        continue
    ax_nest[j, i].axis("off")

for ((state, district), (x, y)) in coords.items():
    plt.sca(ax_nest[y - 1, x - 1])
    urban_share = int(
        (1 - serodist.loc[state, ("New " if district == "Delhi" else "") +
                          district]["rural_share"].mean()) * 100)
    density = pop_density.loc[state, district].density
    rt_data = district_estimates.loc[state, district].set_index(
        "dates")["Feb 1, 2021":]
    plt.Rt(rt_data.index,
           rt_data.Rt_pred,
           rt_data.RR_CI_upper,
           rt_data.RR_CI_lower,
           0.95,
           yaxis_colors=False,
           ymin=0.5,
           ymax=2.0)
    if (x, y) != (4, 1):
        plt.gca().get_legend().remove()
    plt.gca().set_xticks([
        pd.Timestamp("February 1, 2021"),
        pd.Timestamp("March 1, 2021"),
        pd.Timestamp("April 1, 2021")
    ])

    plt.PlotDevice()\
        .l_title(district, fontsize = 12)\
        .r_title(f"{urban_share}% urban, {density}/km$^2$", fontsize = 10)
Exemple #9
0
# generation_interval =  generation_interval[(generation_interval.index >= 0) & (generation_interval.index <= 60)]
# generation_interval /= generation_interval.sum()

new_cases = cases.confirmed.value_counts().sort_index()
new_cases_smoothed = smoothing(new_cases)
plt.plot(new_cases, '.', color="blue")
plt.plot(new_cases.index, new_cases_smoothed, '-', color="black")
plt.show()

logger.info("running province-level Rt estimate")
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(new_cases, CI = CI, smoothing = smoothing, totals = False)

plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)\
    .title("\nSouth Sulawesi: Reproductive Number Estimate")\
    .xlabel("\ndate")\
    .ylabel("$R_t$\n", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()

logger.info("running case-forward prediction")
prediction_period = 14 * days
I0 = (~cases.confirmed.isna()).sum() - (~cases.recovered.isna()).sum() - (
    ~cases.died.isna()).sum()
IDN = SIR(name = "IDN", population = 8_819_500, dT0 = T_pred[-1], Rt0 = Rt_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0, I0 = I0)\
           .run(prediction_period)

plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI,
    prediction_ts = [
        (IDN.dT[:-1], IDN.lower_CI[1:], IDN.upper_CI[1:], plt.PRED_PURPLE, "predicted cases")
    ])\
    .title("\nSouth Sulawesi: Daily Cases")\
Exemple #10
0
    .size(9.5, 6)\
    .save(figs / "fig_1.svg")\
    .show()

# estimate Rt
from epimargin.estimators import analytical_MPVS

(dates, Rt, Rt_CI_upper, Rt_CI_lower,
 *_) = analytical_MPVS(training_cases,
                       smoother,
                       infectious_period=10,
                       totals=False)
plt.Rt(dates[1:], Rt[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], 0.95, legend_loc = "upper left")\
    .l_title("$R_t$ over time for Mumbai")\
    .axis_labels(x = "date", y = "reproductive rate")\
    .adjust(bottom = 0.15, left = 0.15)\
    .size(9.5, 6)\
    .save(figs / "fig_2.svg")\
    .show()

# set up model
from epimargin.models import SIR

num_sims = 100
N0 = 12.48e6
R0, D0 = daily_reports.loc[end][["recovered", "deceased"]]
I0 = smoothed_cases[:end].sum()
dT0 = smoothed_cases[end]
S0 = N0 - I0 - R0 - D0
Rt0 = Rt[-1] * N0 / S0
no_lockdown = SIR(name="no lockdown",
Exemple #11
0
state_code = "MH"

state_Rt = pd.read_csv(
    "/Users/satej/Downloads/pipeline_est_MH_state_Rt (1).csv",
    parse_dates=["dates"],
    index_col=0)
district_Rt = pd.read_csv(
    "/Users/satej/Downloads/pipeline_est_MH_district_Rt (2).csv",
    parse_dates=["dates"],
    index_col=0)

latest_Rt = district_Rt[district_Rt.dates == district_Rt.dates.max(
)].set_index("district")["Rt_pred"].to_dict()

plt.Rt(list(state_Rt.dates), state_Rt.Rt_pred, state_Rt.Rt_CI_lower, state_Rt.Rt_CI_upper, CI)\
    .axis_labels("date", "$R_t$")\
    .title("Maharashtra: $R_t$ over time", ha = "center", x = 0.5)\
    .adjust(left = 0.11, bottom = 0.16)
plt.gcf().set_size_inches(3840 / 300, 1986 / 300)
plt.savefig("./MH_Rt_timeseries.png")
plt.clf()

gdf = gpd.read_file("data/maharashtra.json", dpi=600)

gdf["Rt"] = gdf.district.map(latest_Rt)
fig, ax = plt.subplots()
fig.set_size_inches(3840 / 300, 1986 / 300)
plt.choropleth(gdf, title = None, mappable = plt.get_cmap(0.75, 2.5), fig = fig, ax = ax)\
    .adjust(left = 0)
plt.sca(fig.get_axes()[0])
plt.PlotDevice(fig).title(f"{state}: $R_t$ by district", ha="center", x=0.5)
plt.axis('off')
Exemple #12
0
bgn = min(cases.index.min() for cases in province_cases.values())
end = max(cases.index.max() for cases in province_cases.values())
idx = pd.date_range(bgn, end)
province_cases = {
    province: cases.reindex(idx, method="pad").fillna(0)
    for (province, cases) in province_cases.items()
}
natl_cases = sum(province_cases.values())

logger.info("running national-level Rt estimate")
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
     = analytical_MPVS(natl_cases, CI = CI, smoothing = smoothing)

plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI, ymin=0, ymax=4)\
    .title("\nIndonesia: Reproductive Number Estimate")\
    .xlabel("\ndate")\
    .ylabel("$R_t$", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()

logger.info("running case-forward prediction")
IDN = SIR("IDN",
          267.7e6,
          dT0=T_pred[-1],
          Rt0=Rt_pred[-1],
          mobility=0,
          random_seed=0).run(14)

logger.info("province-level projections")
migration = np.zeros((len(provinces), len(provinces)))
estimates = []
max_len = 1 + max(map(len, provinces))
Exemple #13
0
cases = cases.dropna(subset=["age"])
cases["age_bin"] = pd.cut(cases.age,
                          bins=[0] + list(range(20, 80, 10)) + [100])
age_ts = cases[["age_bin",
                "confirmed"]].groupby(["age_bin",
                                       "confirmed"]).size().sort_index()
ss_max_rts = {}

fig, axs = plt.subplots(4, 2, True, True)
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(age_ts.sum(level = 1), CI = CI, smoothing = notched_smoothing(window = 5), totals = False)
plt.sca(axs.flat[0])
plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower,
       CI).annotate(f"all ages").adjust(left=0.04,
                                        right=0.96,
                                        top=0.95,
                                        bottom=0.05,
                                        hspace=0.3,
                                        wspace=0.15)
r = pd.Series(Rt_pred, index=dates)
ss_max_rts["all"] = r[r.index.month_name() == "April"].max()

for (age_bin,
     ax) in zip(age_ts.index.get_level_values(0).categories, axs.flat[1:]):
    print(age_bin)
    (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(age_ts.loc[age_bin], CI = CI, smoothing = smoothing, totals = False)
    plt.sca(ax)
    plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower,
           CI).annotate(f"age bin: {age_bin}")
    ax.get_legend().remove()