] model = lambda: Model.single_unit(name=state, RR0=Rt_pred[-1], population=pop, infectious_period=infectious_period, I0=T_pred[-1], lower_CI=T_CI_lower[-1], upper_CI=T_CI_upper[-1], random_seed=33) forward_pred_period = 9 t_pred = [ dates[-1] + pd.Timedelta(days=i) for i in range(forward_pred_period + 1) ] current = model().run(forward_pred_period) target = simulate_PID_controller(model(), 0, forward_pred_period) plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI, ymin = 0, ymax = 5, yaxis_colors = False)\ .adjust(left = 0.10, right = 0.95, bottom = 0.15, top = 0.95)\ .xlabel("date")\ .ylabel("$R_t$")\ .show() plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI, prediction_ts = [ (current[0].delta_T[1:], current[0].lower_CI[1:], current[0].upper_CI[1:], "orange", r"projection with current $R_t$"), (target[0].delta_T[1:], target[0].lower_CI[1:], target[0].upper_CI[1:], "green", r"projection with $R_t \rightarrow 0.9$") ])\ .adjust(left = 0.10, right = 0.95, bottom = 0.15, top = 0.95)\ .xlabel("date")\ .ylabel("cases")\ .show()
# Daily reported-case counts for the state, ordered by report date.
state_ts = state_cases["date_reported"].value_counts().sort_index()

# District names and populations come from the migration matrix file; the
# third return value is not used here.
district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv"
)
populations = {
    name: count for (name, count) in zip(district_names, population_counts)
}

# first, look at state level predictions
(
    dates,
    Rt_pred, Rt_CI_upper, Rt_CI_lower,
    T_pred, T_CI_upper, T_CI_lower,
    total_cases, new_cases_ts,
    anomalies, anomaly_dates,
) = analytical_MPVS(
    state_ts,
    CI=CI,
    smoothing=notched_smoothing(window=smoothing),
    totals=False,
)

# The first element of each Rt series is dropped before plotting.
(
    plt.Rt(dates, Rt_pred[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], CI, ymin=0, ymax=4)
    .title("\nBihar: Reproductive Number Estimate")
    .annotate(f"data from {str(dates[0]).split()[0]} to {str(dates[-1]).split()[0]}")
    .xlabel("date")
    .ylabel("$R_t$", rotation=0, labelpad=20)
    .show()
)

# Seed NumPy's global RNG before building the model, then project 14 steps
# forward from the latest estimates.
np.random.seed(33)
Bihar = SIR(
    "Bihar",
    99_000_000,
    dT0=T_pred[-1],
    Rt0=Rt_pred[-1],
    lower_CI=T_CI_lower[-1],
    upper_CI=T_CI_upper[-1],
    mobility=0,
)
Bihar.run(14)

# One timestamp per entry of the model's dT series, starting at the last
# estimated date.
t_pred = [
    dates[-1] + pd.Timedelta(days=offset) for (offset, _) in enumerate(Bihar.dT)
]
"Rt_pred": dth_Rt_pred, "Rt_CI_upper": dth_Rt_CI_upper, "Rt_CI_lower": dth_Rt_CI_lower, "T_pred": dth_T_pred, "T_CI_upper": dth_T_CI_upper, "T_CI_lower": dth_T_CI_lower, "total_cases": dth_total_cases[2:], "new_cases_ts": dth_new_cases_ts, }) dth_estimates["anomaly"] = dth_estimates["dates"].isin( set(dth_anomaly_dates)) print(" + Rt (dth) today:", inf_Rt_pred[-1]) fig, axs = plt.subplots(1, 2, sharey=True) plt.sca(axs[0]) plt.Rt(inf_dates, inf_Rt_pred, inf_Rt_CI_lower, inf_Rt_CI_upper, CI)\ .axis_labels("date", "$R_t$") plt.title("estimated from infections", loc="left", fontdict=plt.theme.label) # fig, axs = plt.subplots(3, 1, sharex = True) # plt.sca(axs[0]) # plt.plot(dth_dates, delhi_dD_smoothed[2:], color = "orange") # plt.title("d$D$/d$t$", loc = "left", fontdict = plt.theme.label) # plt.sca(axs[1]) # plt.plot(dth_dates, np.diff(delhi_dD_smoothed)[1:], color = "red") # plt.title("d$^2D$/d$t^2$", loc = "left", fontdict = plt.theme.label) plt.sca(axs[1]) plt.Rt(dth_dates, dth_Rt_pred, dth_Rt_CI_lower, dth_Rt_CI_upper, CI)\
gdf = gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")\ .query("NAME_1 == 'Jakarta Raya'")\ .drop(columns=shp_drop_cols) bbox = shapely.geometry.box(minx = 106.65, maxx = 107.00, miny = -6.40, maxy=-6.05) gdf = gdf[gdf.intersects(bbox)] jakarta_districts = dkij.district.str.title().unique() jakarta_cases = dkij.groupby("date_positiveresult")["id"].count().rename("cases") logger.info("running province-level Rt estimate") (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\ = analytical_MPVS(jakarta_cases, CI = CI, smoothing = smoothing, totals=False) plt.Rt(dates, RR_pred[1:], RR_CI_upper[1:], RR_CI_lower[1:], CI)\ .title("\nDKI Jakarta: Reproductive Number Estimate")\ .xlabel("\ndate")\ .ylabel("$R_t$\n", rotation=0, labelpad=30)\ .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\ .show() logger.info("running case-forward prediction") prediction_period = 14*days IDN = SIR(name = "IDN", population = 267.7e6, dT0 = T_pred[-1], Rt0 = RR_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0)\ .run(prediction_period) plt.daily_cases(dates, T_pred[1:], T_CI_upper[1:], T_CI_lower[1:], new_cases_ts[1:], anomaly_dates, anomalies, CI, prediction_ts = [ (IDN.dT[:-1], IDN.lower_CI[1:], IDN.upper_CI[1:], None, "predicted cases") ])\ .title("\nDKI Jakarta: Daily Cases")\ .xlabel("\ndate")\
def _publish_artifact(local_path: str, blob_name: str, label: str, min_size_kb: float) -> None:
    """Check that a rendered image is plausibly complete, then upload it.

    Prints the size of ``local_path`` in kB (bytes / 1000) and uploads the file
    to ``blob_name`` in the module-level ``bucket`` with content type
    ``image/png``.

    Args:
        local_path: path of the rendered PNG on local disk.
        blob_name: destination blob name inside ``bucket``.
        label: human-readable artifact name used in the size log line.
        min_size_kb: the file must be strictly larger than this many kB.

    Raises:
        AssertionError: if the file is not larger than ``min_size_kb``.
    """
    size_kb = os.stat(local_path).st_size / 1000
    print(f"{label} artifact size: {size_kb} kb")
    # Raised explicitly (instead of a bare ``assert``) so the check is not
    # stripped under ``python -O``; the exception type is unchanged.
    if size_kb <= min_size_kb:
        raise AssertionError(
            f"{local_path} is only {size_kb} kb; expected more than {min_size_kb} kb"
        )
    bucket.blob(blob_name).upload_from_filename(local_path, content_type="image/png")


def generate_report(state_code: str):
    """Render the R_t report images for one state and upload them.

    Downloads the state's estimate files from ``bucket`` into ``/tmp`` and
    produces up to three PNGs under ``pipeline/rpt/``:

    * an R_t time series (always);
    * a district choropleth (skipped for island and dissolved states);
    * a table of the ten districts with the highest latest R_t (skipped for
      dissolved states).

    Args:
        state_code: key into ``state_code_lookup``; also embedded in every
            blob and file name.

    Returns:
        The string ``"OK!"`` once all artifacts have been uploaded.

    Raises:
        AssertionError: if a rendered image is smaller than its minimum size.
    """
    print(f"Received request for {state_code}.")
    state = state_code_lookup[state_code]
    normalized_state = state.replace(" and ", " And ").replace(" & ", " And ")

    # Dissolved states only have a state-level estimate; island states have
    # district estimates but no choropleth.
    has_districts = normalized_state not in dissolved_states
    has_map = has_districts and normalized_state not in island_states

    state_csv = f"/tmp/state_Rt_{state_code}.csv"
    district_csv = f"/tmp/district_Rt_{state_code}.csv"
    geojson = f"/tmp/state_{state_code}.geojson"

    blobs = {f"pipeline/est/{state_code}_state_Rt.csv": state_csv}
    if has_districts:
        blobs[f"pipeline/est/{state_code}_district_Rt.csv"] = district_csv
        blobs[f"pipeline/commons/maps/{state_code}.json"] = geojson
    for (blob_name, filename) in blobs.items():
        bucket.blob(blob_name).download_to_filename(filename)
    print(f"Downloaded estimates for {state_code}.")

    # --- state-level time series -------------------------------------------
    state_Rt = pd.read_csv(state_csv, parse_dates=["dates"], index_col=0)
    plt.close("all")
    dates = [pd.Timestamp(date).to_pydatetime() for date in state_Rt.dates]
    plt.Rt(dates, state_Rt.Rt_pred, state_Rt.Rt_CI_lower, state_Rt.Rt_CI_upper, CI)\
        .axis_labels("date", "$R_t$")\
        .title(f"{state}: $R_t$ over time", ha = "center", x = 0.5)\
        .adjust(left = 0.11, bottom = 0.16)
    plt.gcf().set_size_inches(3840 / 300, 1986 / 300)
    timeseries_png = f"/tmp/{state_code}_Rt_timeseries.png"
    plt.savefig(timeseries_png)
    plt.close()
    print(f"Generated timeseries plot for {state_code}.")
    # check output is at least 50 KB
    _publish_artifact(
        timeseries_png,
        f"pipeline/rpt/{state_code}_Rt_timeseries.png",
        "Timeseries",
        50,
    )

    # --- district-level inputs ---------------------------------------------
    # Loaded whenever district estimates exist, not only when a map is drawn:
    # the top-10 table below needs `top10` for island states as well.
    if has_districts:
        district_Rt = pd.read_csv(district_csv, parse_dates=["dates"], index_col=0)
        latest_Rt = district_Rt[district_Rt.dates == district_Rt.dates.max()]\
            .set_index("district")["Rt_pred"].to_dict()
        # Ten highest latest R_t values; anything above 3 is shown as "> 3.0".
        top10 = [
            (district, "> 3.0" if Rt > 3 else f"{Rt:.2f}")
            for (district, Rt) in sorted(
                latest_Rt.items(), key=lambda t: t[1], reverse=True)[:10]
        ]

    # --- choropleth --------------------------------------------------------
    if has_map:
        gdf = gpd.read_file(geojson)
        gdf["Rt"] = gdf.district.map(latest_Rt)
        fig, ax = plt.subplots()
        fig.set_size_inches(3840 / 300, 1986 / 300)
        plt.choropleth(gdf, title = None, mappable = plt.get_cmap(0.75, 2.5), fig = fig, ax = ax)\
            .adjust(left = 0)
        plt.sca(fig.get_axes()[0])
        plt.PlotDevice(fig).title(f"{state}: $R_t$ by district", ha="center", x=0.5)
        plt.axis('off')
        choropleth_png = f"/tmp/{state_code}_Rt_choropleth.png"
        plt.savefig(choropleth_png, dpi=300)
        plt.close()
        print(f"Generated choropleth for {state_code}.")
        # check output is at least 100 KB
        _publish_artifact(
            choropleth_png,
            f"pipeline/rpt/{state_code}_Rt_choropleth.png",
            "Choropleth",
            100,
        )
    else:
        print(f"Skipped choropleth for {state_code}.")

    # --- top-10 table ------------------------------------------------------
    if has_districts:
        fig, ax = plt.subplots(1, 1)
        ax.axis('tight')
        ax.axis('off')
        table = ax.table(cellText=top10, colLabels=["district", "$R_t$"],
                         loc='center', cellLoc="center")
        table.scale(1, 2)
        for (row, _), cell in table.get_celld().items():
            # Row 0 holds the column labels and is set heavier than the body.
            cell.set_text_props(
                fontfamily=plt.theme.label["family"],
                fontsize=plt.theme.label["size"],
                fontweight="semibold" if row == 0 else "light",
            )
        plt.PlotDevice().title(f"{state}: top districts by $R_t$", ha="center", x=0.5)
        top10_png = f"/tmp/{state_code}_Rt_top10.png"
        plt.savefig(top10_png, dpi=600)
        plt.close()
        print(f"Generated top 10 district listing for {state_code}.")
        # check output is at least 50 KB
        _publish_artifact(
            top10_png,
            f"pipeline/rpt/{state_code}_Rt_top10.png",
            "Top 10 listing",
            50,
        )
    else:
        print(f"Skipped top 10 district listing for {state_code}.")

    # sleep for 15 seconds to ensure the images finish saving
    time.sleep(15)
    print(f"Uploaded artifacts for {state_code}.")
    return "OK!"
plt.legend(framealpha=1, handlelength=1, loc="best") plt.PlotDevice().xlabel("time").ylabel("reproductive rate").adjust(left=0.10, bottom=0.15, right=0.99, top=0.99) plt.ylim(0.5, 1.5) plt.show() # 1: parametric scheme: dates, Rt, Rt_lb, Rt_ub, *_, anomalies, anomaly_dates = analytical_MPVS( pd.DataFrame(sir_model.dT), smoothing=convolution("uniform", 2), CI=0.99, totals=False) pd = plt.Rt(dates, Rt, Rt_ub, Rt_lb, ymin = 0.5, ymax = 2.5, CI = 0.99, yaxis_colors = False, format_dates = False, critical_threshold = False)\ .xlabel("time")\ .ylabel("reproductive rate")\ .adjust(left = 0.11, bottom = 0.15, right = 0.98, top = 0.98) plt.plot(sir_model.Rt, "-", color="white", linewidth=3, zorder=10) sim_rt, = plt.plot(sir_model.Rt, "-", color="dodgerblue", linewidth=2, zorder=11) anoms = plt.vlines(anomaly_dates, 0, 4, colors="red", linewidth=2, alpha=0.5) plt.legend([pd.markers["Rt"], sim_rt, anoms], ["Estimated $R_t$ (99% CI)", "simulated $R_t$", "anomalies"], **pd.legend_props) plt.show() # 2: naive MCMC model, trace, summary = parametric_scheme_mcmc(sir_model.dT,
"dates": dates, "Rt_pred": Rt_pred, "RR_CI_upper": RR_CI_upper, "RR_CI_lower": RR_CI_lower, "T_pred": T_pred, "T_CI_upper": T_CI_upper, "T_CI_lower": T_CI_lower, "total_cases": total_cases[2:], "new_cases_ts": new_cases_ts, }) print(" + Rt today:", Rt_pred[-5:]) plt.Rt(dates, Rt_pred, RR_CI_lower, RR_CI_upper, CI)\ .ylabel("Estimated $R_t$")\ .xlabel("Date")\ .title(state)\ .size(11, 8)\ .save(figs/f"Rt_est_{state}.png", dpi=600, bbox_inches="tight")\ .show() estimates["anomaly"] = estimates["dates"].isin(set(anomaly_dates)) estimates.to_csv(data / f"india_rt_data_{state}_{data_recency}_run{run_date}.csv") tn_ts = get_time_series(df.query("detected_state == 'Tamil Nadu'"), "detected_district") for district in tn_ts.index.get_level_values(0).unique()[19:]: print(district) print(" + running estimation...") (dates, Rt_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
if (i + 1, j + 1) in coords.values(): continue ax_nest[j, i].axis("off") for ((state, district), (x, y)) in coords.items(): plt.sca(ax_nest[y - 1, x - 1]) urban_share = int( (1 - serodist.loc[state, ("New " if district == "Delhi" else "") + district]["rural_share"].mean()) * 100) density = pop_density.loc[state, district].density rt_data = district_estimates.loc[state, district].set_index( "dates")["Feb 1, 2021":] plt.Rt(rt_data.index, rt_data.Rt_pred, rt_data.RR_CI_upper, rt_data.RR_CI_lower, 0.95, yaxis_colors=False, ymin=0.5, ymax=2.0) if (x, y) != (4, 1): plt.gca().get_legend().remove() plt.gca().set_xticks([ pd.Timestamp("February 1, 2021"), pd.Timestamp("March 1, 2021"), pd.Timestamp("April 1, 2021") ]) plt.PlotDevice()\ .l_title(district, fontsize = 12)\ .r_title(f"{urban_share}% urban, {density}/km$^2$", fontsize = 10)
# generation_interval = generation_interval[(generation_interval.index >= 0) & (generation_interval.index <= 60)] # generation_interval /= generation_interval.sum() new_cases = cases.confirmed.value_counts().sort_index() new_cases_smoothed = smoothing(new_cases) plt.plot(new_cases, '.', color="blue") plt.plot(new_cases.index, new_cases_smoothed, '-', color="black") plt.show() logger.info("running province-level Rt estimate") (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\ = analytical_MPVS(new_cases, CI = CI, smoothing = smoothing, totals = False) plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)\ .title("\nSouth Sulawesi: Reproductive Number Estimate")\ .xlabel("\ndate")\ .ylabel("$R_t$\n", rotation=0, labelpad=30)\ .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\ .show() logger.info("running case-forward prediction") prediction_period = 14 * days I0 = (~cases.confirmed.isna()).sum() - (~cases.recovered.isna()).sum() - ( ~cases.died.isna()).sum() IDN = SIR(name = "IDN", population = 8_819_500, dT0 = T_pred[-1], Rt0 = Rt_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0, I0 = I0)\ .run(prediction_period) plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI, prediction_ts = [ (IDN.dT[:-1], IDN.lower_CI[1:], IDN.upper_CI[1:], plt.PRED_PURPLE, "predicted cases") ])\ .title("\nSouth Sulawesi: Daily Cases")\
.size(9.5, 6)\ .save(figs / "fig_1.svg")\ .show() # estimate Rt from epimargin.estimators import analytical_MPVS (dates, Rt, Rt_CI_upper, Rt_CI_lower, *_) = analytical_MPVS(training_cases, smoother, infectious_period=10, totals=False) plt.Rt(dates[1:], Rt[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], 0.95, legend_loc = "upper left")\ .l_title("$R_t$ over time for Mumbai")\ .axis_labels(x = "date", y = "reproductive rate")\ .adjust(bottom = 0.15, left = 0.15)\ .size(9.5, 6)\ .save(figs / "fig_2.svg")\ .show() # set up model from epimargin.models import SIR num_sims = 100 N0 = 12.48e6 R0, D0 = daily_reports.loc[end][["recovered", "deceased"]] I0 = smoothed_cases[:end].sum() dT0 = smoothed_cases[end] S0 = N0 - I0 - R0 - D0 Rt0 = Rt[-1] * N0 / S0 no_lockdown = SIR(name="no lockdown",
# Local, one-off rendering of the Maharashtra report figures from estimate
# files that were downloaded by hand (paths are specific to one machine).
state_code = "MH"
state_Rt = pd.read_csv(
    "/Users/satej/Downloads/pipeline_est_MH_state_Rt (1).csv",
    parse_dates=["dates"], index_col=0)
district_Rt = pd.read_csv(
    "/Users/satej/Downloads/pipeline_est_MH_district_Rt (2).csv",
    parse_dates=["dates"], index_col=0)

# Most recent Rt estimate per district, keyed by district name.
latest_Rt = district_Rt[district_Rt.dates == district_Rt.dates.max()]\
    .set_index("district")["Rt_pred"].to_dict()

# State-level Rt time series.
plt.Rt(list(state_Rt.dates), state_Rt.Rt_pred, state_Rt.Rt_CI_lower, state_Rt.Rt_CI_upper, CI)\
    .axis_labels("date", "$R_t$")\
    .title("Maharashtra: $R_t$ over time", ha = "center", x = 0.5)\
    .adjust(left = 0.11, bottom = 0.16)
plt.gcf().set_size_inches(3840 / 300, 1986 / 300)
plt.savefig("./MH_Rt_timeseries.png")
plt.clf()

# District choropleth of the latest Rt values.
# `read_file` takes no `dpi` argument (it was being forwarded to the file
# reader as an open option), so the stray `dpi=600` has been dropped.
gdf = gpd.read_file("data/maharashtra.json")
gdf["Rt"] = gdf.district.map(latest_Rt)
fig, ax = plt.subplots()
fig.set_size_inches(3840 / 300, 1986 / 300)
plt.choropleth(gdf, title = None, mappable = plt.get_cmap(0.75, 2.5), fig = fig, ax = ax)\
    .adjust(left = 0)
plt.sca(fig.get_axes()[0])
# NOTE(review): `state` is not assigned anywhere in this visible chunk (only
# `state_code` is) -- confirm it is defined earlier in the file.
plt.PlotDevice(fig).title(f"{state}: $R_t$ by district", ha="center", x=0.5)
plt.axis('off')
# Put every province series on one shared daily index running from the
# earliest to the latest date seen in any province. Gaps are forward-filled;
# whatever is still missing afterwards becomes 0.
bgn = min(series.index.min() for series in province_cases.values())
end = max(series.index.max() for series in province_cases.values())
idx = pd.date_range(bgn, end)
province_cases = {
    name: series.reindex(idx, method="pad").fillna(0)
    for (name, series) in province_cases.items()
}
# Element-wise sum of the aligned province series.
natl_cases = sum(province_cases.values())

logger.info("running national-level Rt estimate")
(
    dates,
    Rt_pred, Rt_CI_upper, Rt_CI_lower,
    T_pred, T_CI_upper, T_CI_lower,
    total_cases, new_cases_ts,
    anomalies, anomaly_dates,
) = analytical_MPVS(natl_cases, CI=CI, smoothing=smoothing)

(
    plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI, ymin=0, ymax=4)
    .title("\nIndonesia: Reproductive Number Estimate")
    .xlabel("\ndate")
    .ylabel("$R_t$", rotation=0, labelpad=30)
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")
    .show()
)

logger.info("running case-forward prediction")
# Seeded from the most recent estimates and run for 14 steps.
IDN = SIR(
    "IDN",
    267.7e6,
    dT0=T_pred[-1],
    Rt0=Rt_pred[-1],
    mobility=0,
    random_seed=0,
).run(14)

logger.info("province-level projections")
# Square all-zero matrix with one row and column per province.
migration = np.zeros(shape=(len(provinces), len(provinces)))
estimates = []
# One more than the longest province name.
max_len = max(len(province) for province in provinces) + 1
# Keep only cases with a recorded age, bin them by age, and count cases per
# (age bin, confirmation date).
cases = cases.dropna(subset=["age"])
cases["age_bin"] = pd.cut(cases.age, bins=[0] + list(range(20, 80, 10)) + [100])
age_ts = cases[["age_bin", "confirmed"]].groupby(["age_bin", "confirmed"]).size().sort_index()

# Maximum April Rt per group, keyed by group label.
ss_max_rts = {}

# `sharex`/`sharey` are keyword-only in current matplotlib, so they are passed
# by name instead of positionally.
fig, axs = plt.subplots(4, 2, sharex=True, sharey=True)

# All ages combined: collapse the age-bin level, leaving one count per
# confirmation date. `Series.sum(level=...)` was deprecated and later removed
# from pandas; `groupby(level=1).sum()` is the documented replacement and
# returns the dates in sorted order.
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(age_ts.groupby(level=1).sum(), CI = CI, smoothing = notched_smoothing(window = 5), totals = False)
plt.sca(axs.flat[0])
plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)\
    .annotate("all ages")\
    .adjust(left=0.04, right=0.96, top=0.95, bottom=0.05, hspace=0.3, wspace=0.15)
r = pd.Series(Rt_pred, index=dates)
ss_max_rts["all"] = r[r.index.month_name() == "April"].max()

# One panel per age bin, drawn on the remaining axes; each panel's legend is
# removed after plotting.
for (age_bin, ax) in zip(age_ts.index.get_level_values(0).categories, axs.flat[1:]):
    print(age_bin)
    (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(age_ts.loc[age_bin], CI = CI, smoothing = smoothing, totals = False)
    plt.sca(ax)
    plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI).annotate(f"age bin: {age_bin}")
    ax.get_legend().remove()