Example #1
def plot_mobility(series, label, stringency = None, until = None, annotation = "Google Mobility Data; baseline mobility measured from Jan 3 - Feb 6"):
    plt.plot(series.date, smoothed(series.retail_and_recreation_percent_change_from_baseline), label = "Retail/Recreation")
    plt.plot(series.date, smoothed(series.grocery_and_pharmacy_percent_change_from_baseline),  label = "Grocery/Pharmacy")
    plt.plot(series.date, smoothed(series.parks_percent_change_from_baseline),                 label = "Parks")
    plt.plot(series.date, smoothed(series.transit_stations_percent_change_from_baseline),      label = "Transit Stations")
    plt.plot(series.date, smoothed(series.workplaces_percent_change_from_baseline),            label = "Workplaces")
    plt.plot(series.date, smoothed(series.residential_percent_change_from_baseline),           label = "Residential")
    if until:
        right = pd.Timestamp(until)
    elif stringency is not None:
        right = stringency.Date.max()
    else:
        right = series.date.iloc[-1]
    lax = plt.gca()
    if stringency is not None: 
        plt.sca(lax.twinx())
        stringency_IN = stringency.query("CountryName == 'India'")
        stringency_US = stringency.query("(CountryName == 'United States') & (RegionName.isnull())", engine = "python")
        plt.plot(stringency_IN.Date, stringency_IN.StringencyIndex, 'k--', alpha = 0.6, label = "IN Measure Stringency")
        plt.plot(stringency_US.Date, stringency_US.StringencyIndex, 'k.' , alpha = 0.6, label = "US Measure Stringency")
        plt.PlotDevice().ylabel("lockdown stringency index", rotation = -90, labelpad = 50)
        plt.legend()
        plt.sca(lax)
    plt.legend(loc = "lower right")
    plt.fill_betweenx((-100, 60), pd.to_datetime("March 24, 2020"), pd.to_datetime("June 1, 2020"), color = "black", alpha = 0.05, zorder = -1)
    plt.text(s = "national lockdown", x = pd.to_datetime("April 27, 2020"), y = -90, fontdict = plt.theme.note, ha = "center", va = "top")
    plt.PlotDevice()\
        .xlabel("\ndate")\
        .ylabel("% change in mobility\n")
        # .title(f"\n{label}: Mobility & Lockdown Trends")\
        # .annotate(annotation)\
    plt.ylim(-100, 60)

    plt.xlim(left = series.date.iloc[0], right = right)
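# Hedged usage sketch (not part of the original example): plot_mobility expects a
# Google Mobility Report frame filtered to one region and, optionally, an OxCGRT
# stringency frame; the file names below are illustrative assumptions only.
mobility   = pd.read_csv("Global_Mobility_Report.csv", parse_dates = ["date"])
stringency = pd.read_csv("OxCGRT_latest.csv",          parse_dates = ["Date"])
plot_mobility(mobility[mobility.sub_region_1.isna()], "India", stringency = stringency)
plt.show()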
Example #2
            "dates": dth_dates,
            "Rt_pred": dth_Rt_pred,
            "Rt_CI_upper": dth_Rt_CI_upper,
            "Rt_CI_lower": dth_Rt_CI_lower,
            "T_pred": dth_T_pred,
            "T_CI_upper": dth_T_CI_upper,
            "T_CI_lower": dth_T_CI_lower,
            "total_cases": dth_total_cases[2:],
            "new_cases_ts": dth_new_cases_ts,
        })
    dth_estimates["anomaly"] = dth_estimates["dates"].isin(
        set(dth_anomaly_dates))
    print("  + Rt (dth) today:", inf_Rt_pred[-1])

    fig, axs = plt.subplots(1, 2, sharey=True)
    plt.sca(axs[0])
    plt.Rt(inf_dates, inf_Rt_pred, inf_Rt_CI_lower, inf_Rt_CI_upper, CI)\
        .axis_labels("date", "$R_t$")
    plt.title("estimated from infections",
              loc="left",
              fontdict=plt.theme.label)

    # fig, axs = plt.subplots(3, 1, sharex = True)
    # plt.sca(axs[0])
    # plt.plot(dth_dates, delhi_dD_smoothed[2:], color = "orange")
    # plt.title("d$D$/d$t$", loc = "left", fontdict = plt.theme.label)

    # plt.sca(axs[1])
    # plt.plot(dth_dates, np.diff(delhi_dD_smoothed)[1:], color = "red")
    # plt.title("d$^2D$/d$t^2$", loc = "left", fontdict = plt.theme.label)
Example #3
def generate_report(state_code: str):
    print(f"Received request for {state_code}.")
    state = state_code_lookup[state_code]
    normalized_state = state.replace(" and ", " And ").replace(" & ", " And ")
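    # map bucket blob names to local download paths; dissolved states only publish a
    # state-level Rt file, so the district estimates and map are skipped for them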
    blobs = {
        f"pipeline/est/{state_code}_state_Rt.csv":
        f"/tmp/state_Rt_{state_code}.csv",
        f"pipeline/est/{state_code}_district_Rt.csv":
        f"/tmp/district_Rt_{state_code}.csv",
        f"pipeline/commons/maps/{state_code}.json":
        f"/tmp/state_{state_code}.geojson"
    } if normalized_state not in dissolved_states else {
        f"pipeline/est/{state_code}_state_Rt.csv":
        f"/tmp/state_Rt_{state_code}.csv",
    }
    for (blob_name, filename) in blobs.items():
        bucket.blob(blob_name).download_to_filename(filename)
    print(f"Downloaded estimates for {state_code}.")

    state_Rt = pd.read_csv(f"/tmp/state_Rt_{state_code}.csv",
                           parse_dates=["dates"],
                           index_col=0)

    plt.close("all")
    dates = [pd.Timestamp(date).to_pydatetime() for date in state_Rt.dates]
    plt.Rt(dates, state_Rt.Rt_pred, state_Rt.Rt_CI_lower, state_Rt.Rt_CI_upper, CI)\
        .axis_labels("date", "$R_t$")\
        .title(f"{state}: $R_t$ over time", ha = "center", x = 0.5)\
        .adjust(left = 0.11, bottom = 0.16)
    plt.gcf().set_size_inches(3840 / 300, 1986 / 300)
    plt.savefig(f"/tmp/{state_code}_Rt_timeseries.png")
    plt.close()
    print(f"Generated timeseries plot for {state_code}.")

    # check output is at least 50 KB
    timeseries_size_kb = os.stat(
        f"/tmp/{state_code}_Rt_timeseries.png").st_size / 1000
    print(f"Timeseries artifact size: {timeseries_size_kb} kb")
    assert timeseries_size_kb > 50
    bucket.blob(
        f"pipeline/rpt/{state_code}_Rt_timeseries.png").upload_from_filename(
            f"/tmp/{state_code}_Rt_timeseries.png", content_type="image/png")

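    # district-level choropleth; only drawn for states that are neither island
    # states nor dissolved states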
    if normalized_state not in (island_states + dissolved_states):
        district_Rt = pd.read_csv(f"/tmp/district_Rt_{state_code}.csv",
                                  parse_dates=["dates"],
                                  index_col=0)
        latest_Rt = district_Rt[district_Rt.dates == district_Rt.dates.max(
        )].set_index("district")["Rt_pred"].to_dict()
        top10 = [(k, "> 3.0" if v > 3 else f"{v:.2f}") for (k, v) in sorted(
            latest_Rt.items(), key=lambda t: t[1], reverse=True)[:10]]

        gdf = gpd.read_file(f"/tmp/state_{state_code}.geojson")
        gdf["Rt"] = gdf.district.map(latest_Rt)
        fig, ax = plt.subplots()
        fig.set_size_inches(3840 / 300, 1986 / 300)
        plt.choropleth(gdf, title = None, mappable = plt.get_cmap(0.75, 2.5), fig = fig, ax = ax)\
            .adjust(left = 0)
        plt.sca(fig.get_axes()[0])
        plt.PlotDevice(fig).title(f"{state}: $R_t$ by district",
                                  ha="center",
                                  x=0.5)
        plt.axis('off')
        plt.savefig(f"/tmp/{state_code}_Rt_choropleth.png", dpi=300)
        plt.close()
        print(f"Generated choropleth for {state_code}.")

        # check output is at least 100 KB
        choropleth_size_kb = os.stat(
            f"/tmp/{state_code}_Rt_choropleth.png").st_size / 1000
        print(f"Choropleth artifact size: {choropleth_size_kb} kb")
        assert choropleth_size_kb > 100
        bucket.blob(f"pipeline/rpt/{state_code}_Rt_choropleth.png"
                    ).upload_from_filename(
                        f"/tmp/{state_code}_Rt_choropleth.png",
                        content_type="image/png")
    else:
        print(f"Skipped choropleth for {state_code}.")

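    # top-10 districts table, rendered via ax.table and saved/uploaded like the
    # other report artifacts (skipped for dissolved states)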
    if normalized_state not in dissolved_states:
        fig, ax = plt.subplots(1, 1)
        ax.axis('tight')
        ax.axis('off')
        table = ax.table(cellText=top10,
                         colLabels=["district", "$R_t$"],
                         loc='center',
                         cellLoc="center")
        table.scale(1, 2)
        for (row, col), cell in table.get_celld().items():
            if (row == 0):
                cell.set_text_props(fontfamily=plt.theme.label["family"],
                                    fontsize=plt.theme.label["size"],
                                    fontweight="semibold")
            else:
                cell.set_text_props(fontfamily=plt.theme.label["family"],
                                    fontsize=plt.theme.label["size"],
                                    fontweight="light")
        plt.PlotDevice().title(f"{state}: top districts by $R_t$",
                               ha="center",
                               x=0.5)
        plt.savefig(f"/tmp/{state_code}_Rt_top10.png", dpi=600)
        plt.close()
        print(f"Generated top 10 district listing for {state_code}.")

        # check output is at least 50 KB
        top10_size_kb = os.stat(
            f"/tmp/{state_code}_Rt_top10.png").st_size / 1000
        print(f"Top 10 listing artifact size: {top10_size_kb} kb")
        assert top10_size_kb > 50
        bucket.blob(
            f"pipeline/rpt/{state_code}_Rt_top10.png").upload_from_filename(
                f"/tmp/{state_code}_Rt_top10.png", content_type="image/png")
    else:
        print(f"Skipped top 10 district listing for {state_code}.")

    # sleep for 15 seconds to ensure the images finish saving
    time.sleep(15)

    print(f"Uploaded artifacts for {state_code}.")
    return "OK!"
Example #4
# data prep
with (data/'timeseries.json').open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)
df.columns = df.columns.str.split('.', expand = True)
dates = np.squeeze(df["index"][None].values)
df = df.drop(columns = "index").set_index(dates).stack([1, 2]).drop("UN", axis = 1)
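# df now holds the covid19india timeseries with state-code columns and a
# (date, delta/total, metric) row index; the "UN" column is dropped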

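# country-level slice of the mobility data (rows with no sub_region_1), plotted
# against the national daily-case curve on a twin axis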
series = mobility[mobility.sub_region_1.isna()]
plt.plot(series.date, smoothed(series.retail_and_recreation_percent_change_from_baseline), label = "Retail/Recreation")
plt.fill_betweenx((-100, 60), pd.to_datetime("March 24, 2020"), pd.to_datetime("June 1, 2020"), color = "black", alpha = 0.05, zorder = -1)
plt.text(s = "national lockdown", x = pd.to_datetime("April 27, 2020"), y = -20, fontdict = plt.note_font, ha = "center", va = "top")
plt.ylim(-100, 10)
plt.xlim(series.date.min(), series.date.max())
plt.legend(loc = 'upper right')
lax = plt.gca()
plt.sca(lax.twinx())
plt.plot(df["TT"][:, "delta", "confirmed"].index, smoothed(df["TT"][:, "delta", "confirmed"].values), label = "Daily Cases", color = plt.PRED_PURPLE)
plt.legend(loc = 'lower right')
plt.PlotDevice().ylabel("new cases", rotation = -90, labelpad = 50)
plt.sca(lax)
plt.PlotDevice().title("\nIndia Mobility and Case Count Trends")\
    .annotate("Google Mobility Data + Covid19India.org")\
    .xlabel("\ndate")\
    .ylabel("% change in mobility\n")
plt.show()

plt.plot(series.date, smoothed(series.retail_and_recreation_percent_change_from_baseline), label = "Retail/Recreation")
plt.fill_betweenx((-100, 60), pd.to_datetime("March 24, 2020"), pd.to_datetime("June 1, 2020"), color = "black", alpha = 0.05, zorder = -1)
plt.text(s = "national lockdown", x = pd.to_datetime("April 27, 2020"), y = -20, fontdict = plt.note_font, ha = "center", va = "top")
plt.ylim(-100, 10)
plt.xlim(series.date.min(), series.date.max())
Example #5
xticks = {
    "Surat", "Narmada", "Mumbai", "Thane", "Pune", "Aurangabad", "Parbhani",
    "Nanded", "Yavatmal", "Chennai"
}

pop_density = pd.read_csv(data / "popdensity.csv").set_index(
    ["state", "district"])
fig, ax_nest = plt.subplots(ncols=ncols, nrows=nrows)
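# hide the grid cells that are not mapped to a district in coords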
for (j, i) in product(range(nrows), range(ncols)):
    if (i + 1, j + 1) in coords.values():
        continue
    ax_nest[j, i].axis("off")

for ((state, district), (x, y)) in coords.items():
    plt.sca(ax_nest[y - 1, x - 1])
    urban_share = int(
        (1 - serodist.loc[state, ("New " if district == "Delhi" else "") +
                          district]["rural_share"].mean()) * 100)
    density = pop_density.loc[state, district].density
    rt_data = district_estimates.loc[state, district].set_index(
        "dates")["Feb 1, 2021":]
    plt.Rt(rt_data.index,
           rt_data.Rt_pred,
           rt_data.RR_CI_upper,
           rt_data.RR_CI_lower,
           0.95,
           yaxis_colors=False,
           ymin=0.5,
           ymax=2.0)
    if (x, y) != (4, 1):
Example #6
            (regency, Rt_pred[-1], Rt_CI_lower[-1], Rt_CI_upper[-1],
             linear_projection(dates, Rt_pred, 7)))
estimates = pd.DataFrame(estimates)
estimates.columns = ["regency", "Rt", "Rt_CI_lower", "Rt_CI_upper", "Rt_proj"]
estimates.set_index("regency", inplace=True)
estimates.to_csv("data/SULSEL_Rt_projections.csv")
print(estimates)

gdf = gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")\
    .query("NAME_1 == 'Sulawesi Selatan'")\
    .merge(estimates, left_on = "NAME_2", right_on = "regency")

choro = plt.choropleth(gdf, mappable=plt.get_cmap(0.4, 1.4, "viridis"))

for ax in choro.figure.axes[:-1]:
    plt.sca(ax)
    plt.xlim(left=119, right=122)
    plt.ylim(bottom=-7.56, top=-1.86)

plt.show()

logger.info("adaptive control")
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(new_cases, CI = CI, smoothing = smoothing, totals = False)
Rt = pd.DataFrame(data={"Rt": Rt_pred[1:]}, index=dates)
Rt_current = Rt_pred[-1]
Rt_m = np.mean(Rt[(Rt.index >= "April 21, 2020")
                  & (Rt.index <= "May 22, 2020")])[0]
Rt_v = np.mean(Rt[(Rt.index <= "April 14, 2020")])[0]

Rt_m_scaled = Rt_current + 0.75 * (Rt_m - Rt_current)
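# i.e., move the current Rt 75% of the way toward Rt_m, the mean Rt over the
# April 21 - May 22, 2020 window computed above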
Example #7
plt.Rt(list(state_Rt.dates), state_Rt.Rt_pred, state_Rt.Rt_CI_lower, state_Rt.Rt_CI_upper, CI)\
    .axis_labels("date", "$R_t$")\
    .title("Maharashtra: $R_t$ over time", ha = "center", x = 0.5)\
    .adjust(left = 0.11, bottom = 0.16)
plt.gcf().set_size_inches(3840 / 300, 1986 / 300)
plt.savefig("./MH_Rt_timeseries.png")
plt.clf()

gdf = gpd.read_file("data/maharashtra.json", dpi=600)

gdf["Rt"] = gdf.district.map(latest_Rt)
fig, ax = plt.subplots()
fig.set_size_inches(3840 / 300, 1986 / 300)
plt.choropleth(gdf, title = None, mappable = plt.get_cmap(0.75, 2.5), fig = fig, ax = ax)\
    .adjust(left = 0)
plt.sca(fig.get_axes()[0])
plt.PlotDevice(fig).title(f"{state}: $R_t$ by district", ha="center", x=0.5)
plt.axis('off')
plt.savefig(f"./{state_code}_Rt_choropleth.png", dpi=300)
plt.clf()

top10 = [
    (k, "> 3.0" if v > 3 else f"{v:.2f}", v)
    for (k,
         v) in sorted(latest_Rt.items(), key=lambda t: t[1], reverse=True)[:10]
]

fig, ax = plt.subplots(1, 1)
ax.axis('tight')
ax.axis('off')
table = ax.table(cellText=[(k, l) for (k, l, v) in top10],
Example #8
    .axis_labels(x = "age group", y = "CFR (log-scaled)")\
    .l_title("CFR in India (adjusted for reporting)")\
    .r_title("source:\nICMR")\
    .adjust(left = 0.11, bottom = 0.15, right = 0.95)
plt.semilogy()
plt.show()

# fig 3
india_data = pd.read_csv(results / "india_data.csv", parse_dates = ["dt"])\
    .query("State == 'TT'")\
    .set_index("dt")\
    .sort_index()

fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)

plt.sca(axs[0, 0])
plt.scatter(india_data.index, india_data["cfr_2week"], color="black", s=2)
plt.title("2-week lag", loc="left", fontdict=plt.theme.label)

plt.sca(axs[0, 1])
plt.scatter(india_data.index, india_data["cfr_maxcor"], color="black", s=2)
plt.title("10-day lag", loc="left", fontdict=plt.theme.label)

plt.sca(axs[1, 0])
plt.scatter(india_data.index, india_data["cfr_1week"], color="black", s=2)
plt.title("1-week lag", loc="left", fontdict=plt.theme.label)

plt.sca(axs[1, 1])
plt.scatter(india_data.index, india_data["cfr_same"], color="black", s=2)
plt.title("no lag", loc="left", fontdict=plt.theme.label)
plt.gca().xaxis.set_major_formatter(formatter)
Example #9
parse_datetimes(cases.loc[:, "confirmed"])
cases.regency = cases.regency.str.title().map(
    lambda s: regency_names.get(s, s))
cases.age = cases.age.apply(parse_age)
cases = cases.dropna(subset=["age"])
cases["age_bin"] = pd.cut(cases.age,
                          bins=[0] + list(range(20, 80, 10)) + [100])
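# daily confirmed-case counts per (age bin, confirmation date), used to estimate
# Rt separately for each age group below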
age_ts = cases[["age_bin",
                "confirmed"]].groupby(["age_bin",
                                       "confirmed"]).size().sort_index()
ss_max_rts = {}

fig, axs = plt.subplots(4, 2, sharex=True, sharey=True)
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(age_ts.sum(level = 1), CI = CI, smoothing = notched_smoothing(window = 5), totals = False)
plt.sca(axs.flat[0])
plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower,
       CI).annotate("all ages").adjust(left=0.04,
                                        right=0.96,
                                        top=0.95,
                                        bottom=0.05,
                                        hspace=0.3,
                                        wspace=0.15)
r = pd.Series(Rt_pred, index=dates)
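# record the peak April Rt for the all-ages series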
ss_max_rts["all"] = r[r.index.month_name() == "April"].max()

for (age_bin,
     ax) in zip(age_ts.index.get_level_values(0).categories, axs.flat[1:]):
    print(age_bin)
    (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(age_ts.loc[age_bin], CI = CI, smoothing = smoothing, totals = False)