# select order by minimizing AIC where coefficient on number of tests > 0 models = [ OLS.from_formula(scaling(order), data=obs).fit() for order in range(1, 10) ] (model_idx, selected_model) = min( ((i, each) for (i, each) in enumerate(models) if each.params["tested"] > 0), key=lambda _: _[1].aic) print(" i aic r2 beta") for (i, model) in enumerate(models): print("*" if i == model_idx else " ", i + 1, model.aic.round(2), model.rsquared.round(2), model.params["tested"].round(2)) scale_factor = selected_model.params["tested"] plt.plot(0.2093 * df[state][:, "delta", "tested"], label="national test-scaled") plt.plot(scale_factor * df[state][:, "delta", "tested"], label="state test-scaled") plt.plot(df[state][:, "delta", "confirmed"], label="confirmed") plt.legend() plt.PlotDevice().title(f"\n{state} / case scaling comparison").xlabel( "\ndate").ylabel("cases\n") plt.show() # I vs D estimators gamma = 0.2 window = 7 * days CI = 0.95 smooth = notched_smoothing(window) (dates_I, Rt_I, Rtu_I, Rtl_I, *_) = analytical_MPVS(df[state][:, "delta",
# Plot the simulation output: scaled observed cases leading up to the
# simulation start, then the mean and min/max envelope of the simulated
# daily cases, on a log-scaled y axis.
PLOT_START = "April 1, 2020"
CASE_FLOOR, CASE_CEILING = 1, 1e6
SIMULATION_COLOR = "rebeccapurple"

# Observed cases between the plot start and the simulation start, multiplied
# by T_ratio before being drawn.
pre_simulation = dT_conf[PLOT_START:simulation_start]
plt.scatter(
    pre_simulation.index,
    pre_simulation.values * T_ratio,
    label="seroprevalence-scaled cases (pre-simulation)",
    color="black",
    s=5,
)

# One calendar day per entry of model.dT, beginning at simulation_start.
simulation_span = pd.Timedelta(len(model.dT) - 1, "days")
dates = pd.date_range(simulation_start, simulation_start + simulation_span)
n = len(dates)

# Per-step summary statistics across the values held in each model.dT entry,
# truncated to the number of plotted dates.
step_mean = [step.mean().astype(int) for step in model.dT][:n]
step_min = [step.min().astype(int) for step in model.dT][:n]
step_max = [step.max().astype(int) for step in model.dT][:n]

plt.plot(
    dates,
    np.array(step_mean),
    label="mean simulated daily cases",
    color=SIMULATION_COLOR,
)
plt.fill_between(
    dates,
    step_min,
    step_max,
    label="simulation range",
    alpha=0.3,
    color=SIMULATION_COLOR,
)

# Vertical marker spanning the full y range at `date`.
plt.vlines(
    pd.Timestamp(date),
    CASE_FLOOR,
    CASE_CEILING,
    linestyles="dashed",
    label="date of seroprevalence study",
)

plt.legend(handlelength=1, framealpha=1)
plt.semilogy()
plt.xlim(pd.Timestamp(PLOT_START), dates[-1])
plt.ylim(CASE_FLOOR, CASE_CEILING)
.rename(columns = schema)\ .dropna(how = 'all') parse_datetimes(cases.loc[:, "confirmed"]) cases.regency = cases.regency.str.title().map(lambda s: regency_names.get(s, s)) # generation_interval = cases[~cases.symptom_onset.isna() & ~cases.confirmed.isna()]\ # .apply(get_generation_interval, axis = 1)\ # .dropna()\ # .value_counts()\ # .sort_index() # generation_interval = generation_interval[(generation_interval.index >= 0) & (generation_interval.index <= 60)] # generation_interval /= generation_interval.sum() new_cases = cases.confirmed.value_counts().sort_index() new_cases_smoothed = smoothing(new_cases) plt.plot(new_cases, '.', color = "blue") plt.plot(new_cases.index, new_cases_smoothed, '-', color = "black") plt.show() logger.info("running province-level Rt estimate") (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\ = analytical_MPVS(new_cases, CI = CI, smoothing = smoothing, totals = False) plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)\ .title("\nSouth Sulawesi: Reproductive Number Estimate")\ .xlabel("\ndate")\ .ylabel("$R_t$\n", rotation=0, labelpad=30)\ .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\ .show() logger.info("running case-forward prediction")
population=500000, I0=100, dT0=20, Rt0=1.01, random_seed=0) total_t = 0 schedule = [(1.01, 75), (1.4, 75), (0.9, 75)] R0_timeseries = [] for (R0, t) in schedule: R0_timeseries += [R0] * t sir_model.Rt0 = R0 sir_model.run(t) total_t += t plt.plot(sir_model.dT) plt.show() plt.plot(R0_timeseries, "-", color="black", label="$R_0$") plt.plot(sir_model.Rt, "-", color="dodgerblue", label="$R_t$") plt.legend(framealpha=1, handlelength=1, loc="best") plt.PlotDevice().xlabel("time").ylabel("reproductive rate").adjust(left=0.10, bottom=0.15, right=0.99, top=0.99) plt.ylim(0.5, 1.5) plt.show() # 1: parametric scheme: dates, Rt, Rt_lb, Rt_ub, *_, anomalies, anomaly_dates = analytical_MPVS( pd.DataFrame(sir_model.dT), smoothing=convolution("uniform", 2),
def plot_mobility(
    series,
    label,
    stringency=None,
    until=None,
    annotation="Google Mobility Data; baseline mobility measured from Jan 3 - Feb 6",
):
    """Plot smoothed mobility trends, optionally overlaid with lockdown stringency.

    Draws one line per mobility category on the current axes, shades the
    March 24 - June 1, 2020 window labelled "national lockdown", and sets the
    title, axis labels and limits. Nothing is returned; the figure is not shown.

    Args:
        series: object exposing ``date`` and the six
            ``*_percent_change_from_baseline`` attributes read below.
        label: text placed before ": Mobility & Lockdown Trends" in the title.
        stringency: optional frame with ``CountryName``, ``RegionName``,
            ``Date`` and ``StringencyIndex`` columns. When given, the India
            and United States rows are drawn on a twin y axis.
        until: optional right-hand x limit; passed through ``pd.Timestamp``.
        annotation: note text handed to ``PlotDevice.annotate``.
    """
    mobility_categories = (
        ("retail_and_recreation_percent_change_from_baseline", "Retail/Recreation"),
        ("grocery_and_pharmacy_percent_change_from_baseline", "Grocery/Pharmacy"),
        ("parks_percent_change_from_baseline", "Parks"),
        ("transit_stations_percent_change_from_baseline", "Transit Stations"),
        ("workplaces_percent_change_from_baseline", "Workplaces"),
        ("residential_percent_change_from_baseline", "Residential"),
    )
    for column_name, legend_label in mobility_categories:
        plt.plot(series.date, smoothed(getattr(series, column_name)), label=legend_label)

    # Right-hand x limit: explicit `until` wins, then the last stringency
    # date, then the last mobility date.
    if until:
        right = pd.Timestamp(until)
    elif stringency is None:
        right = series.date.iloc[-1]
    else:
        right = stringency.Date.max()

    mobility_axes = plt.gca()
    if stringency is not None:
        # Stringency goes on a secondary y axis sharing the date axis.
        plt.sca(mobility_axes.twinx())
        overlays = (
            (
                stringency.query("CountryName == 'India'"),
                'k--',
                "IN Measure Stringency",
            ),
            (
                stringency.query(
                    "(CountryName == 'United States') & (RegionName.isnull())",
                    engine="python",
                ),
                'k.',
                "US Measure Stringency",
            ),
        )
        for rows, line_format, legend_label in overlays:
            plt.plot(rows.Date, rows.StringencyIndex, line_format, alpha=0.6, label=legend_label)
        plt.PlotDevice().ylabel("lockdown stringency index", rotation=-90, labelpad=50)
        plt.legend()
        # Hand control back to the mobility axes for the remaining decoration.
        plt.sca(mobility_axes)

    plt.legend(loc="upper left")

    y_bottom, y_top = -100, 60
    plt.fill_betweenx(
        (y_bottom, y_top),
        pd.to_datetime("March 24, 2020"),
        pd.to_datetime("June 1, 2020"),
        color="black",
        alpha=0.05,
        zorder=-1,
    )
    plt.text(
        s="national lockdown",
        x=pd.to_datetime("April 27, 2020"),
        y=-90,
        fontdict=plt.note_font,
        ha="center",
        va="top",
    )
    (
        plt.PlotDevice()
        .title(f"\n{label}: Mobility & Lockdown Trends")
        .annotate(annotation)
        .xlabel("\ndate")
        .ylabel("% change in mobility\n")
    )
    plt.ylim(y_bottom, y_top)
    plt.xlim(left=series.date.iloc[0], right=right)
from pathlib import Path

from adaptive.etl.commons import download_data

# Fetch the covid19india timeseries dump into the local data directory.
data = Path("./data")
download_data(data, 'timeseries.json', "https://api.covid19india.org/v3/")

# data prep: flatten the nested JSON into a table and zero-fill the gaps.
timeseries_path = data / 'timeseries.json'
with timeseries_path.open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)

# Split the dotted column names into a column MultiIndex.
df.columns = df.columns.str.split('.', expand=True)

# Pull the "index" column out as a flat array and use it as the row index.
dates = np.squeeze(df["index"][None].values)
df = df.drop(columns="index")
df = df.set_index(dates)
# Move column levels 1 and 2 into the row index, then drop the "UN" column.
df = df.stack([1, 2])
df = df.drop("UN", axis=1)

# Mobility rows that carry no sub_region_1 value.
no_sub_region = mobility.sub_region_1.isna()
series = mobility[no_sub_region]

plt.plot(
    series.date,
    smoothed(series.retail_and_recreation_percent_change_from_baseline),
    label="Retail/Recreation",
)

# Shaded March 24 - June 1, 2020 window with its text label.
plt.fill_betweenx(
    (-100, 60),
    pd.to_datetime("March 24, 2020"),
    pd.to_datetime("June 1, 2020"),
    color="black",
    alpha=0.05,
    zorder=-1,
)
plt.text(
    s="national lockdown",
    x=pd.to_datetime("April 27, 2020"),
    y=-20,
    fontdict=plt.note_font,
    ha="center",
    va="top",
)

plt.ylim(-100, 10)
plt.xlim(series.date.min(), series.date.max())
valid_idx = ~df.isna() & df.str.endswith("20") valid = df[valid_idx] monthfirst_idx = valid.str.endswith("/20") # short years -> month first notation valid.loc[( monthfirst_idx)] = pd.to_datetime(valid[( monthfirst_idx)], errors = 'coerce', format = "%m/%d/%y", dayfirst = False) valid.loc[(~monthfirst_idx)] = pd.to_datetime(valid[(~monthfirst_idx)], errors = 'coerce', format = "%d/%m/%Y", dayfirst = True) # assert df.max() <= pd.to_datetime("October 03, 2020"), "date parsing resulted in future dates" df.loc[valid_idx] = valid.apply(pd.Timestamp) sulsel = pd.read_csv("data/3 OCT 2020 Data collection template update South Sulawesi_CASE.csv", usecols = schema.keys())\ .rename(columns = schema)\ .dropna(how = 'all') parse_datetimes(sulsel.loc[:, "confirmed"]) sulsel = sulsel.confirmed.value_counts().sort_index() plt.plot(dkij.index, dkij.values, color = "royalblue", label = "private") plt.plot(dkij_public.diff(), color = "firebrick", label = "public") plt.legend() plt.PlotDevice()\ .title("\nJakarta: public vs private case counts")\ .xlabel("date")\ .ylabel("cases") plt.xlim(right = dkij.index.max()) plt.ylim(top = 800) plt.show() plt.plot(sulsel, color = "royalblue", label = "private", linewidth = 3) plt.plot(sulsel_public.diff(), color = "firebrick", label = "public") plt.legend() plt.PlotDevice()\ .title("\nSouth Sulawesi: public vs private case counts")\
# Source CSV column name -> short name used in the rest of this section.
schema = {
    'Date': "date",
    'Daily change in cumulative total': "daily_tests",
    'Cumulative total': "total_tests",
    'Cumulative total per thousand': "total_per_thousand",
    'Daily change in cumulative total per thousand': "delta_per_thousand",
    '7-day smoothed daily change': "smoothed_delta",
    '7-day smoothed daily change per thousand': "smoothed_delta_per_thousand",
    'Short-term positive rate': "positivity",
    'Short-term tests per case': "tests_per_case",
}

# Load all testing observations, then keep complete "IND" rows restricted to
# (and renamed by) the schema columns.
testing = pd.read_csv(
    "data/covid-testing-all-observations.csv",
    parse_dates=["Date"],
)
testing = (
    testing[testing["ISO code"] == "IND"]
    .dropna()
    [schema.keys()]
    .rename(columns=schema)
)
testing["month"] = testing["date"].dt.month


def formula(order: int) -> str:
    """Return the regression formula string with polynomial terms up to ``order``.

    The result has the shape
    ``smoothed_delta ~ -1 + daily_tests + C(month)*(<terms>)`` where
    ``<terms>`` is ``np.power(delta_per_thousand, k)`` for k = 1..order,
    joined by `` + ``.
    """
    polynomial_terms = " + ".join(
        f"np.power(delta_per_thousand, {exponent})"
        for exponent in range(1, order + 1)
    )
    return f"smoothed_delta ~ -1 + daily_tests + C(month)*({polynomial_terms})"


# Fit the cubic specification and print the daily_tests coefficient summary.
model = OLS.from_formula(formula(order=3), data=testing).fit()
print(summary_col(model, regressor_order=["daily_tests"], drop_omitted=True))

# Compare scaled daily tests against daily confirmed cases for the "TT" key.
# NOTE(review): 0.2093 is a hard-coded scale factor; its provenance is not
# visible in this section - confirm before reuse.
tested_delta = df["TT"][:, "delta", "tested"]
plt.plot(0.2093 * tested_delta, label="test-scaled")
confirmed_delta = df["TT"][:, "delta", "confirmed"]
plt.plot(confirmed_delta, label="confirmed")
plt.legend()
plt.show()