# Fit one OLS model per order (1 through 9), then keep the fit with the lowest
# AIC among those whose coefficient on "tested" is strictly positive.
models = [
    OLS.from_formula(scaling(order), data=obs).fit() for order in range(1, 10)
]
(model_idx, selected_model) = min(
    ((position, fit) for (position, fit) in enumerate(models)
     if fit.params["tested"] > 0),
    key=lambda candidate: candidate[1].aic,
)

# Tabulate every fit; a leading "*" marks the selected one. The printed index
# is 1-based so that it lines up with the order handed to `scaling`.
print("  i aic     r2   beta")
for (i, model) in enumerate(models):
    marker = "*" if i == model_idx else " "
    print(marker, i + 1, model.aic.round(2), model.rsquared.round(2),
          model.params["tested"].round(2))
scale_factor = selected_model.params["tested"]

# Overlay the daily "tested" series scaled two ways (the hard-coded 0.2093
# factor and the fitted `scale_factor`) on top of the confirmed series.
for (factor, curve_label) in ((0.2093, "national test-scaled"),
                              (scale_factor, "state test-scaled")):
    plt.plot(factor * df[state][:, "delta", "tested"], label=curve_label)
plt.plot(df[state][:, "delta", "confirmed"], label="confirmed")
plt.legend()
(
    plt.PlotDevice()
    .title(f"\n{state} / case scaling comparison")
    .xlabel("\ndate")
    .ylabel("cases\n")
)
plt.show()

# I vs D estimators
# NOTE(review): `gamma` is not read anywhere in the visible code; presumably a
# rate parameter consumed by the estimators — confirm.
gamma = 0.2
# `days` is defined outside this section; presumably a unit that makes this a
# one-week window — confirm.
window = 7 * days
# NOTE(review): presumably the confidence level forwarded to the Rt
# estimator — confirm against the call that follows.
CI = 0.95
# Smoother built from `window` by the project helper `notched_smoothing`.
smooth = notched_smoothing(window)

(dates_I, Rt_I, Rtu_I, Rtl_I, *_) = analytical_MPVS(df[state][:, "delta",
    # plot simulation
    plt.scatter(dT_conf["April 1, 2020":simulation_start].index,
                dT_conf["April 1, 2020":simulation_start].values * T_ratio,
                label="seroprevalence-scaled cases (pre-simulation)",
                color="black",
                s=5)
    # plt.scatter(dT_conf[simulation_start:].index, dT_conf[simulation_start:].values*T_ratio, color = "grey", label = "seroprevalence-scaled cases (post-simulation)", s = 5)
    # t = pd.Timestamp(date)
    dates = pd.date_range(
        simulation_start,
        simulation_start + pd.Timedelta(len(model.dT) - 1, "days"))
    # dates = pd.date_range(t, pd.Timestamp("April 1, 2021"))
    n = len(dates)
    plt.plot(dates,
             np.array([_.mean().astype(int) for _ in model.dT][:n]),
             label="mean simulated daily cases",
             color="rebeccapurple")
    plt.fill_between(dates, [_.min().astype(int) for _ in model.dT][:n],
                     [_.max().astype(int) for _ in model.dT][:n],
                     label="simulation range",
                     alpha=0.3,
                     color="rebeccapurple")
    plt.vlines(pd.Timestamp(date),
               1,
               1e6,
               linestyles="dashed",
               label="date of seroprevalence study")
    plt.legend(handlelength=1, framealpha=1)
    plt.semilogy()
    plt.xlim(pd.Timestamp("April 1, 2020"), dates[-1])
    plt.ylim(1, 1e6)
        .rename(columns = schema)\
        .dropna(how = 'all')
# Hand the "confirmed" column to the project helper `parse_datetimes`. The
# return value is discarded, so the helper presumably converts in place —
# confirm.
parse_datetimes(cases.loc[:, "confirmed"])
# Title-case each regency name, then swap in the entry from `regency_names`
# when one exists; names without an entry pass through unchanged.
cases.regency = cases.regency.str.title().map(lambda s: regency_names.get(s, s))

# generation_interval = cases[~cases.symptom_onset.isna() & ~cases.confirmed.isna()]\
#     .apply(get_generation_interval, axis = 1)\
#     .dropna()\
#     .value_counts()\
#     .sort_index()
# generation_interval =  generation_interval[(generation_interval.index >= 0) & (generation_interval.index <= 60)]
# generation_interval /= generation_interval.sum()

# Count rows per "confirmed" value, order by index, and plot the raw counts
# (blue dots) against the smoothed counts (black line).
new_cases = cases.confirmed.value_counts().sort_index()
new_cases_smoothed = smoothing(new_cases)
plt.plot(new_cases, ".", color="blue")
plt.plot(new_cases.index, new_cases_smoothed, "-", color="black")
plt.show()

logger.info("running province-level Rt estimate")
(
    dates,
    Rt_pred, Rt_CI_upper, Rt_CI_lower,
    T_pred, T_CI_upper, T_CI_lower,
    total_cases, new_cases_ts,
    anomalies, anomaly_dates,
) = analytical_MPVS(new_cases, CI=CI, smoothing=smoothing, totals=False)

# Plot the estimate with its confidence band and display it.
(
    plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)
    .title("\nSouth Sulawesi: Reproductive Number Estimate")
    .xlabel("\ndate")
    .ylabel("$R_t$\n", rotation=0, labelpad=30)
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")
    .show()
)

logger.info("running case-forward prediction")
                population=500000,
                I0=100,
                dT0=20,
                Rt0=1.01,
                random_seed=0)

# Drive `sir_model` through a piecewise-constant schedule of (R0, duration)
# pairs, recording the R0 that was in force at every step.
total_t = 0
schedule = [(1.01, 75), (1.4, 75), (0.9, 75)]
R0_timeseries = []
for (R0, t) in schedule:
    R0_timeseries.extend([R0] * t)
    sir_model.Rt0 = R0
    sir_model.run(t)
    total_t = total_t + t

# First figure: the model's dT series.
plt.plot(sir_model.dT)
plt.show()

# Second figure: scheduled R0 against the model's Rt series.
plt.plot(R0_timeseries, "-", color="black", label="$R_0$")
plt.plot(sir_model.Rt, "-", color="dodgerblue", label="$R_t$")
plt.legend(framealpha=1, handlelength=1, loc="best")
(
    plt.PlotDevice()
    .xlabel("time")
    .ylabel("reproductive rate")
    .adjust(left=0.10, bottom=0.15, right=0.99, top=0.99)
)
plt.ylim(0.5, 1.5)
plt.show()

# 1: parametric scheme:
dates, Rt, Rt_lb, Rt_ub, *_, anomalies, anomaly_dates = analytical_MPVS(
    pd.DataFrame(sir_model.dT),
    smoothing=convolution("uniform", 2),
# Example #5
# 0
def plot_mobility(
    series,
    label,
    stringency=None,
    until=None,
    annotation="Google Mobility Data; baseline mobility measured from Jan 3 - Feb 6"
):
    """Plot smoothed mobility trends, optionally overlaid with lockdown stringency.

    Draws one line per mobility category against ``series.date``, shades the
    span from March 24 to June 1, 2020 (captioned "national lockdown"), and
    sets the title, axis labels and axis limits. The figure is not shown.

    Args:
        series: table exposing a ``date`` column and the six
            ``*_percent_change_from_baseline`` columns listed below.
        label: name interpolated into the plot title.
        stringency: optional table with ``Date``, ``CountryName``,
            ``RegionName`` and ``StringencyIndex`` columns. When given, the
            India rows and the United States rows with a null region are
            drawn on a twin y-axis.
        until: optional right-hand x limit, passed through ``pd.Timestamp``.
            When falsy, the limit falls back to the latest stringency date,
            or to the last date in ``series`` if there is no stringency data.
        annotation: text handed to the plot device's ``annotate``.
    """
    mobility_categories = (
        ("retail_and_recreation_percent_change_from_baseline",
         "Retail/Recreation"),
        ("grocery_and_pharmacy_percent_change_from_baseline",
         "Grocery/Pharmacy"),
        ("parks_percent_change_from_baseline", "Parks"),
        ("transit_stations_percent_change_from_baseline", "Transit Stations"),
        ("workplaces_percent_change_from_baseline", "Workplaces"),
        ("residential_percent_change_from_baseline", "Residential"),
    )
    for (column, category) in mobility_categories:
        plt.plot(series.date,
                 smoothed(getattr(series, column)),
                 label=category)

    # Right-hand x limit: explicit argument first, then stringency coverage,
    # then the end of the mobility data.
    if until:
        right = pd.Timestamp(until)
    elif stringency is None:
        right = series.date.iloc[-1]
    else:
        right = stringency.Date.max()

    mobility_axes = plt.gca()
    if stringency is not None:
        # Stringency gets its own y-axis; the mobility axes are made current
        # again afterwards so the remaining calls apply to them.
        plt.sca(mobility_axes.twinx())
        india = stringency.query("CountryName == 'India'")
        united_states = stringency.query(
            "(CountryName == 'United States') & (RegionName.isnull())",
            engine="python")
        overlays = (
            (india, 'k--', "IN Measure Stringency"),
            (united_states, 'k.', "US Measure Stringency"),
        )
        for (subset, line_format, legend_entry) in overlays:
            plt.plot(subset.Date,
                     subset.StringencyIndex,
                     line_format,
                     alpha=0.6,
                     label=legend_entry)
        plt.PlotDevice().ylabel("lockdown stringency index",
                                rotation=-90,
                                labelpad=50)
        plt.legend()
        plt.sca(mobility_axes)

    plt.legend(loc="upper left")

    # Light grey band over the lockdown period, with a caption near its
    # bottom edge.
    y_range = (-100, 60)
    plt.fill_betweenx(y_range,
                      pd.to_datetime("March 24, 2020"),
                      pd.to_datetime("June 1, 2020"),
                      color="black",
                      alpha=0.05,
                      zorder=-1)
    plt.text(s="national lockdown",
             x=pd.to_datetime("April 27, 2020"),
             y=-90,
             fontdict=plt.note_font,
             ha="center",
             va="top")

    (
        plt.PlotDevice()
        .title(f"\n{label}: Mobility & Lockdown Trends")
        .annotate(annotation)
        .xlabel("\ndate")
        .ylabel("% change in mobility\n")
    )
    plt.ylim(*y_range)

    plt.xlim(left=series.date.iloc[0], right=right)
# Example #6
# 0
from adaptive.etl.commons import download_data
from pathlib import Path

# Local directory used for the input file.
data = Path("./data")
# Project helper; presumably fetches timeseries.json from the given base URL
# into `data` — confirm against adaptive.etl.commons.
download_data(data, 'timeseries.json', "https://api.covid19india.org/v3/")

# data prep
# Read the JSON, flatten it into a table with `flat_table.normalize`, and
# replace missing values with 0.
with (data / 'timeseries.json').open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)
# Split the dotted column names into separate column-index levels.
df.columns = df.columns.str.split('.', expand=True)
# Pull the "index" column out as a flat array; judging by the name it holds
# the dates — confirm.
dates = np.squeeze(df["index"][None].values)
# Use that array as the row index, move column levels 1 and 2 into the row
# index, and drop the "UN" column.
df = df.drop(columns="index").set_index(dates).stack([1, 2]).drop("UN", axis=1)

# Keep only the rows with no first-level sub-region and plot their smoothed
# retail/recreation mobility.
series = mobility[mobility.sub_region_1.isna()]
plt.plot(
    series.date,
    smoothed(series.retail_and_recreation_percent_change_from_baseline),
    label="Retail/Recreation",
)

# Shade the lockdown period and caption it.
lockdown_span = (pd.to_datetime("March 24, 2020"),
                 pd.to_datetime("June 1, 2020"))
plt.fill_betweenx((-100, 60),
                  *lockdown_span,
                  color="black",
                  alpha=0.05,
                  zorder=-1)
plt.text(
    s="national lockdown",
    x=pd.to_datetime("April 27, 2020"),
    y=-20,
    fontdict=plt.note_font,
    ha="center",
    va="top",
)
plt.ylim(-100, 10)
plt.xlim(series.date.min(), series.date.max())
    valid_idx = ~df.isna() & df.str.endswith("20")
    valid = df[valid_idx]
    monthfirst_idx = valid.str.endswith("/20") # short years -> month first notation 
    valid.loc[( monthfirst_idx)] = pd.to_datetime(valid[( monthfirst_idx)], errors = 'coerce', format = "%m/%d/%y", dayfirst = False)
    valid.loc[(~monthfirst_idx)] = pd.to_datetime(valid[(~monthfirst_idx)], errors = 'coerce', format = "%d/%m/%Y", dayfirst = True)
    # assert df.max() <= pd.to_datetime("October 03, 2020"), "date parsing resulted in future dates"
    df.loc[valid_idx] = valid.apply(pd.Timestamp)

# Load only the columns named in `schema`, rename them to their short names,
# and drop rows that are empty in every column.
sulsel = (
    pd.read_csv(
        "data/3 OCT 2020 Data collection template update South Sulawesi_CASE.csv",
        usecols=schema.keys(),
    )
    .rename(columns=schema)
    .dropna(how="all")
)
parse_datetimes(sulsel.loc[:, "confirmed"])

# Collapse to a count of rows per "confirmed" value, ordered by index.
sulsel = sulsel.confirmed.value_counts().sort_index()

# Jakarta: plot the `dkij` series alongside the step-to-step differences of
# `dkij_public`.
plt.plot(dkij.index, dkij.values, color="royalblue", label="private")
plt.plot(dkij_public.diff(), color="firebrick", label="public")
plt.legend()
(
    plt.PlotDevice()
    .title("\nJakarta: public vs private case counts")
    .xlabel("date")
    .ylabel("cases")
)
plt.xlim(right=dkij.index.max())
plt.ylim(top=800)
plt.show()

plt.plot(sulsel,               color = "royalblue", label = "private", linewidth = 3)
plt.plot(sulsel_public.diff(), color = "firebrick", label = "public")
plt.legend()
plt.PlotDevice()\
    .title("\nSouth Sulawesi: public vs private case counts")\
# Source CSV header -> short column name used in the rest of this section.
schema = {
    "Date": "date",
    "Daily change in cumulative total": "daily_tests",
    "Cumulative total": "total_tests",
    "Cumulative total per thousand": "total_per_thousand",
    "Daily change in cumulative total per thousand": "delta_per_thousand",
    "7-day smoothed daily change": "smoothed_delta",
    "7-day smoothed daily change per thousand": "smoothed_delta_per_thousand",
    "Short-term positive rate": "positivity",
    "Short-term tests per case": "tests_per_case",
}

# Keep the rows with ISO code "IND", drop any row containing a missing value
# (checked across all source columns, before the column selection), then
# select and rename the schema columns.
testing = pd.read_csv("data/covid-testing-all-observations.csv", parse_dates=["Date"])
india_complete = testing[testing["ISO code"] == "IND"].dropna()
testing = india_complete[schema.keys()].rename(columns=schema)
testing["month"] = testing.date.dt.month

def formula(order: int) -> str:
    """Build the regression formula string for a polynomial of degree *order*.

    The result regresses ``smoothed_delta`` (no intercept) on ``daily_tests``
    plus ``C(month)`` interacted with the powers 1..order of
    ``delta_per_thousand``. For ``order < 1`` the parenthesised polynomial
    is empty.
    """
    terms = [
        f"np.power(delta_per_thousand, {exponent})"
        for exponent in range(1, order + 1)
    ]
    polynomial = " + ".join(terms)
    return "smoothed_delta ~ -1 + daily_tests + C(month)*(" + polynomial + ")"

# Fit the cubic specification and print its summary table with the
# "daily_tests" row first.
model = OLS.from_formula(formula(order=3), data=testing).fit()
print(summary_col(model, regressor_order=["daily_tests"], drop_omitted=True))

# Compare the "TT" daily tested series, scaled by the hard-coded 0.2093
# factor, with the "TT" daily confirmed series.
tt_series = df["TT"]
plt.plot(0.2093 * tt_series[:, "delta", "tested"], label="test-scaled")
plt.plot(tt_series[:, "delta", "confirmed"], label="confirmed")
plt.legend()
plt.show()