def assemble_time_series(df):
    """Build a gap-free table of per-date deltas and their running totals.

    The series returned by ``get_time_series(df)`` is narrowed to the columns
    named by the keys of the module-level ``schema`` mapping, and those
    columns are renamed to the corresponding ``schema`` values.  The index is
    then expanded to every date between its first and last entry, with
    missing dates filled with 0.

    A cumulative-sum copy of the delta columns is joined on the index.  Each
    cumulative column is named with the *second character* of its delta
    column's name.
    NOTE(review): this presumably relies on two-character delta names such
    as "dT" -> "T"; confirm against ``schema``.

    Parameters
    ----------
    df : the case-level frame accepted by ``get_time_series``.

    Returns
    -------
    pandas.DataFrame
        Integer-typed frame indexed by ``date`` holding the delta columns
        followed by the cumulative columns; the columns axis has no name.
    """
    # Select and rename the columns of interest in a single pass.
    daily = get_time_series(df)[schema.keys()].rename(columns=schema)

    # Expand to every calendar day in the observed span; dates absent from
    # the source are filled with zero.
    full_span = pd.date_range(daily.index.min(), daily.index.max())
    daily = daily.reindex(full_span, fill_value=0)

    # Running totals, labelled by the second character of the delta name.
    running = daily.cumsum(axis=0).rename(columns=lambda name: name[1])

    combined = daily.merge(
        running,
        left_index=True,
        right_index=True,
    ).astype(int)

    combined.index.name = "date"
    combined.columns.name = None
    return combined
} for target in paths['v3'] + paths['v4']: try: download_data(data, target) except: pass df = load_all_data( v3_paths = [data/filepath for filepath in paths['v3']], v4_paths = [data/filepath for filepath in paths['v4']] ) data_recency = str(df["date_announced"].max()).split()[0] run_date = str(pd.Timestamp.now()).split()[0] ts = get_time_series(df, "detected_state") states = ["Maharashtra", "Bihar", "Delhi", "Andhra Pradesh", "Telangana", "Tamil Nadu", "Madhya Pradesh"] for state in states: print(state) print(" + running estimation...") ( dates, Rt_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates ) = analytical_MPVS(ts.loc[state].Hospitalized, CI = CI, smoothing = notched_smoothing(window = smoothing), totals = False) estimates = pd.DataFrame(data = { "dates": dates,
.ylabel("Rt", rotation=0, labelpad=20) plt.ylim(0, 4) # public data paths = { "v3": [data_path(_) for _ in (1, 2)], "v4": [data_path(_) for _ in range(3, 13)] } for target in paths['v3'] + paths['v4']: download_data(data, target) dfn = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']], v4_paths=[data / filepath for filepath in paths['v4']]) state_ts = get_time_series(dfn, "detected_state").loc["Bihar"] district_names, population_counts, _ = etl.district_migration_matrix( data / "Migration Matrix - District.csv") populations = dict(zip(district_names, population_counts)) # first, look at state level predictions (dates_public, RR_pred_public, RR_CI_upper_public, RR_CI_lower_public, T_pred_public, T_CI_upper_public, T_CI_lower_public, total_cases_public, new_cases_ts_public, anomalies_public, anomaly_dates_public) = analytical_MPVS( state_ts.Hospitalized, CI=CI, smoothing=convolution(window=smoothing)) plt.plot(dates_public, RR_pred_public, label="Estimated $R_t$", color="midnightblue") plt.fill_between(dates_public,
b2, a2 = iirnotch(2*f0, 2*Q, fs) b = convolve(b1, b2) a = convolve(a1, a2) notched = pd.Series(filtfilt(b, a, ts)) notched.index = ts.index return notched root = cwd() data = mkdir(root/"data") figs = mkdir(root/"figs") ########################################################### # download latest case data download_data(data, 'state_wise_daily.csv') df = load_statewise_data(data/"state_wise_daily.csv") ts = get_time_series(df, "state") ########################################################### # load delay data api_diff = pd.read_csv(data/"daily_diff.csv", parse_dates=["status_change_date", "report_date"], dayfirst=True) delay = api_diff[(api_diff.current_status == "Hospitalized") & (api_diff.report_date > "2020-08-02")].copy() delay = delay.drop(columns = [col for col in delay.columns if col.startswith("Unnamed")] + ["rowhash"]) delay["newhash"] = delay[["patient_number", "date_announced", "detected_district", "detected_state","current_status", "status_change_date", "num_cases"]].apply(lambda x: hash(tuple(x)), axis = 1) delay = delay.drop_duplicates(subset=["newhash"], keep="first") delay["delay"] = (delay.report_date - delay.status_change_date).dt.days state_hist = delay[["detected_state", "num_cases", "delay"]].set_index(["detected_state", "delay"])["num_cases"].sum(level = [0, 1]).sort_index() state_dist = state_hist/state_hist.sum(level = 0) delay_hist = delay.groupby("delay")["num_cases"].sum() delay_dist = delay_hist/delay_hist.sum()
# Work out of the cloud temp directory unless the script was explicitly
# invoked with --local, in which case use the current working directory.
if len(sys.argv) > 1 and sys.argv[1] == "--local":
    root = cwd()
else:
    root = Path("/tmp")
data = root / "data"

# model details
gamma = 0.2
smoothing = 10
CI = 0.95

# Fetch the state-wise daily file and build the country-level time series
# (get_time_series is called without a grouping argument here).
download_data(data, 'state_wise_daily.csv')
state_df = load_statewise_data(data / "state_wise_daily.csv")
country_time_series = get_time_series(state_df)

# Accumulators filled in by this section and, presumably, by later ones.
estimates = []
timeseries = []

# country level
# The last row of the Hospitalized series is left out of the estimation input.
(
    dates,
    RR_pred, RR_CI_upper, RR_CI_lower,
    T_pred, T_CI_upper, T_CI_lower,
    total_cases, new_cases_ts,
    anomalies, anomaly_dates,
) = analytical_MPVS(
    country_time_series["Hospitalized"].iloc[:-1],
    CI=CI,
    smoothing=notched_smoothing(window=smoothing),
)

# One record per date: (code, date, RR_pred, RR_CI_upper, RR_CI_lower).
country_code = state_name_lookup["India"]
for row in zip(dates, RR_pred, RR_CI_upper, RR_CI_lower):
    timeseries.append((country_code, *row))
ywf = fft(y * w) xf = np.linspace(0.0, 1.0 / (2.0 * T), N // 2) plt.plot(xf[1:N // 2], 2 / N * np.abs(ywf[1:N // 2]), ".", alpha=0.7, label=label) root = cwd() data = root / "data" figs = root / "figs" download_data(data, 'state_wise_daily.csv') state_df = load_statewise_data(data / "state_wise_daily.csv") natl_time_series = get_time_series(state_df) time_series = get_time_series(state_df, 'state') # is there chunking in reporting? print("checking average infection differentials...") time_series["delta_I"] = time_series.groupby(level=0)['Hospitalized'].diff() time_series["dow"] = time_series.index.get_level_values(1).dayofweek plot_average_change(time_series, "(All India)", filename=figs / "avg_delta_I_DoW_India.png") for state in tqdm(time_series.index.get_level_values(0).unique()): plot_average_change(time_series.loc[state], f"({state})", filename=figs / f"avg_delta_I_DoW_{state}.png") # are anomalies falling on certain days?
CI = 0.95 paths = { "v3": [data_path(_) for _ in (1, 2)], "v4": [data_path(_) for _ in range(3, 18)] } for target in paths['v3'] + paths['v4']: download_data(data, target) dfn = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']], v4_paths=[data / filepath for filepath in paths['v4']]) delay = pd.read_csv(data / "bihar_delay.csv").set_index("delay") state_ts = get_time_series(dfn, "detected_state").loc["Bihar"].Hospitalized # state_ts = delay_adjust(state_ts, np.squeeze(delay.values)) state_ts = state_ts[state_ts.index >= "2020-03-26"] district_names, population_counts, _ = etl.district_migration_matrix( data / "Migration Matrix - District.csv") populations = dict(zip(district_names, population_counts)) # first, look at state level predictions (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS( state_ts, CI=CI, smoothing=notched_smoothing(window=smoothing), totals=False) plt.Rt(dates, Rt_pred[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], CI, ymin=0, ymax=3)\
"v4": ["raw_data3.csv", "raw_data4.csv", "raw_data5.csv", "raw_data6.csv", "raw_data7.csv", "raw_data8.csv", "raw_data9.csv", "raw_data10.csv", "raw_data11.csv"] } # download data from india covid 19 api for target in paths['v3'] + paths['v4']: download_data(data, target) # run rolling regressions on historical national case data dfn = load_all_data( v3_paths = [data/filepath for filepath in paths['v3']], v4_paths = [data/filepath for filepath in paths['v4']] ) data_recency = str(dfn["date_announced"].max()).split()[0] tsn = get_time_series(dfn) grn = estimate(tsn, smoothing) # disaggregate down to states tss = get_time_series(dfn, 'detected_state').loc[states] grs = tss.groupby(level=0).apply(lambda ts: estimate(ts, smoothing)) # voluntary and mandatory reproductive numbers Rvn = np.mean(grn["2020-03-24":"2020-03-31"].R) Rmn = np.mean(grn["2020-04-01":].R) Rvs = {s: np.mean(grs.loc[s].loc["2020-03-24":"2020-03-31"].R) if s in grs.index else Rvn for s in states} Rms = {s: np.mean(grs.loc[s].loc["2020-04-01":].R) if s in grs.index else Rmn for s in states} # voluntary and mandatory distancing rates
"v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in (3, 4, 5, 6, 7, 8)] } # download data from india covid 19 api for target in paths['v3'] + paths['v4']: download_data(data, target) df = load_all_data( v3_paths = [data/filepath for filepath in paths['v3']], v4_paths = [data/filepath for filepath in paths['v4']] ) data_recency = str(df["date_announced"].max()).split()[0] run_date = str(pd.Timestamp.now()).split()[0] ts = get_time_series(df[df.detected_state == "Delhi"]) ( dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates ) = analytical_MPVS(ts.delta[ts.delta > 0], CI = CI, smoothing = convolution(window = smoothing)) #= analytical_MPVS(ts.Hospitalized[ts.Hospitalized > 0], CI = CI, smoothing = lambda ts: box_filter(ts, smoothing, 10)) np.random.seed(33) delhi = Model([ModelUnit("Delhi", 18_000_000, I0 = T_pred[-1], RR0 = RR_pred[-1], mobility = 0)]) delhi.run(14, np.zeros((1,1))) t_pred = [dates[-1] + pd.Timedelta(days = i) for i in range(len(delhi[0].delta_T))]
sero["hr"] = sero.hom_region.map(hom_regions_numeric) # pull down COVID 19 India data paths = { "v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in range(3, 19)] } # for target in paths['v3'] + paths['v4']: # download_data(data, target) df = load_all_data(v3_paths = [data/filepath for filepath in paths['v3']], v4_paths = [data/filepath for filepath in paths['v4']])\ .query("detected_state == 'Karnataka'") # get all deaths in KA on Aug 29 by district get_time_series(df, "detected_district")\ .query("status_change_date <= 'Aug 29, 2020'", engine = "python")\ .Deceased.sum(level = 0)\ .drop("Other State")\ .astype(int)\ .to_csv(data/"ka_cumulative_deaths_aug29.csv") # aggregate time series by hom_region df["detected_region"] = df.detected_district.map(hom_regions_rev) ka_ts = get_time_series(df.dropna(subset=["detected_region"]), "detected_region").rename(columns={ "Deceased": "dD", "Hospitalized": "dT", "Recovered": "dR" }).unstack(1).fillna(0).stack() cols = ["dD", "dT", "dR"] ka_ts_all = pd.concat([ka_ts, ka_ts[cols].cumsum().rename(columns = {col: col[1:] for col in cols})], axis = 1)\ .drop(columns = ["date", "time", "delta", "logdelta"])\
} for target in paths['v3'] + paths['v4']: try: download_data(data, target) except: pass df = load_all_data( v3_paths = [data/filepath for filepath in paths['v3']], v4_paths = [data/filepath for filepath in paths['v4']] ) data_recency = str(df["date_announced"].max()).split()[0] run_date = str(pd.Timestamp.now()).split()[0] ts_full = get_time_series(df, "detected_state") ts = ts_full.query("status_change_date <= 'October 14, 2020'") states = ["Bihar", "Maharashtra", "Punjab", "Tamil Nadu"][-1:] codes = ["BR", "MH", "PN", "TN"][-1:] pops = [99.02e6, 114.2e6, 27.98e6, 67.86e6][-1:] Rt_ranges = [(0.9, 2.4), (0.55, 2.06), (0.55, 2.22), (0.84, 1.06)][-1:] windows = [7, 14, 7, 10][-1:] for (state, code, pop, Rt_range, smoothing) in zip(states, codes, pops, Rt_ranges, windows): print(state) print(" + running estimation...") state_ts_full = pd.Series(data = notched_smoothing(window = smoothing)(ts_full.loc[state].Hospitalized), index = ts_full.loc[state].Hospitalized.index) (dates, Rt_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\ = analytical_MPVS(ts.loc[state].Hospitalized, CI = CI, smoothing = lambda x:x, totals = False)