Esempio n. 1
0
def assemble_time_series(df):
    ts = get_time_series(df)
    deltas = ts[schema.keys()]\
        .rename(columns = schema)
    deltas = deltas.reindex(pd.date_range(deltas.index.min(),
                                          deltas.index.max()),
                            fill_value=0)
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]),
                          left_index=True,
                          right_index=True).astype(int)
    merged.index.name = "date"
    merged.columns.name = None
    return merged
}

for target in paths['v3'] + paths['v4']:
    try: 
        download_data(data, target)
    except:
        pass 

df = load_all_data(
    v3_paths = [data/filepath for filepath in paths['v3']], 
    v4_paths = [data/filepath for filepath in paths['v4']]
)
data_recency = str(df["date_announced"].max()).split()[0]
run_date     = str(pd.Timestamp.now()).split()[0]

ts = get_time_series(df, "detected_state")

states = ["Maharashtra", "Bihar", "Delhi", "Andhra Pradesh", "Telangana", "Tamil Nadu", "Madhya Pradesh"]

for state in states: 
    print(state)
    print("  + running estimation...")
    (
        dates,
        Rt_pred, RR_CI_upper, RR_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        anomalies, anomaly_dates
    ) = analytical_MPVS(ts.loc[state].Hospitalized, CI = CI, smoothing = notched_smoothing(window = smoothing), totals = False)
    estimates = pd.DataFrame(data = {
        "dates": dates,
Esempio n. 3
0
    .ylabel("Rt", rotation=0, labelpad=20)
plt.ylim(0, 4)

# public data
paths = {
    "v3": [data_path(_) for _ in (1, 2)],
    "v4": [data_path(_) for _ in range(3, 13)]
}

for target in paths['v3'] + paths['v4']:
    download_data(data, target)

dfn = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']],
                    v4_paths=[data / filepath for filepath in paths['v4']])

state_ts = get_time_series(dfn, "detected_state").loc["Bihar"]
district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv")
populations = dict(zip(district_names, population_counts))

# first, look at state level predictions
(dates_public, RR_pred_public, RR_CI_upper_public, RR_CI_lower_public,
 T_pred_public, T_CI_upper_public, T_CI_lower_public, total_cases_public,
 new_cases_ts_public, anomalies_public,
 anomaly_dates_public) = analytical_MPVS(
     state_ts.Hospitalized, CI=CI, smoothing=convolution(window=smoothing))
plt.plot(dates_public,
         RR_pred_public,
         label="Estimated $R_t$",
         color="midnightblue")
plt.fill_between(dates_public,
    b2, a2 = iirnotch(2*f0, 2*Q, fs)
    b = convolve(b1, b2)
    a = convolve(a1, a2)
    notched = pd.Series(filtfilt(b, a, ts))
    notched.index = ts.index
    return notched

root = cwd()
data = mkdir(root/"data")
figs = mkdir(root/"figs")

###########################################################
# download latest case data
download_data(data, 'state_wise_daily.csv')
df = load_statewise_data(data/"state_wise_daily.csv")
ts = get_time_series(df, "state")

###########################################################
# load delay data
api_diff = pd.read_csv(data/"daily_diff.csv", parse_dates=["status_change_date", "report_date"],  dayfirst=True)
delay = api_diff[(api_diff.current_status == "Hospitalized") & (api_diff.report_date > "2020-08-02")].copy()
delay = delay.drop(columns = [col for col in delay.columns if col.startswith("Unnamed")] + ["rowhash"])
delay["newhash"] = delay[["patient_number", "date_announced", "detected_district", "detected_state","current_status", "status_change_date", "num_cases"]].apply(lambda x: hash(tuple(x)), axis = 1)
delay = delay.drop_duplicates(subset=["newhash"], keep="first")
delay["delay"] = (delay.report_date - delay.status_change_date).dt.days
state_hist = delay[["detected_state", "num_cases", "delay"]].set_index(["detected_state", "delay"])["num_cases"].sum(level = [0, 1]).sort_index()
state_dist = state_hist/state_hist.sum(level = 0)

delay_hist = delay.groupby("delay")["num_cases"].sum()
delay_dist = delay_hist/delay_hist.sum()
Esempio n. 5
0

# set to cloud temp directory if not explicitly told to run locally
root = cwd() if len(sys.argv) > 1 and sys.argv[1] == "--local" else Path(
    "/tmp")
data = root / "data"

# model details
gamma = 0.2
smoothing = 10
CI = 0.95

download_data(data, 'state_wise_daily.csv')

state_df = load_statewise_data(data / "state_wise_daily.csv")
country_time_series = get_time_series(state_df)

estimates = []
timeseries = []

# country level
(dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
     country_time_series["Hospitalized"].iloc[:-1],
     CI=CI,
     smoothing=notched_smoothing(window=smoothing))

country_code = state_name_lookup["India"]
for row in zip(dates, RR_pred, RR_CI_upper, RR_CI_lower):
    timeseries.append((country_code, *row))
Esempio n. 6
0
    ywf = fft(y * w)
    xf = np.linspace(0.0, 1.0 / (2.0 * T), N // 2)
    plt.plot(xf[1:N // 2],
             2 / N * np.abs(ywf[1:N // 2]),
             ".",
             alpha=0.7,
             label=label)


root = cwd()
data = root / "data"
figs = root / "figs"

download_data(data, 'state_wise_daily.csv')
state_df = load_statewise_data(data / "state_wise_daily.csv")
natl_time_series = get_time_series(state_df)
time_series = get_time_series(state_df, 'state')

# is there chunking in reporting?
print("checking average infection differentials...")
time_series["delta_I"] = time_series.groupby(level=0)['Hospitalized'].diff()
time_series["dow"] = time_series.index.get_level_values(1).dayofweek
plot_average_change(time_series,
                    "(All India)",
                    filename=figs / "avg_delta_I_DoW_India.png")
for state in tqdm(time_series.index.get_level_values(0).unique()):
    plot_average_change(time_series.loc[state],
                        f"({state})",
                        filename=figs / f"avg_delta_I_DoW_{state}.png")

# are anomalies falling on certain days?
Esempio n. 7
0
CI = 0.95

paths = {
    "v3": [data_path(_) for _ in (1, 2)],
    "v4": [data_path(_) for _ in range(3, 18)]
}

for target in paths['v3'] + paths['v4']:
    download_data(data, target)

dfn = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']],
                    v4_paths=[data / filepath for filepath in paths['v4']])

delay = pd.read_csv(data / "bihar_delay.csv").set_index("delay")

state_ts = get_time_series(dfn, "detected_state").loc["Bihar"].Hospitalized
# state_ts = delay_adjust(state_ts, np.squeeze(delay.values))
state_ts = state_ts[state_ts.index >= "2020-03-26"]
district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv")
populations = dict(zip(district_names, population_counts))

# first, look at state level predictions
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
     state_ts,
     CI=CI,
     smoothing=notched_smoothing(window=smoothing),
     totals=False)

plt.Rt(dates, Rt_pred[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], CI, ymin=0, ymax=3)\
              "v4": ["raw_data3.csv", "raw_data4.csv",
                     "raw_data5.csv", "raw_data6.csv",
                     "raw_data7.csv", "raw_data8.csv",
                     "raw_data9.csv", "raw_data10.csv", "raw_data11.csv"] } 

    # download data from india covid 19 api
    for target in paths['v3'] + paths['v4']:
        download_data(data, target)

    # run rolling regressions on historical national case data 
    dfn = load_all_data(
        v3_paths = [data/filepath for filepath in paths['v3']], 
        v4_paths = [data/filepath for filepath in paths['v4']]
    )
    data_recency = str(dfn["date_announced"].max()).split()[0]
    tsn = get_time_series(dfn)
    grn = estimate(tsn, smoothing)

    # disaggregate down to states
    tss = get_time_series(dfn, 'detected_state').loc[states]

    grs = tss.groupby(level=0).apply(lambda ts: estimate(ts, smoothing))
    
    # voluntary and mandatory reproductive numbers
    Rvn = np.mean(grn["2020-03-24":"2020-03-31"].R)
    Rmn = np.mean(grn["2020-04-01":].R)

    Rvs = {s: np.mean(grs.loc[s].loc["2020-03-24":"2020-03-31"].R) if s in grs.index else Rvn for s in states}
    Rms = {s: np.mean(grs.loc[s].loc["2020-04-01":].R)             if s in grs.index else Rmn for s in states}

    # voluntary and mandatory distancing rates 
Esempio n. 9
0
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in (3, 4, 5, 6, 7, 8)]
    }

    # download data from india covid 19 api
    for target in paths['v3'] + paths['v4']:
        download_data(data, target)

    df = load_all_data(
        v3_paths = [data/filepath for filepath in paths['v3']], 
        v4_paths = [data/filepath for filepath in paths['v4']]
    )
    data_recency = str(df["date_announced"].max()).split()[0]
    run_date     = str(pd.Timestamp.now()).split()[0]

    ts = get_time_series(df[df.detected_state == "Delhi"])

    (
        dates,
        RR_pred, RR_CI_upper, RR_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        anomalies, anomaly_dates
    ) = analytical_MPVS(ts.delta[ts.delta > 0], CI = CI, smoothing = convolution(window = smoothing)) 
    #= analytical_MPVS(ts.Hospitalized[ts.Hospitalized > 0], CI = CI, smoothing = lambda ts: box_filter(ts, smoothing, 10))

    np.random.seed(33)
    delhi = Model([ModelUnit("Delhi", 18_000_000, I0 = T_pred[-1], RR0 = RR_pred[-1], mobility = 0)])
    delhi.run(14, np.zeros((1,1)))

    t_pred = [dates[-1] + pd.Timedelta(days = i) for i in range(len(delhi[0].delta_T))]
Esempio n. 10
0
sero["hr"] = sero.hom_region.map(hom_regions_numeric)

# pull down COVID 19 India data
paths = {
    "v3": [data_path(i) for i in (1, 2)],
    "v4": [data_path(i) for i in range(3, 19)]
}
# for target in paths['v3'] + paths['v4']:
#     download_data(data, target)
df = load_all_data(v3_paths = [data/filepath for filepath in paths['v3']],  v4_paths = [data/filepath for filepath in paths['v4']])\
    .query("detected_state == 'Karnataka'")

# get all deaths in KA on Aug 29 by district
get_time_series(df, "detected_district")\
    .query("status_change_date <= 'Aug 29, 2020'", engine = "python")\
    .Deceased.sum(level = 0)\
    .drop("Other State")\
    .astype(int)\
    .to_csv(data/"ka_cumulative_deaths_aug29.csv")

# aggregate time series by hom_region
df["detected_region"] = df.detected_district.map(hom_regions_rev)
ka_ts = get_time_series(df.dropna(subset=["detected_region"]),
                        "detected_region").rename(columns={
                            "Deceased": "dD",
                            "Hospitalized": "dT",
                            "Recovered": "dR"
                        }).unstack(1).fillna(0).stack()

cols = ["dD", "dT", "dR"]
ka_ts_all = pd.concat([ka_ts, ka_ts[cols].cumsum().rename(columns = {col: col[1:] for col in cols})], axis = 1)\
    .drop(columns = ["date", "time", "delta", "logdelta"])\
Esempio n. 11
0
}

for target in paths['v3'] + paths['v4']:
    try: 
        download_data(data, target)
    except:
        pass 

df = load_all_data(
    v3_paths = [data/filepath for filepath in paths['v3']], 
    v4_paths = [data/filepath for filepath in paths['v4']]
)
data_recency = str(df["date_announced"].max()).split()[0]
run_date     = str(pd.Timestamp.now()).split()[0]

ts_full = get_time_series(df, "detected_state")
ts = ts_full.query("status_change_date <= 'October 14, 2020'")

states    = ["Bihar", "Maharashtra", "Punjab", "Tamil Nadu"][-1:]
codes     = ["BR",    "MH",          "PN",     "TN"][-1:]
pops      = [99.02e6, 114.2e6,       27.98e6,  67.86e6][-1:]
Rt_ranges = [(0.9, 2.4), (0.55, 2.06), (0.55, 2.22), (0.84, 1.06)][-1:]
windows   = [7, 14, 7, 10][-1:]


for (state, code, pop, Rt_range, smoothing) in zip(states, codes, pops, Rt_ranges, windows): 
    print(state)
    print("  + running estimation...")
    state_ts_full = pd.Series(data = notched_smoothing(window = smoothing)(ts_full.loc[state].Hospitalized), index = ts_full.loc[state].Hospitalized.index)
    (dates, Rt_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(ts.loc[state].Hospitalized, CI = CI, smoothing = lambda x:x, totals = False)