def get_state_timeseries(states = ("Tamil Nadu",), download: bool = False) -> pd.DataFrame:
    """ Load state- and district-level COVID-19 case time series.

    states: iterable of state names to keep, or "*" to keep all non-NULL states.
            (Default changed from a list literal to a tuple: a mutable default
            argument is shared across calls; the tuple is behaviorally identical
            for the membership test in the query below.)
    download: when True, re-download every source file before loading.
    returns: DataFrame of daily deltas indexed by (state, district, status date),
             with columns renamed Deceased->dD, Hospitalized->dT, Recovered->dR.
    """
    # API files 1-2 use the v3 schema, 3-24 use the v4 schema
    paths = {
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in range(3, 25)],
    }
    if download:
        for target in paths['v3'] + paths['v4']:
            download_data(data, target)
    return load_all_data(
            v3_paths = [data/filepath for filepath in paths['v3']],
            v4_paths = [data/filepath for filepath in paths['v4']]
        )\
        .query(
            # "*" is a sentinel meaning "all states"; otherwise filter to the requested ones.
            # engine="python" is required for the `in @states` membership syntax on some setups.
            "detected_state in @states" if states != "*" else "detected_state != 'NULL'",
            engine = "python"
        )\
        .pipe(lambda _: get_time_series(_, ["detected_state", "detected_district"]))\
        .drop(columns = ["date", "time", "delta", "logdelta"])\
        .rename(columns = { "Deceased": "dD", "Hospitalized": "dT", "Recovered": "dR" })
def get_state_timeseries(
    states = "*",
    download: bool = False,
    aggregation_cols = None,
    last_API_file: int = 27
) -> pd.DataFrame:
    """ Load state- and district-level data, downloading source files if specified.

    states: iterable of state names to keep, or "*" (default) for all non-NULL states.
    download: when True, re-download every source file before loading.
    aggregation_cols: columns passed to get_time_series for grouping; defaults to
        ["detected_state", "detected_district"]. Uses a None sentinel instead of a
        list literal default — a mutable default argument is shared across calls
        and could be mutated by get_time_series or a caller.
    last_API_file: exclusive upper bound of v4 API file indices to load.
    returns: DataFrame of daily deltas with columns renamed
        Deceased->dD, Hospitalized->dT, Recovered->dR.
    """
    if aggregation_cols is None:
        aggregation_cols = ["detected_state", "detected_district"]
    # API files 1-2 use the v3 schema, 3..last_API_file-1 use the v4 schema
    paths = {
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in range(3, last_API_file)],
    }
    if download:
        for target in paths['v3'] + paths['v4']:
            download_data(data, target)
    return load_all_data(
            v3_paths = [data/filepath for filepath in paths['v3']],
            v4_paths = [data/filepath for filepath in paths['v4']]
        )\
        .query("detected_state in @states" if states != "*" else "detected_state != 'NULL'")\
        .pipe(lambda _: get_time_series(_, aggregation_cols))\
        .drop(columns = ["date", "time", "delta", "logdelta"])\
        .rename(columns = { "Deceased": "dD", "Hospitalized": "dT", "Recovered": "dR" })
import pandas as pd

from epimargin.etl.commons import download_data
from epimargin.etl.covid19india import data_path, get_time_series, load_all_data
from epimargin.utils import setup

# set up project directories; only the data directory is used here
data, _ = setup()

# API files 1-2 use the v3 schema, 3-17 use the v4 schema
paths = { "v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in range(3, 18)] }
for target in paths['v3'] + paths['v4']:
    download_data(data, target)
df = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']], v4_paths=[data / filepath for filepath in paths['v4']])

# column-rename mapping: raw status labels -> short delta column names
schema = {"Deceased": "dD", "Recovered": "dR", "Hospitalized": "dT"}

def assemble_time_series(df):
    # Build a daily time series of case-status deltas plus their cumulative totals.
    ts = get_time_series(df)
    deltas = ts[schema.keys()]\
        .rename(columns = schema)
    # fill calendar gaps with zero-delta days so the cumulative sum below is correct
    deltas = deltas.reindex(pd.date_range(deltas.index.min(), deltas.index.max()), fill_value=0)
    # cumulative columns are renamed by dropping the leading "d" (e.g. "dD"[1] -> "D")
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]), left_index=True, right_index=True).astype(int)
    # NOTE(review): no return statement is visible in this excerpt — the function
    # presumably continues (likely `return merged`) beyond this chunk; confirm
    # against the full file.
# NOTE(review): `plt` here appears to be a project plotting wrapper (it exposes
# PRED_PURPLE, RED, PlotDevice, theme), not bare matplotlib — confirm the import
# earlier in the file. `df` and `smoothed` are also defined above this excerpt.

# x-axis: full date range minus the last week (presumably to hide reporting lag — confirm)
plt.xlim(df.index.get_level_values(0).min(), df.index.get_level_values(0).max() - pd.Timedelta(days = 7))
plt.ylim(0, 900)
plt.legend(loc = "upper left")
# keep a handle on the left axis, then plot daily cases on a twinned right axis
lax = plt.gca()
plt.sca(lax.twinx())
plt.plot(df["TT"][:, "delta", "confirmed"].index, smoothed(df["TT"][:, "delta", "confirmed"].values), label = "Daily Cases", color = plt.PRED_PURPLE)
plt.legend(loc = 'upper right')
plt.PlotDevice().ylabel("new cases", rotation = -90, labelpad = 50)
plt.ylim(bottom = 0)
# restore the left axis as current before showing
plt.sca(lax)
plt.show()

# cases vs deaths
from pathlib import Path
# NOTE(review): this rebinds `data` locally to ./data, shadowing any earlier value
data = Path("./data")
# API files 1-2 use the v3 schema, 3-26 use the v4 schema
paths = {"v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in range(3, 27)]}
for target in paths['v3'] + paths['v4']:
    download_data(data, target)
# state-level daily deltas, summed across states into a national series
# NOTE(review): Series.sum(level=-1) is deprecated in pandas >= 1.3 (removed in 2.0)
# in favor of groupby(level=-1).sum() — confirm the pinned pandas version.
df = load_all_data(v3_paths = [data/filepath for filepath in paths['v3']], v4_paths = [data/filepath for filepath in paths['v4']])\
    .pipe(lambda _: get_time_series(_, ["detected_state"]))\
    .drop(columns = ["date", "time", "delta", "logdelta"])\
    .rename(columns = { "Deceased": "dD", "Hospitalized": "dT", "Recovered": "dR" }).sum(level = -1).sort_index()
plt.plot(df.index, smoothed(df.dD.values), label = "Daily Deaths", color = plt.RED)
plt.text(s = "national lockdown", x = pd.to_datetime("April 27, 2020"), y = 200, fontdict = plt.theme.note, ha = "center", va = "top")
plt.legend(loc = 'upper left')
plt.ylim(bottom = 0)
    # NOTE(review): these three statements are the tail of a function whose `def`
    # lies above this excerpt (presumably the delay-adjustment helper referenced
    # in the commented-out call below) — `onset` and `adj` are its locals.
    # Align the adjusted values to a date range ending at the last onset date.
    adr = pd.date_range(end=onset.index[-1], periods=len(adj))
    adjusted = pd.Series(adj, index=adr)
    return adjusted

# project directories
root = cwd()
data = root / "data"
figs = root / "figs"

# model parameters: recovery rate, smoothing window, confidence level
gamma = 0.2
smoothing = 10
CI = 0.95

# API files 1-2 use the v3 schema, 3-17 use the v4 schema
paths = { "v3": [data_path(_) for _ in (1, 2)], "v4": [data_path(_) for _ in range(3, 18)] }
for target in paths['v3'] + paths['v4']:
    download_data(data, target)
dfn = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']], v4_paths=[data / filepath for filepath in paths['v4']])

# empirical reporting-delay distribution, indexed by delay in days
delay = pd.read_csv(data / "bihar_delay.csv").set_index("delay")
state_ts = get_time_series(dfn, "detected_state").loc["Bihar"].Hospitalized
# delay adjustment deliberately disabled; kept for reference
# state_ts = delay_adjust(state_ts, np.squeeze(delay.values))
# drop data before the start of the national lockdown period
state_ts = state_ts[state_ts.index >= "2020-03-26"]
# NOTE(review): this call is truncated at the end of the excerpt — its remaining
# arguments continue beyond this chunk.
district_names, population_counts, _ = etl.district_migration_matrix(
# model details CI = 0.99 smoothing = 10 if __name__ == "__main__": root = cwd() data = root / "data" output = root / "output" if not data.exists(): data.mkdir() if not output.exists(): output.mkdir() # define data versions for api files paths = { "v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in (3, 4, 5, 6, 7, 8)] } # download data from india covid 19 api for target in paths['v3'] + paths['v4']: download_data(data, target) df = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']], v4_paths=[data / filepath for filepath in paths['v4']]) data_recency = str(df["date_announced"].max()).split()[0] run_date = str(pd.Timestamp.now()).split()[0] ts = get_time_series(df[df.detected_state == "Delhi"]) (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower,