# ETL script fragment: download the covid19india.org v3/v4 case files and
# build a daily/cumulative case time series.
# NOTE(review): this chunk was a whitespace-mangled paste — only the original
# line structure has been restored; no tokens were changed.
import pandas as pd
from adaptive.etl.commons import download_data
from adaptive.etl.covid19india import data_path, get_time_series, load_all_data
from adaptive.utils import setup

# setup() presumably yields (data_dir, <something unused>) — only the data
# path is kept here; TODO confirm against adaptive.utils.setup.
data, _ = setup()

# Manifests of raw files for the two covid19india.org API versions.
paths = { "v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in range(3, 18)] }

# Fetch every raw file into the data directory.
for target in paths['v3'] + paths['v4']:
    download_data(data, target)

# Combine all downloaded v3 + v4 files into a single case-level DataFrame.
df = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']], v4_paths=[data / filepath for filepath in paths['v4']])

# Daily-delta column renames: Deceased -> dD, Recovered -> dR, Hospitalized -> dT.
schema = {"Deceased": "dD", "Recovered": "dR", "Hospitalized": "dT"}

def assemble_time_series(df):
    """Build a daily time series of deltas and cumulative totals from case data.

    Selects the columns named in the module-level `schema`, renames them to the
    delta labels, fills any missing calendar dates with zeros, and merges the
    deltas with their running totals.
    """
    ts = get_time_series(df)
    deltas = ts[schema.keys()]\
        .rename(columns = schema)
    # Reindex onto a complete date range so days with no reports become 0.
    deltas = deltas.reindex(pd.date_range(deltas.index.min(), deltas.index.max()), fill_value=0)
    # Cumulative totals share the index; `lambda _: _[1]` maps delta names to
    # totals ("dD" -> "D", "dR" -> "R", "dT" -> "T").
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]), left_index=True, right_index=True).astype(int)
    # NOTE(review): fragment is truncated here — the function body continues
    # (presumably `return merged`) outside the visible chunk.
# from Karnataka COVID_age_ratios = np.array([ 0.01618736, 0.07107746, 0.23314877, 0.22946212, 0.18180406, 0.1882451, 0.05852026, 0.02155489 ]) india_pop = pd.read_csv(data / "india_pop.csv", names=["state", "population"], index_col="state").to_dict()["population"] india_pop["Odisha"] = india_pop["Orissa"] india_pop["Puducherry"] = india_pop["Pondicherry"] india_pop["Uttarakhand"] = india_pop["Uttaranchal"] # load covid19 india data download_data(data, 'timeseries.json', "https://api.covid19india.org/v3/") with (data / 'timeseries.json').open("rb") as fp: df = flat_table.normalize(pd.read_json(fp)).fillna(0) df.columns = df.columns.str.split('.', expand=True) dates = np.squeeze(df["index"][None].values) df = df.drop(columns="index").set_index(dates).stack([1, 2]).drop("UN", axis=1) # load Rt data # Rt = pd.read_csv("data/Rt_timeseries_india.csv") # date = "2020-12-24" # for state in set(df.loc[date, :, :].columns) - {"TT", "LA", "SK", "NL"}: # for state in ["TN"]: # N = india_pop[state_code_lookup[state].replace("&", "and")] # T = df[state].loc[date, "total", "confirmed"] # R = df[state].loc[date, "total", "recovered"] # D = df[state].loc[date, "total", "deceased"]
# Script fragment in two parts.
# NOTE(review): whitespace-mangled paste — only line structure restored. The
# first stanza (through `return notched`) is the TAIL of a smoothing function
# whose `def` line lies outside this chunk; it is reproduced verbatim, so the
# bare `return` will not parse at module level — do not run this fragment
# stand-alone. Relies on `iirnotch`, `convolve`, `filtfilt`, `cwd`, `mkdir`,
# `download_data`, `load_statewise_data`, `get_time_series` from elsewhere.

# Notch out the weekly reporting cycle (f0 = 1/7 cycles/day at fs = 1 sample/
# day) and its first harmonic, using filtfilt for zero-phase filtering so the
# smoothed series is not time-shifted.
fs, f0, Q = 1, 1/7, 1
b1, a1 = iirnotch(f0, Q, fs)
b2, a2 = iirnotch(2*f0, 2*Q, fs)
# Cascade the two notch filters by convolving their coefficient vectors.
b = convolve(b1, b2)
a = convolve(a1, a2)
notched = pd.Series(filtfilt(b, a, ts))
notched.index = ts.index
return notched

# Working directories for raw data and figures.
root = cwd()
data = mkdir(root/"data")
figs = mkdir(root/"figs")

###########################################################
# download latest case data
download_data(data, 'state_wise_daily.csv')
df = load_statewise_data(data/"state_wise_daily.csv")
ts = get_time_series(df, "state")

###########################################################
# load delay data
api_diff = pd.read_csv(data/"daily_diff.csv", parse_dates=["status_change_date", "report_date"], dayfirst=True)
# Keep only "Hospitalized" status changes reported after 2020-08-02.
delay = api_diff[(api_diff.current_status == "Hospitalized") & (api_diff.report_date > "2020-08-02")].copy()
delay = delay.drop(columns = [col for col in delay.columns if col.startswith("Unnamed")] + ["rowhash"])
# Re-hash the identifying columns to deduplicate rows, since the upstream
# "rowhash" column was dropped above.
delay["newhash"] = delay[["patient_number", "date_announced", "detected_district", "detected_state","current_status", "status_change_date", "num_cases"]].apply(lambda x: hash(tuple(x)), axis = 1)
delay = delay.drop_duplicates(subset=["newhash"], keep="first")
# Reporting delay in whole days: report date minus status-change date.
delay["delay"] = (delay.report_date - delay.status_change_date).dt.days
# Per-state delay histogram and normalized distribution. NOTE(review):
# Series.sum(level=...) is the deprecated spelling of
# groupby(level=...).sum() — implies an older pandas pin.
state_hist = delay[["detected_state", "num_cases", "delay"]].set_index(["detected_state", "delay"])["num_cases"].sum(level = [0, 1]).sort_index()
state_dist = state_hist/state_hist.sum(level = 0)
# Country-wide histogram of delays.
delay_hist = delay.groupby("delay")["num_cases"].sum()
# Script fragment (Indonesian provincial case data).
# NOTE(review): whitespace-mangled paste — only line structure restored. The
# first stanza (through `return df`) is the TAIL of a JSON-loading helper
# (likely `load_province_timeseries`, used below); its `def` line and the
# names `fp`, `date`, `total_cases`, `timeseries`, `date_scale`, `start_date`
# are bound outside this chunk, so the bare `return` will not parse at module
# level — do not run this fragment stand-alone.
top_level = json.load(fp)
# `date`/`total_cases`/`timeseries` are presumably JSON key names passed into
# the enclosing helper — TODO confirm against its signature.
df = pd.DataFrame([(_[date], _[total_cases]) for _ in top_level[timeseries]], columns=["date", "total_cases"])
# Raw dates arrive scaled (presumably epoch-based; `date_scale` converts to
# nanoseconds for pd.Timestamp — confirm).
df["date"] = (date_scale * df["date"]).apply(pd.Timestamp)
df.set_index("date", inplace=True)
# Optionally truncate history before `start_date`.
if start_date:
    return df[df.index >= start_date]
return df

# Output directories plus logging at INFO level.
(data, figs) = setup(level="INFO")

# Download each province's case file from the Indonesian public API.
for province in provinces:
    logger.info("downloading data for %s", province)
    download_data(data, filename(province), base_url="https://data.covid19.go.id/public/api/")

# Load every province's series starting Apr 1, 2020 ...
province_cases = { province: load_province_timeseries(data, province, "Apr 1, 2020") for province in provinces }
# ... then align all provinces on one common daily index, forward-filling
# (method="pad") interior gaps and zero-filling leading gaps.
bgn = min(cases.index.min() for cases in province_cases.values())
end = max(cases.index.max() for cases in province_cases.values())
idx = pd.date_range(bgn, end)
province_cases = { province: cases.reindex(idx, method="pad").fillna(0) for (province, cases) in province_cases.items() }

# Forecast horizon; `days` is a unit constant from elsewhere in the project.
prediction_period = 14 * days