# covid19india: load population and case data for state-level estimates
import adaptive.plots as plt
import flat_table
import numpy as np
import pandas as pd
from adaptive.estimators import analytical_MPVS
from adaptive.etl.covid19india import state_code_lookup
from adaptive.smoothing import notched_smoothing
from adaptive.utils import days, setup
from statsmodels.api import OLS
from statsmodels.iolib.summary2 import summary_col
from scipy.signal import convolve, deconvolve
from scipy.stats import expon as Exponential, gamma as Gamma

data, _ = setup()

# load population data
india_pop = pd.read_csv(data / "india_pop.csv", names=["state", "population"], index_col="state").to_dict()["population"]
india_pop["India"] = sum(india_pop.values())
# alias current state names to the legacy names used in the population file
india_pop["Odisha"]      = india_pop["Orissa"]
india_pop["Puducherry"]  = india_pop["Pondicherry"]
india_pop["Uttarakhand"] = india_pop["Uttaranchal"]

# load case data
with (data / 'timeseries.json').open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)
df.columns = df.columns.str.split('.', expand=True)
dates = np.squeeze(df["index"][None].values)
# index rows by date, stack the (delta/total, statistic) column levels into the
# index, and drop the "UN" (unassigned) state column
df = df.drop(columns="index").set_index(dates).stack([1, 2]).drop("UN", axis=1)
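# Hypothetical usage sketch (not in the original script): given the stacked
# frame above, pull one state's daily confirmed-case series by its covid19india
# state code and feed it to the repo's analytical_MPVS estimator. The state
# code, CI, and smoothing window here are illustrative assumptions.
tn_daily = df["TN"].xs("delta", level=1).xs("confirmed", level=1)  # Tamil Nadu: new confirmed cases per day
(est_dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, *rest) = analytical_MPVS(
    tn_daily, CI=0.95, smoothing=notched_smoothing(window=7))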
# Bihar: state- and district-level predictions (separate script)
import numpy as np
import pandas as pd
from adaptive.estimators import analytical_MPVS, linear_projection
from adaptive.models import SIR, NetworkedSIR
from adaptive.smoothing import convolution, notched_smoothing
from adaptive.utils import cwd, days, setup
from matplotlib import rcParams
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant
from tqdm import tqdm
from warnings import simplefilter  # needed for the simplefilter call below

import etl

simplefilter("ignore")

(data, figs) = setup()

gamma     = 0.2
smoothing = 10
CI        = 0.95

state_cases = pd.read_csv(data / "Bihar_cases_data_Oct03.csv", parse_dates=["date_reported"], dayfirst=True)
state_ts    = state_cases["date_reported"].value_counts().sort_index()
district_names, population_counts, _ = etl.district_migration_matrix(data / "Migration Matrix - District.csv")
populations = dict(zip(district_names, population_counts))

# first, look at state level predictions
# (this call was truncated in the excerpt; the unpacking and the right-hand
# side are completed here to match the analytical_MPVS return signature used
# in the national-level estimate elsewhere in this repo)
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) \
    = analytical_MPVS(state_ts, CI=CI, smoothing=notched_smoothing(window=smoothing))
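# Hypothetical follow-on sketch (not in the excerpt): repeat the estimate for
# each district. Assumes state_cases carries a "district" column; the column
# name and the smoother choice are illustrative assumptions.
district_estimates = {}
for district in tqdm(district_names):
    district_ts = state_cases[state_cases["district"] == district]["date_reported"]\
        .value_counts().sort_index()
    district_estimates[district] = analytical_MPVS(
        district_ts, CI=CI, smoothing=notched_smoothing(window=smoothing))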
# Indonesia: province-level case data and national Rt estimate (separate script)
import json
from pathlib import Path

import pandas as pd

# JSON field names used by the covid19.go.id API (Indonesian):
date_scale  = 1000000.0              # epoch milliseconds -> nanoseconds for pd.Timestamp
date        = "tanggal"              # "date"
timeseries  = "list_perkembangan"    # "progression list"
total_cases = "AKUMULASI_KASUS"      # "cumulative cases"

filename = lambda province: "prov_detail_{}.json".format(province.replace(" ", "_"))

def load_province_timeseries(data_path: Path, province: str) -> pd.DataFrame:
    with (data_path / filename(province)).open() as fp:
        top_level = json.load(fp)
    df = pd.DataFrame(
        [(_[date], _[total_cases]) for _ in top_level[timeseries]],
        columns=["date", "total_cases"])
    df["date"] = (date_scale * df["date"]).apply(pd.Timestamp)
    return df.set_index("date")

(data, figs) = setup(level="INFO")
# for province in provinces:
#     logger.info("downloading data for %s", province)
#     download_data(data, filename(province), base_url="https://data.covid19.go.id/public/api/")

# provinces, logger, CI, and smoothing are defined in this script's preamble (not shown)
province_cases = {province: load_province_timeseries(data, province) for province in provinces}
bgn = min(cases.index.min() for cases in province_cases.values())
end = max(cases.index.max() for cases in province_cases.values())
idx = pd.date_range(bgn, end)
# pad each province's series onto a common daily index before aggregating
province_cases = {province: cases.reindex(idx, method="pad").fillna(0)
                  for (province, cases) in province_cases.items()}
natl_cases = sum(province_cases.values())

logger.info("running national-level Rt estimate")
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) \
    = analytical_MPVS(natl_cases, CI=CI, smoothing=smoothing)
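# Hypothetical plotting sketch (not in the original script; the repo's own
# adaptive.plots helpers may differ): draw the national Rt estimate with its
# confidence band, using plain matplotlib.
import matplotlib.pyplot as plt

plt.plot(dates, Rt_pred, color="royalblue", label="$R_t$ (MPVS estimate)")
plt.fill_between(dates, Rt_CI_lower, Rt_CI_upper, color="royalblue", alpha=0.3,
                 label="{:.0%} CI".format(CI))
plt.axhline(1.0, color="gray", linestyle="--")  # epidemic growth threshold
plt.title("Indonesia: national $R_t$ estimate")
plt.legend()
plt.show()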
# covid19india: per-state Rt estimate wrapper and data prep (separate script)
import flat_table
import numpy as np
import pandas as pd
from adaptive.estimators import analytical_MPVS
from adaptive.utils import setup

# the excerpt begins inside a helper that wraps analytical_MPVS; the function
# header is reconstructed here (CI and smooth come from the script's preamble,
# not shown); totals=True signals that the input series is cumulative
def estimate(time_series: pd.Series) -> pd.DataFrame:
    estimates = analytical_MPVS(time_series, CI=CI, smoothing=smooth, totals=True)
    return pd.DataFrame(data={
        "date":        estimates[0],
        "Rt":          estimates[1],
        "Rt_upper":    estimates[2],
        "Rt_lower":    estimates[3],
        "total_cases": estimates[-4][2:],
        "new_cases":   estimates[-3],
    })

data, figs = setup()
# download_data is the repo's etl download helper (import not shown in the excerpt)
download_data(data, 'timeseries.json', "https://api.covid19india.org/v3/")

# data prep
with (data / 'timeseries.json').open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)
df.columns = df.columns.str.split('.', expand=True)
dates = np.squeeze(df["index"][None].values)
df = df.drop(columns="index").set_index(dates).stack([1, 2]).drop("UN", axis=1)

# drop the last 2 days to avoid count drops from reporting lag
# (start_date is defined in the script's preamble, not shown)
df = df[(start_date <= df.index.get_level_values(0)) &
        (df.index.get_level_values(0) <= pd.Timestamp.now().normalize() - pd.Timedelta(days=2))]
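# Hypothetical usage sketch (not in the excerpt): since the wrapper above is
# called with totals=True, feed it each state's cumulative confirmed series and
# collect the results into one long frame; the index layout follows the data
# prep step above, and the "state_code" column name is an assumption.
state_estimates = pd.concat(
    estimate(df[code].xs("total", level=1).xs("confirmed", level=1)).assign(state_code=code)
    for code in df.columns)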