# Script fragment: downloads a fixed list of data files, loads them into a
# single frame via load_all_data, and defines a helper that reshapes the
# loaded data into a table of per-date counts plus running totals.
import pandas as pd
from epimargin.etl.commons import download_data
from epimargin.etl.covid19india import data_path, get_time_series, load_all_data
from epimargin.utils import setup

# setup() returns a pair; only the first element is kept. It is passed to
# download_data and joined with each file name using the `/` operator below.
data, _ = setup()

# File names grouped by the load_all_data keyword they are later passed under:
# indices 1-2 go to v3_paths, indices 3-17 go to v4_paths.
paths = {
    "v3": [data_path(i) for i in (1, 2)],
    "v4": [data_path(i) for i in range(3, 18)]
}

# Fetch every listed file before loading anything.
for target in paths['v3'] + paths['v4']:
    download_data(data, target)

df = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']],
    v4_paths=[data / filepath for filepath in paths['v4']])

# Maps the column names selected from the time series to the short names
# they are renamed to inside assemble_time_series.
schema = {"Deceased": "dD", "Recovered": "dR", "Hospitalized": "dT"}

def assemble_time_series(df):
    # NOTE(review): no return statement is visible for this function; the
    # definition appears to continue beyond the visible text -- confirm
    # against the full file before relying on its result.
    ts = get_time_series(df)
    # Keep only the columns named by schema's keys, renamed to schema's values.
    deltas = ts[schema.keys()]\
        .rename(columns = schema)
    # Reindex onto a pd.date_range spanning the smallest and largest index
    # values; rows introduced by the reindex are filled with 0.
    deltas = deltas.reindex(pd.date_range(deltas.index.min(), deltas.index.max()), fill_value=0)
    # Merge the renamed columns with their cumulative sums on the index. The
    # lambda renames each cumulative column to the character at position 1 of
    # its name (e.g. "dD" -> "D"), so the two sets of columns do not collide.
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]), left_index=True, right_index=True).astype(int)
# Script fragment: loads the DKI Jakarta case line list and the matching
# district shapes, trims both to the columns and rows of interest, and builds
# a per-date case count.
# NOTE(review): `window`, `notched_smoothing`, `gpd`, `shapely` and `logger`
# are not defined in the visible text; they must come from earlier in the file.
CI = 0.95
smoothing = notched_smoothing(window=window)

# Line-list columns discarded right after loading.
dkij_drop_cols = [
    'age', 'sex', 'fever', 'temp', 'cough', 'flu', 'sore_throat',
    'shortness_breath', 'shivering', 'headache', 'malaise', 'muscle_pain',
    'nausea_vomiting', 'abdominal_pain', 'diarrhoea',
    'date_recovered', 'date_died',
    'heart_disease', 'diabetes', 'pneumonia', 'hypertension', 'malignant',
    'immunology_disorder', 'chronic_kidney', 'chronic_liver', 'copd',
    'obesity', 'pregnant', 'tracing', 'otg', 'icu', 'intubation', 'ecmo',
    'criteria_cases', 'age_group', 'age_group2', 'date_discharge',
    'patient_status', 'death',
]

# Shapefile attribute columns discarded right after loading.
shp_drop_cols = [
    'GID_0', 'NAME_0', 'GID_1', 'NAME_1', 'NL_NAME_1', 'GID_2',
    'VARNAME_2', 'NL_NAME_2', 'TYPE_2', 'ENGTYPE_2', 'CC_2', 'HASC_2',
]

data, figs = setup(level="INFO")

# Keep only rows whose province is 'DKI JAKARTA'; the province column is
# dropped afterwards together with the unused line-list columns.
dkij = (
    pd.read_stata(data / "dkijakarta_180820.dta")
    .query("province == 'DKI JAKARTA'")
    .drop(columns=[*dkij_drop_cols, "province"])
)
dkij["district"] = dkij["district"].str.title()

# Level-2 shapes restricted to rows whose NAME_1 is 'Jakarta Raya'.
gdf = (
    gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")
    .query("NAME_1 == 'Jakarta Raya'")
    .drop(columns=shp_drop_cols)
)

# Retain only the shapes that intersect this bounding box.
bbox = shapely.geometry.box(minx=106.65, miny=-6.40, maxx=107.00, maxy=-6.05)
gdf = gdf.loc[gdf.intersects(bbox)]

jakarta_districts = dkij["district"].str.title().unique()

# Number of non-null `id` values per `date_positiveresult`, named "cases".
jakarta_cases = (
    dkij.groupby("date_positiveresult")["id"]
    .count()
    .rename("cases")
)

logger.info("running province-level Rt estimate")
# Script fragment: loads a state-level case line list and a district
# migration matrix, then starts a state-level prediction step.
import numpy as np
import pandas as pd
from epimargin.estimators import analytical_MPVS, linear_projection
from epimargin.models import SIR, NetworkedSIR
from epimargin.smoothing import convolution, notched_smoothing
from epimargin.utils import cwd, days, setup
from matplotlib import rcParams
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant
from tqdm import tqdm

import etl

# NOTE(review): `simplefilter` is not imported in the visible text; presumably
# it comes from `warnings` via an import above this chunk -- confirm.
simplefilter("ignore")

(data, figs) = setup()

# Module-level constants; the statements that consume them are not visible
# in this chunk.
gamma = 0.2
smoothing = 10
CI = 0.95

# Case records with `date_reported` parsed as dates, day-first.
state_cases = pd.read_csv(data / "Bihar_cases_data_Oct03.csv", parse_dates=["date_reported"], dayfirst=True)
# Number of records per reported date, ordered by date.
state_ts = state_cases["date_reported"].value_counts().sort_index()
# The third value returned by district_migration_matrix is discarded here.
district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv")
# Lookup from each district name to the population count at the same position.
populations = dict(zip(district_names, population_counts))

# first, look at state level predictions
# NOTE(review): the tuple target below is not closed within the visible text;
# the rest of this assignment lies beyond the end of this chunk.
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower,
# NOTE(review): the next two statements end in a bare `return`, so they are
# the tail of a function whose `def` line (and the definitions of
# `time_series` and `smooth`) lies before the visible text. Their original
# indentation is not recoverable from this chunk.
estimates = analytical_MPVS(time_series, CI=CI, smoothing=smooth, totals=True)
# Repackage selected positions of the estimator output as named columns:
# positions 0-3 become date / Rt / Rt_upper / Rt_lower, the fourth-from-last
# entry (minus its first two elements) becomes total_cases, and the
# third-from-last becomes new_cases.
# NOTE(review): the `[2:]` slice presumably aligns lengths with the other
# columns -- confirm against analytical_MPVS.
return pd.DataFrame(
    data={
        "date": estimates[0],
        "Rt": estimates[1],
        "Rt_upper": estimates[2],
        "Rt_lower": estimates[3],
        "total_cases": estimates[-4][2:],
        "new_cases": estimates[-3],
    })

data, figs = setup()

# Fetch the four input files from the same base URL.
download_data(data, 'timeseries.json', "https://api.covid19india.org/v3/")
download_data(data, 'state_wise.csv', "https://api.covid19india.org/v3/")
download_data(data, 'states.csv', "https://api.covid19india.org/v3/")
download_data(data, 'districts.csv', "https://api.covid19india.org/v3/")

# data prep
# Read the JSON file, flatten it with flat_table.normalize, and replace
# missing values with 0.
with (data / 'timeseries.json').open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)
# Split the column names on '.' into multiple levels (expand=True).
df.columns = df.columns.str.split('.', expand=True)
# NOTE(review): presumably selects the flattened "index" column, whose lower
# levels are empty after the split, as a 1-D array -- confirm.
dates = np.squeeze(df["index"][None].values)
# NOTE(review): this chain ends on a line continuation; the remaining calls
# lie beyond the end of the visible text.
df = df.drop(columns = "index")\
    .set_index(dates)\
    .stack([1, 2])\
    .drop("UN", axis = 1)\