Beispiel #1
0
import pandas as pd
from epimargin.etl.commons import download_data
from epimargin.etl.covid19india import data_path, get_time_series, load_all_data
from epimargin.utils import setup

# Set up the working data directory (the figures path is unused here).
data, _ = setup()

# Raw file lists for the two covid19india.org API schema versions.
paths = {
    "v3": [data_path(1), data_path(2)],
    "v4": [data_path(i) for i in range(3, 18)],
}

# Fetch every raw file into the data directory.
for version_files in paths.values():
    for target in version_files:
        download_data(data, target)

# Combine the downloaded v3 and v4 files into a single DataFrame.
v3_files = [data / filepath for filepath in paths["v3"]]
v4_files = [data / filepath for filepath in paths["v4"]]
df = load_all_data(v3_paths = v3_files, v4_paths = v4_files)

# Rename map: raw status labels -> daily-delta column names.
schema = {"Deceased": "dD", "Recovered": "dR", "Hospitalized": "dT"}


def assemble_time_series(df):
    # Build a merged daily-delta / cumulative-total case time series.
    # NOTE(review): no return statement is visible — this definition appears
    # truncated at the snippet boundary; presumably `merged` is returned.
    # Confirm against the full source.
    ts = get_time_series(df)
    # Keep only the status columns listed in `schema`, renamed to their
    # delta names ("dD", "dR", "dT").
    deltas = ts[schema.keys()]\
        .rename(columns = schema)
    # Fill any missing calendar dates with zero deltas so the index is a
    # contiguous daily range.
    deltas = deltas.reindex(pd.date_range(deltas.index.min(),
                                          deltas.index.max()),
                            fill_value=0)
    # Cumulative sums, renamed to the second character of each delta column
    # (e.g. "dD" -> "D"), merged alongside the daily deltas on the date index.
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]),
                          left_index=True,
                          right_index=True).astype(int)
Beispiel #2
0
# Confidence level and smoothing function for downstream Rt estimation.
CI        = 0.95
smoothing = notched_smoothing(window = window)

# Line-list columns not needed for case counting; dropped on load.
dkij_drop_cols = [
    'age', 'sex',
    # symptoms
    'fever', 'temp', 'cough', 'flu', 'sore_throat', 'shortness_breath',
    'shivering', 'headache', 'malaise', 'muscle_pain', 'nausea_vomiting',
    'abdominal_pain', 'diarrhoea',
    # outcomes and comorbidities
    'date_recovered', 'date_died', 'heart_disease', 'diabetes', 'pneumonia',
    'hypertension', 'malignant', 'immunology_disorder', 'chronic_kidney',
    'chronic_liver', 'copd', 'obesity', 'pregnant',
    # case-management metadata
    'tracing', 'otg', 'icu', 'intubation', 'ecmo', 'criteria_cases',
    'age_group', 'age_group2', 'date_discharge', 'patient_status', 'death',
]

# GADM shapefile attribute columns we do not need.
shp_drop_cols = [
    'GID_0', 'NAME_0', 'GID_1', 'NAME_1', 'NL_NAME_1',
    'GID_2', 'VARNAME_2', 'NL_NAME_2', 'TYPE_2', 'ENGTYPE_2',
    'CC_2', 'HASC_2',
]

data, figs = setup(level = "INFO")

# Load the DKI Jakarta line list, keep only the province of interest, and
# drop everything except the case-count fields.
raw_dkij = pd.read_stata(data / "dkijakarta_180820.dta")
dkij = raw_dkij.query("province == 'DKI JAKARTA'")
dkij = dkij.drop(columns = dkij_drop_cols + ["province"])
dkij["district"] = dkij["district"].str.title()

# Jakarta district boundaries, clipped to a bounding box around the city.
gdf = gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")
gdf = gdf.query("NAME_1 == 'Jakarta Raya'")
gdf = gdf.drop(columns = shp_drop_cols)
bbox = shapely.geometry.box(minx = 106.65, maxx = 107.00, miny = -6.40, maxy = -6.05)
gdf = gdf[gdf.intersects(bbox)]

# District names, and daily case counts keyed by positive-test date.
jakarta_districts = dkij["district"].str.title().unique()
case_counts = dkij.groupby("date_positiveresult")["id"].count()
jakarta_cases = case_counts.rename("cases")

logger.info("running province-level Rt estimate")
Beispiel #3
0
import numpy as np
import pandas as pd
from epimargin.estimators import analytical_MPVS, linear_projection
from epimargin.models import SIR, NetworkedSIR
from epimargin.smoothing import convolution, notched_smoothing
from epimargin.utils import cwd, days, setup
from matplotlib import rcParams
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant
from tqdm import tqdm

import etl

# Silence warnings globally.
# NOTE(review): `simplefilter` is not imported in the visible import block —
# presumably `from warnings import simplefilter` exists elsewhere; confirm.
simplefilter("ignore")

data, figs = setup()

# Estimation parameters: recovery rate, smoothing window, confidence level.
gamma     = 0.2
smoothing = 10
CI        = 0.95

# Raw Bihar line list; `date_reported` holds day-first date strings.
state_cases = pd.read_csv(
    data / "Bihar_cases_data_Oct03.csv",
    parse_dates = ["date_reported"],
    dayfirst    = True)
# Daily state-level case counts in chronological order.
state_ts = state_cases["date_reported"].value_counts().sort_index()
# District names and population counts from the migration-matrix file.
district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv")
populations = {name: count for (name, count) in zip(district_names, population_counts)}

# first, look at state level predictions
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower,
Beispiel #4
0
    estimates = analytical_MPVS(time_series,
                                CI=CI,
                                smoothing=smooth,
                                totals=True)
    return pd.DataFrame(
        data={
            "date": estimates[0],
            "Rt": estimates[1],
            "Rt_upper": estimates[2],
            "Rt_lower": estimates[3],
            "total_cases": estimates[-4][2:],
            "new_cases": estimates[-3],
        })


(data, figs) = setup()

# Fetch the covid19india.org v3 API files into the data directory.
API_ROOT = "https://api.covid19india.org/v3/"
for filename in ('timeseries.json', 'state_wise.csv', 'states.csv', 'districts.csv'):
    download_data(data, filename, API_ROOT)

# data prep: flatten the nested timeseries JSON into a wide table,
# replacing missing entries with zero.
with (data / 'timeseries.json').open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)
# Split the dotted column labels into a MultiIndex.
df.columns = df.columns.str.split('.', expand=True)
# The "index" column carries the observation dates.
dates = np.squeeze(df["index"][None].values)
df = df.drop(columns = "index")\
    .set_index(dates)\
    .stack([1, 2])\
    .drop("UN", axis = 1)\