import adaptive.plots as plt
import flat_table
import numpy as np
import pandas as pd
from adaptive.estimators import analytical_MPVS
from adaptive.etl.covid19india import state_code_lookup
from adaptive.smoothing import notched_smoothing
from adaptive.utils import days, setup
from statsmodels.api import OLS
from statsmodels.iolib.summary2 import summary_col
from scipy.signal import convolve, deconvolve

from scipy.stats import expon as Exponential, gamma as Gamma

# Resolve the project data directory; the second value returned by setup()
# is not needed here.
data, _ = setup()

# Population lookup. Column names are passed explicitly, and the "population"
# column is extracted as a plain {state: population} dict.
india_pop = pd.read_csv(
    data / "india_pop.csv",
    names=["state", "population"],
    index_col="state",
)
india_pop = india_pop.to_dict()["population"]

# The national total is computed first, so the alias keys added right after
# are not counted a second time.
india_pop["India"] = sum(india_pop.values())

# Expose three states under an additional spelling, reusing the value stored
# under the spelling present in the CSV.
india_pop.update({
    "Odisha": india_pop["Orissa"],
    "Puducherry": india_pop["Pondicherry"],
    "Uttarakhand": india_pop["Uttaranchal"],
})

# Case data: read the nested JSON, flatten it, and zero-fill missing cells.
with (data / 'timeseries.json').open("rb") as fp:
    df = pd.read_json(fp)
df = flat_table.normalize(df).fillna(0)

# The flattened column names are dot-separated paths; split them into a
# column MultiIndex.
df.columns = df.columns.str.split('.', expand=True)

# Pull the values of the "index" column out as a flat array, then use them as
# the row index in place of that column.
dates = np.squeeze(df["index"][None].values)
df = df.drop("index", axis=1)
df = df.set_index(dates)

# Move column levels 1 and 2 into the row index, then discard the "UN" column.
df = df.stack([1, 2])
df = df.drop(columns="UN")
# Example #2
# 0
import numpy as np
import pandas as pd
from adaptive.estimators import analytical_MPVS, linear_projection
from adaptive.models import SIR, NetworkedSIR
from adaptive.smoothing import convolution, notched_smoothing
from adaptive.utils import cwd, days, setup
from matplotlib import rcParams
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant
from tqdm import tqdm

import etl

# Suppress all warnings from here on.
simplefilter("ignore")

# Project directories for input data and output figures.
data, figs = setup()

# Analysis constants.
gamma, smoothing, CI = 0.2, 10, 0.95

# Case records; "date_reported" is parsed as a day-first date.
state_cases = pd.read_csv(
    data / "Bihar_cases_data_Oct03.csv",
    dayfirst=True,
    parse_dates=["date_reported"],
)

# Number of records per reporting date, ordered by date.
state_ts = state_cases["date_reported"].value_counts()
state_ts = state_ts.sort_index()

# District names and their population counts; the third value returned by the
# ETL helper is not used here.
district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv"
)
populations = {
    name: count for name, count in zip(district_names, population_counts)
}

# first, look at state level predictions
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower,
# Example #3
# 0
# Field names and scaling used to read the province-detail JSON files.
# pd.Timestamp interprets a bare number as nanoseconds since the epoch, so the
# raw date values are multiplied by date_scale before conversion (presumably
# they are epoch milliseconds -- confirm against the data source).
date_scale  = 1000000.0
date        = "tanggal"
timeseries  = "list_perkembangan"
total_cases = "AKUMULASI_KASUS"


def filename(province: str) -> str:
    """Return the JSON file name for *province*, with spaces replaced by underscores."""
    return "prov_detail_{}.json".format(province.replace(" ", "_"))


def load_province_timeseries(
    data_path: Path,
    province: str,
    *,
    date_key: str = date,
    cases_key: str = total_cases,
    series_key: str = timeseries,
    scale: float = date_scale,
) -> pd.DataFrame:
    """Load one province's case-count time series from its JSON file.

    The keyword-only defaults are bound when the function is defined. The
    loader therefore keeps working if the module-level names are rebound
    later; this script reuses ``total_cases`` as a variable name further down.

    Args:
        data_path: Directory containing the ``prov_detail_*.json`` files.
        province: Province name; spaces are mapped to underscores to form the
            file name.
        date_key: Key holding the raw date value in each time-series entry.
        cases_key: Key holding the case count in each time-series entry.
        series_key: Top-level key holding the list of time-series entries.
        scale: Multiplier applied to the raw date value before it is passed
            to ``pd.Timestamp``.

    Returns:
        A DataFrame indexed by ``date`` with a single ``total_cases`` column,
        in the order the entries appear in the file.

    Raises:
        FileNotFoundError: If the province file does not exist.
        KeyError: If the payload lacks ``series_key`` or an entry lacks
            ``date_key`` / ``cases_key``.
    """
    # Binary mode lets json.load detect the encoding itself instead of
    # depending on the platform's default text encoding.
    with (data_path / filename(province)).open("rb") as fp:
        top_level = json.load(fp)
    records = [(entry[date_key], entry[cases_key]) for entry in top_level[series_key]]
    df = pd.DataFrame(records, columns=["date", "total_cases"])
    df["date"] = (scale * df["date"]).apply(pd.Timestamp)
    return df.set_index("date")


data, figs = setup(level="INFO")
# Download step, currently disabled:
# for province in provinces:
#     logger.info("downloading data for %s", province)
#     download_data(data, filename(province), base_url = "https://data.covid19.go.id/public/api/")

# Load every province's series, keyed by province name.
province_cases = dict(
    (name, load_province_timeseries(data, name)) for name in provinces
)

# Build one daily calendar spanning the earliest to the latest date seen in
# any province.
bgn = min(frame.index.min() for frame in province_cases.values())
end = max(frame.index.max() for frame in province_cases.values())
idx = pd.date_range(start=bgn, end=end)

# Put each province on that calendar: gaps are forward-filled ("pad"), and
# whatever is still missing afterwards becomes 0.
province_cases = {
    name: frame.reindex(idx, method="pad").fillna(0)
    for name, frame in province_cases.items()
}

# Add the aligned province frames together to get the national series.
natl_cases = sum(province_cases.values())


logger.info("running national-level Rt estimate")
# NOTE: this unpacking rebinds the module-level name `total_cases`.
(
    dates,
    Rt_pred, Rt_CI_upper, Rt_CI_lower,
    T_pred, T_CI_upper, T_CI_lower,
    total_cases, new_cases_ts,
    anomalies, anomaly_dates,
) = analytical_MPVS(natl_cases, CI=CI, smoothing=smoothing)
    estimates = analytical_MPVS(time_series,
                                CI=CI,
                                smoothing=smooth,
                                totals=True)
    return pd.DataFrame(
        data={
            "date": estimates[0],
            "Rt": estimates[1],
            "Rt_upper": estimates[2],
            "Rt_lower": estimates[3],
            "total_cases": estimates[-4][2:],
            "new_cases": estimates[-3],
        })


# Project directories for input data and output figures.
data, figs = setup()

download_data(data, 'timeseries.json', "https://api.covid19india.org/v3/")

# Data prep: read the nested JSON, flatten it, and zero-fill missing cells.
with (data / 'timeseries.json').open("rb") as fp:
    df = pd.read_json(fp)
df = flat_table.normalize(df).fillna(0)

# The flattened column names are dot-separated paths; split them into a
# column MultiIndex.
df.columns = df.columns.str.split('.', expand=True)

# Use the values of the "index" column as the row index, then move column
# levels 1 and 2 into the row index and discard the "UN" column.
dates = np.squeeze(df["index"][None].values)
df = df.drop("index", axis=1)
df = df.set_index(dates)
df = df.stack([1, 2])
df = df.drop(columns="UN")

# Keep rows dated from start_date up to two days before today (midnight);
# the last 2 days are dropped to avoid count drops.
df = df[
    (df.index.get_level_values(0) >= start_date)
    & (
        df.index.get_level_values(0)
        <= pd.Timestamp.now().normalize() - pd.Timedelta(days=2)
    )
]