예제 #1
0
def load_national_timeseries(download: bool = False) -> pd.DataFrame:
    """Load the national case time series stored in ``timeseries.json``.

    Args:
        download: when True, first fetch ``timeseries.json`` from the
            covid19india v3 API into the module-level ``data`` directory.

    Returns:
        The flattened JSON table, indexed by the values of its ``index``
        column, with column levels 1 and 2 stacked into the row index and
        the ``UN`` column removed.
    """
    print(":: loading case timeseries data")
    if download:
        download_data(data, 'timeseries.json', "https://api.covid19india.org/v3/")

    with (data / 'timeseries.json').open("rb") as handle:
        raw = pd.read_json(handle)
    flat = flat_table.normalize(raw).fillna(0)

    # Dotted column names become a column MultiIndex, one level per segment.
    flat.columns = flat.columns.str.split('.', expand=True)

    # The ("index", None) column carries the row labels used as the new index.
    dates = np.squeeze(flat["index"][None].values)

    without_index = flat.drop(columns="index", level=0)
    stacked = without_index.set_index(dates).stack([1, 2])
    return stacked.drop("UN", axis=1)
예제 #2
0
def get_state_timeseries(states=None,
                         download: bool = False) -> pd.DataFrame:
    """Load the state/district time series for the requested states.

    Args:
        states: collection of state names to keep, or the string ``"*"`` to
            keep every row whose ``detected_state`` is not the literal
            ``'NULL'``. Defaults to ``["Tamil Nadu"]``.
        download: when True, fetch every raw API file with ``download_data``
            before loading.

    Returns:
        The output of ``get_time_series`` grouped on ``detected_state`` and
        ``detected_district``, without the ``date``, ``time``, ``delta`` and
        ``logdelta`` columns, and with ``Deceased``/``Hospitalized``/
        ``Recovered`` renamed to ``dD``/``dT``/``dR``.
    """
    # Build the default per call: a list literal in the signature is a single
    # object shared by every invocation of the function.
    if states is None:
        states = ["Tamil Nadu"]

    v3_files = [data_path(i) for i in (1, 2)]
    v4_files = [data_path(i) for i in range(3, 25)]
    if download:
        for target in v3_files + v4_files:
            download_data(data, target)

    # ``@states`` is resolved by pandas from this function's local scope, so
    # the query must be issued directly from this frame.
    state_filter = ("detected_state in @states"
                    if states != "*" else "detected_state != 'NULL'")
    return (
        load_all_data(
            v3_paths=[data / filepath for filepath in v3_files],
            v4_paths=[data / filepath for filepath in v4_files],
        )
        .query(state_filter, engine="python")
        .pipe(get_time_series, ["detected_state", "detected_district"])
        .drop(columns=["date", "time", "delta", "logdelta"])
        .rename(columns={
            "Deceased":     "dD",
            "Hospitalized": "dT",
            "Recovered":    "dR",
        })
    )
예제 #3
0
def run_download(_):
    """Download the aggregated state/district CSVs and upload them to the bucket.

    Args:
        _: ignored; the body never reads it.

    Returns:
        The string ``'OK!'``.
    """
    today = pd.Timestamp.now().strftime("%d-%m-%Y")
    print(f"Starting download of API files on {today}")

    # Scratch directory under /tmp that receives the downloaded files.
    scratch = mkdir(Path("/tmp")/"data")

    # Aggregated CSVs, fetched in the same order as before.
    for csv_name in ("states.csv", "districts.csv"):
        download_data(scratch, csv_name)

    print("Uploading time series to storage bucket.")
    bucket = storage.Client().bucket(bucket_name)
    for csv_name in ("districts.csv", "states.csv"):
        blob = bucket.blob(f"pipeline/raw/{csv_name}")
        blob.upload_from_filename(str(scratch/csv_name), content_type = "text/csv")

    return 'OK!'
예제 #4
0
def get_state_timeseries(
    states = "*", 
    download: bool = False, 
    aggregation_cols = None, 
    last_API_file: int = 27) -> pd.DataFrame:
    """Load state- and district-level data, downloading source files if specified.

    Args:
        states: collection of state names to keep, or ``"*"`` (the default)
            to keep every row whose ``detected_state`` is not the literal
            ``'NULL'``.
        download: when True, fetch every raw API file with ``download_data``
            before loading.
        aggregation_cols: columns handed to ``get_time_series``. Defaults to
            ``["detected_state", "detected_district"]``.
        last_API_file: exclusive upper bound on the v4 file numbers; files
            ``3`` through ``last_API_file - 1`` are loaded.

    Returns:
        The output of ``get_time_series`` without the ``date``, ``time``,
        ``delta`` and ``logdelta`` columns, and with ``Deceased``/
        ``Hospitalized``/``Recovered`` renamed to ``dD``/``dT``/``dR``.
    """
    # Create the default per call: a list literal in the signature is one
    # shared object, so any in-place change made by a callee would leak into
    # every later call.
    if aggregation_cols is None:
        aggregation_cols = ["detected_state", "detected_district"]

    v3_files = [data_path(i) for i in (1, 2)]
    v4_files = [data_path(i) for i in range(3, last_API_file)]
    if download:
        for target in v3_files + v4_files:
            download_data(data, target)

    # ``@states`` is resolved by pandas from this function's local scope, so
    # the query must be issued directly from this frame.
    state_filter = ("detected_state in @states"
                    if states != "*" else "detected_state != 'NULL'")
    return (
        load_all_data(
            v3_paths=[data/filepath for filepath in v3_files],
            v4_paths=[data/filepath for filepath in v4_files],
        )
        .query(state_filter)
        .pipe(get_time_series, aggregation_cols)
        .drop(columns=["date", "time", "delta", "logdelta"])
        .rename(columns={
            "Deceased":     "dD",
            "Hospitalized": "dT",
            "Recovered":    "dR",
        })
    )
예제 #5
0
import pandas as pd
from epimargin.etl.commons import download_data
from epimargin.etl.covid19india import data_path, get_time_series, load_all_data
from epimargin.utils import setup

# setup() returns two values; only the first is used in this section.
data, _ = setup()

# File names of the raw API dumps, grouped by the loader argument
# (v3_paths / v4_paths) they are passed to below.
paths = {
    "v3": [data_path(i) for i in (1, 2)],
    "v4": [data_path(i) for i in range(3, 18)],
}

# Fetch every listed file: v3 files first, then v4.
for target in [*paths['v3'], *paths['v4']]:
    download_data(data, target)

df = load_all_data(
    v3_paths=[data / filepath for filepath in paths['v3']],
    v4_paths=[data / filepath for filepath in paths['v4']],
)

# Source column name -> short column name applied in assemble_time_series.
schema = {
    "Deceased": "dD",
    "Recovered": "dR",
    "Hospitalized": "dT",
}


def assemble_time_series(df):
    """Combine daily and cumulative count columns derived from ``df``.

    Args:
        df: frame passed unchanged to ``get_time_series``.

    NOTE(review): the visible body stops after computing ``merged`` and has
    no ``return``; as shown the function returns None. The remainder may lie
    outside this excerpt -- confirm against the full source.
    """
    ts = get_time_series(df)
    # Keep only the columns named by the module-level ``schema`` mapping and
    # rename them to their short names.
    deltas = ts[schema.keys()]\
        .rename(columns = schema)
    # Reindex onto a date_range from the first to the last index value,
    # inserting 0 for any date that had no row.
    deltas = deltas.reindex(pd.date_range(deltas.index.min(),
                                          deltas.index.max()),
                            fill_value=0)
    # Join each column with its running total. The rename keeps only
    # character 1 of each column name (e.g. "dD" -> "D"), which gives the
    # cumulative columns names distinct from the per-day ones.
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]),
                          left_index=True,
                          right_index=True).astype(int)
예제 #6
0
    "India", 
    stringency = stringency)
# Title and annotate the plot through the PlotDevice helper, then display it.
(
    plt.PlotDevice()
    .title("\nIndia: Mobility & Lockdown Trends")
    .annotate(
        "Google Mobility Data (baseline mobility measured from Jan 3 - Feb 6, 2020) + Oxford COVID Policy Tracker"
    )
)
plt.show()

# mobility vs cases 

from pathlib import Path

import flat_table
from epimargin.etl.commons import download_data

# Local directory that receives the downloaded API file.
data = Path("./data")
download_data(data, 'timeseries.json', "https://api.covid19india.org/v3/")

# data prep
# flat_table.normalize presumably flattens the nested JSON into dotted column
# names (the split on '.' below relies on that); missing values become 0.
with (data/'timeseries.json').open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)
# Turn the dotted names into a column MultiIndex, one level per segment.
df.columns = df.columns.str.split('.', expand = True)
# The ("index", None) column holds the row labels that become the new index.
dates = np.squeeze(df["index"][None].values)
# Re-index by those labels, stack column levels 1 and 2 into the rows, and
# drop the "UN" column.
df = df.drop(columns = "index").set_index(dates).stack([1, 2]).drop("UN", axis = 1)

# Rows without a sub_region_1 value -- presumably the country-level mobility
# series; confirm against the mobility data schema.
series = mobility[mobility.sub_region_1.isna()]
plt.plot(series.date, smoothed(series.retail_and_recreation_percent_change_from_baseline), label = "Retail/Recreation")
# Shade March 24 - June 1, 2020 and label the band as the national lockdown.
plt.fill_betweenx((-100, 60), pd.to_datetime("March 24, 2020"), pd.to_datetime("June 1, 2020"), color = "black", alpha = 0.05, zorder = -1)
plt.text(s = "national lockdown", x = pd.to_datetime("April 27, 2020"), y = -20, fontdict = plt.note_font, ha = "center", va = "top")
# Fix the axes: y from -100 to 10, x spanning the full date range of the series.
plt.ylim(-100, 10)
plt.xlim(series.date.min(), series.date.max())
plt.legend(loc = 'upper right')
예제 #7
0
                                smoothing=smooth,
                                totals=True)
    return pd.DataFrame(
        data={
            "date": estimates[0],
            "Rt": estimates[1],
            "Rt_upper": estimates[2],
            "Rt_lower": estimates[3],
            "total_cases": estimates[-4][2:],
            "new_cases": estimates[-3],
        })


data, figs = setup()

# Fetch every input file from the covid19india v3 API, in the original order.
for _api_file in ('timeseries.json', 'state_wise.csv', 'states.csv', 'districts.csv'):
    download_data(data, _api_file, "https://api.covid19india.org/v3/")

# data prep
with (data / 'timeseries.json').open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)

# Dotted column names become a column MultiIndex, one level per segment.
df.columns = df.columns.str.split('.', expand=True)

# The ("index", None) column holds the row labels that become the new index.
dates = np.squeeze(df["index"][None].values)
df = (
    df.drop(columns="index")
    .set_index(dates)
    .stack([1, 2])
    .drop("UN", axis=1)
    .fillna(0)
)
예제 #8
0
    )\
    .fit()\
    .predict([1, julian_dates[-1] + period])[0]


# Use the cloud temp directory unless "--local" is given as the first argument.
if len(sys.argv) > 1 and sys.argv[1] == "--local":
    root = cwd()
else:
    root = Path("/tmp")
data = root / "data"

# model details
gamma, smoothing, CI = 0.2, 10, 0.95

download_data(data, 'state_wise_daily.csv')

state_df = load_statewise_data(data / "state_wise_daily.csv")
country_time_series = get_time_series(state_df)

# Two separate accumulators, both empty at this point.
estimates, timeseries = [], []

# country level
# The final row of the "Hospitalized" series is left out of the fit
# (presumably because the latest day may be incomplete -- confirm).
(
    dates,
    RR_pred,
    RR_CI_upper,
    RR_CI_lower,
    T_pred,
    T_CI_upper,
    T_CI_lower,
    total_cases,
    new_cases_ts,
    anomalies,
    anomaly_dates,
) = analytical_MPVS(
    country_time_series["Hospitalized"].iloc[:-1],
    CI=CI,
    smoothing=notched_smoothing(window=smoothing),
)

country_code = state_name_lookup["India"]
예제 #9
0
        top_level = json.load(fp)
    df = pd.DataFrame([(_[date], _[total_cases])
                       for _ in top_level[timeseries]],
                      columns=["date", "total_cases"])
    df["date"] = (date_scale * df["date"]).apply(pd.Timestamp)
    df.set_index("date", inplace=True)
    if start_date:
        return df[df.index >= start_date]
    return df


(data, figs) = setup(level="INFO")
# Fetch one file per province from the covid19.go.id public API.
for province in provinces:
    logger.info("downloading data for %s", province)
    download_data(data,
                  filename(province),
                  base_url="https://data.covid19.go.id/public/api/")

# Load each province's series; "Apr 1, 2020" is passed as the third argument
# (presumably the earliest date to keep -- confirm in load_province_timeseries).
province_cases = {
    province: load_province_timeseries(data, province, "Apr 1, 2020")
    for province in provinces
}
# Common date range: earliest first date to latest last date over all provinces.
bgn = min(cases.index.min() for cases in province_cases.values())
end = max(cases.index.max() for cases in province_cases.values())
idx = pd.date_range(bgn, end)
# Align every province to that range: forward-fill gaps from the previous
# observation ("pad"), then use 0 where no earlier observation exists.
province_cases = {
    province: cases.reindex(idx, method="pad").fillna(0)
    for (province, cases) in province_cases.items()
}

# 14 multiplied by the externally defined `days` unit.
prediction_period = 14 * days
예제 #10
0
def load_vax_data(download = False):
    """Load the state-wise vaccine dose table as a date-indexed frame.

    Args:
        download: when True, fetch ``vaccine_doses_statewise.csv`` into the
            module-level ``data`` directory before reading it.

    Returns:
        The CSV transposed so that each ``State`` value becomes a
        title-cased column, with the row labels parsed as ``%d/%m/%Y`` dates.
    """
    csv_name = "vaccine_doses_statewise.csv"
    if download:
        download_data(data, csv_name)

    # One row per original column header, one column per state.
    by_date = pd.read_csv(data/csv_name).set_index("State").transpose()
    by_date.columns = by_date.columns.str.title()

    parsed_dates = pd.to_datetime(by_date.index, format = "%d/%m/%Y")
    return by_date.set_index(parsed_dates)