Example #1
0
def hydrate_datastore(_):
    """Download the covid19india API files, merge them into one dataframe,
    and persist a hashed snapshot of the records under /tmp/data.

    Parameters
    ----------
    _ : unused; kept so the signature matches the caller's callback shape.
    """
    import hashlib  # local import: only needed for the stable row hash below

    root = Path("/tmp")
    data = root / "data"
    figs = root / "figs"

    # parents=True makes directory creation robust even if an ancestor is missing
    data.mkdir(parents=True, exist_ok=True)
    figs.mkdir(parents=True, exist_ok=True)

    # define data versions for api files
    paths = {
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in (3, 4, 5, 6, 7, 8, 9, 10)]
    }

    for target in paths['v3'] + paths['v4']:
        download_data(data, target)

    df = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']],
                       v4_paths=[data / filepath for filepath in paths['v4']])

    # most recent record date in the data vs. the date of this run
    data_recency = str(df["date_announced"].max()).split()[0]
    run_date = str(pd.Timestamp.now()).split()[0]

    print(f"data_recency: {data_recency}")
    print(f"run_date    : {run_date}")

    # FIX: the builtin hash() is salted per process (PYTHONHASHSEED), so the
    # values written on one run would not match the next run's — useless for a
    # persisted record fingerprint. Use a deterministic digest of the row
    # contents instead, folded to a 64-bit int to keep the column numeric.
    df["hash"] = df.apply(
        lambda x: int.from_bytes(
            hashlib.sha256(repr(tuple(x)).encode("utf-8")).digest()[:8],
            "big"),
        axis=1)
    df["report_date"] = run_date

    df.to_csv(data / f"hashed_records_{run_date}.csv")
    print(df.tail())
Example #2
0
import pandas as pd
from adaptive.etl.commons import download_data
from adaptive.etl.covid19india import data_path, get_time_series, load_all_data
from adaptive.utils import setup

# set up the working directory for downloaded files
data, _ = setup()

# API source files: v3 covers files 1-2, v4 covers files 3 through 17
paths = {
    "v3": [data_path(n) for n in (1, 2)],
    "v4": [data_path(n) for n in range(3, 18)],
}

# fetch every source file into the data directory
for target in [*paths["v3"], *paths["v4"]]:
    download_data(data, target)

# merge both schema versions into a single case-level dataframe
df = load_all_data(
    v3_paths=[data / p for p in paths["v3"]],
    v4_paths=[data / p for p in paths["v4"]],
)

# column renames: raw status names -> daily delta column names
schema = {"Deceased": "dD", "Recovered": "dR", "Hospitalized": "dT"}


def assemble_time_series(df):
    """Build a contiguous daily time series of status deltas plus their
    cumulative totals from the case-level dataframe.

    NOTE(review): no return statement is visible — the function appears
    truncated at this chunk boundary and, as written, returns None;
    presumably it should end with `return merged`. Confirm against the
    full source.
    """
    # daily counts per status column, keyed by the module-level `schema`
    ts = get_time_series(df)
    deltas = ts[schema.keys()]\
        .rename(columns = schema)
    # reindex onto a full calendar range so missing days count as 0
    deltas = deltas.reindex(pd.date_range(deltas.index.min(),
                                          deltas.index.max()),
                            fill_value=0)
    # cumulative columns named by the delta name's 2nd char: "dD" -> "D"
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]),
                          left_index=True,
                          right_index=True).astype(int)
    merged.index.name = "date"
Example #3
0
# load the serosurvey results; drop Stata's merge bookkeeping column
# FIX: removed the stray trailing "\" line continuation that dangled into the
# following blank line — a latent syntax hazard.
sero = pd.read_stata("data/kadata.labdate.dta")\
    .drop(columns = ["_merge"])

sero["S"] = sero["elisa_pos15"]                  # seropositivity indicator
sero["t0"] = sero["date_med"]                    # median lab date per record
sero["td"] = sero["t0"] + pd.Timedelta(days=30)  # 30-day horizon from t0
sero["hr"] = sero.hom_region.map(hom_regions_numeric)  # numeric region code

# pull down COVID 19 India data
paths = {
    "v3": [data_path(i) for i in (1, 2)],
    "v4": [data_path(i) for i in range(3, 19)]
}
# NOTE(review): downloads are commented out — files assumed already cached
# locally; uncomment to refresh.
# for target in paths['v3'] + paths['v4']:
#     download_data(data, target)
df = load_all_data(v3_paths = [data/filepath for filepath in paths['v3']],  v4_paths = [data/filepath for filepath in paths['v4']])\
    .query("detected_state == 'Karnataka'")

# get all deaths in KA on Aug 29 by district
# FIX: Series.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0;
# groupby(level=0).sum() is the exact equivalent.
get_time_series(df, "detected_district")\
    .query("status_change_date <= 'Aug 29, 2020'", engine = "python")\
    .Deceased.groupby(level = 0).sum()\
    .drop("Other State")\
    .astype(int)\
    .to_csv(data/"ka_cumulative_deaths_aug29.csv")

# aggregate time series by hom_region
df["detected_region"] = df.detected_district.map(hom_regions_rev)
ka_ts = get_time_series(df.dropna(subset=["detected_region"]),
                        "detected_region").rename(columns={
                            "Deceased": "dD",
                            "Hospitalized": "dT",