Code example #1
import numpy as np
import pandas as pd

import epimargin.plots as plt
from epimargin.estimators import analytical_MPVS
from epimargin.models import SIR
from epimargin.smoothing import notched_smoothing

# `data` (a pathlib.Path to the data directory), the project's `etl` module,
# and the `smoothing` window size are defined earlier in the original script
CI = 0.95

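# load daily reported cases for Bihar and collapse them into a state-level time series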
state_cases = pd.read_csv(data / "Bihar_cases_data_Oct03.csv",
                          parse_dates=["date_reported"],
                          dayfirst=True)
state_ts = state_cases["date_reported"].value_counts().sort_index()
district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv")
populations = dict(zip(district_names, population_counts))

# first, look at state level predictions
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
     state_ts,
     CI=CI,
     smoothing=notched_smoothing(window=smoothing),
     totals=False)

plt.Rt(dates, Rt_pred[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], CI, ymin=0, ymax=4)\
    .title("\nBihar: Reproductive Number Estimate")\
    .annotate(f"data from {str(dates[0]).split()[0]} to {str(dates[-1]).split()[0]}")\
    .xlabel("date")\
    .ylabel("$R_t$", rotation=0, labelpad=20)\
    .show()

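# seed a forward SIR simulation from the most recent case and Rt estimates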
np.random.seed(33)
Bihar = SIR("Bihar",
            99_000_000,
            dT0=T_pred[-1],
            Rt0=Rt_pred[-1],
            lower_CI=T_CI_lower[-1],
Code example #2
""" Common data loading/cleaning functions and constants """
from pathlib import Path

import pandas as pd

from epimargin.smoothing import notched_smoothing

data = (Path(__file__).parent / "../data").resolve()
print(data)

USD = 1 / 72  # INR to USD conversion factor (assuming roughly 72 INR per USD)

age_bin_labels = ["0-17", "18-29", "30-39", "40-49", "50-59", "60-69", "70+"]

# Rt estimation parameters
CI = 0.95
window = 14
gamma = 0.2
infectious_period = 5
smooth = notched_smoothing(window)

# simulation parameters
simulation_start = pd.Timestamp("Jan 1, 2021")
num_sims = 10000

# common vaccination parameters
immunity_threshold = 0.75
Rt_threshold = 0.2

# misc
state = "TN"
survey_date = "October 23, 2020"

# palette
TN_color = "firebrick"
Code example #3
File: main.py Project: COVID-IWG/covid-metrics-infra
def run_estimates(request):
    state_code = get(request, 'state_code')
    state = state_code_lookup[state_code]

    print(f"Rt estimation for {state} ({state_code}) started")

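    # pull the LGD crosswalk and raw state/district case data from cloud storage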
    bucket = storage.Client().bucket(bucket_name)
    bucket.blob("pipeline/commons/refs/all_crosswalk.dta")\
        .download_to_filename("/tmp/all_crosswalk.dta")

    bucket.blob("pipeline/raw/states.csv")\
        .download_to_filename("/tmp/states.csv")

    bucket.blob("pipeline/raw/districts.csv")\
        .download_to_filename("/tmp/districts.csv")

    crosswalk = pd.read_stata("/tmp/all_crosswalk.dta")
    district_cases = pd.read_csv("/tmp/districts.csv")\
        .rename(columns = str.lower)\
        .set_index(["state", "district", "date"])\
        .sort_index()\
        .rename(index = lambda s: s.replace(" and ", " & "), level = 0)\
        .loc[state]
    state_cases = pd.read_csv("/tmp/states.csv")\
        .rename(columns = str.lower)\
        .set_index(["state", "date"])\
        .sort_index()\
        .rename(index = lambda s: s.replace(" and ", " & "), level = 0)\
        .loc[state]
    print(f"Estimating state-level Rt for {state_code}")
    normalized_state = state.replace(" and ", " And ").replace(" & ", " And ")
    lgd_state_name, lgd_state_id = crosswalk.query(
        "state_api == @normalized_state").filter(
            like="lgd_state").drop_duplicates().iloc[0]
    try:
        (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper,
         T_CI_lower, total_cases, new_cases_ts,
         *_) = analytical_MPVS(state_cases.iloc[-lookback:-cutoff].confirmed,
                               CI=CI,
                               smoothing=notched_smoothing(window=smoothing),
                               totals=True)

        pd.DataFrame(data = {
            "dates": dates[1:],
            "Rt_pred": Rt_pred,
            "Rt_CI_upper": Rt_CI_upper,
            "Rt_CI_lower": Rt_CI_lower,
            "T_pred": T_pred,
            "T_CI_upper": T_CI_upper,
            "T_CI_lower": T_CI_lower,
            "total_cases": total_cases[2:],
            "new_cases_ts": new_cases_ts,
        })\
            .assign(state = state, lgd_state_name = lgd_state_name, lgd_state_id = lgd_state_id)\
            .to_csv("/tmp/state_Rt.csv")

        # upload to cloud
        bucket.blob(
            f"pipeline/est/{state_code}_state_Rt.csv").upload_from_filename(
                "/tmp/state_Rt.csv", content_type="text/csv")
    except Exception as e:
        print(f"ERROR when estimating Rt for {state_code}", e)
        traceback.print_exc()

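    # district-level estimates are skipped where cases are tracked only at the state level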
    if normalized_state in dissolved_states:
        print(f"Skipping district-level Rt for {state_code}")
    else:
        print(f"Estimating district-level Rt for {state} ({state_code})")
        estimates = []
        for district in filter(
                lambda name: name.strip() not in excluded,
                district_cases.index.get_level_values(0).unique()):
            print(f"running estimation for [{district}]")
            lgd_district_data = crosswalk.query(
                "state_api == @normalized_state & district_api == @district"
            ).filter(like="lgd_district").drop_duplicates()
            if not lgd_district_data.empty:
                lgd_district_name, lgd_district_id = lgd_district_data.iloc[0]
            else:
                lgd_district_name, lgd_district_id = lgd_state_name, lgd_state_id
            try:
                (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper,
                 T_CI_lower, total_cases, new_cases_ts, *_) = analytical_MPVS(
                     district_cases.loc[district].iloc[-lookback:-cutoff].
                     confirmed,
                     CI=CI,
                     smoothing=notched_smoothing(window=smoothing),
                     totals=True)
                estimates.append(
                    pd.DataFrame(
                        data={
                            "dates": dates[1:],
                            "Rt_pred": Rt_pred,
                            "Rt_CI_upper": Rt_CI_upper,
                            "Rt_CI_lower": Rt_CI_lower,
                            "T_pred": T_pred,
                            "T_CI_upper": T_CI_upper,
                            "T_CI_lower": T_CI_lower,
                            "total_cases": total_cases[2:],
                            "new_cases_ts": new_cases_ts,
                        }).assign(state=state,
                                  lgd_state_name=lgd_state_name,
                                  lgd_state_id=lgd_state_id,
                                  district=district,
                                  lgd_district_name=lgd_district_name,
                                  lgd_district_id=lgd_district_id))
            except Exception as e:
                print(f"ERROR when estimating Rt for {district}, {state_code}",
                      e)
                traceback.print_exc()

        pd.concat(estimates).to_csv("/tmp/district_Rt.csv")

        # upload to cloud
        bucket.blob(
            f"pipeline/est/{state_code}_district_Rt.csv").upload_from_filename(
                "/tmp/district_Rt.csv", content_type="text/csv")

    return "OK!"
Code example #4
from logging import getLogger

import pandas as pd
from epimargin.estimators import analytical_MPVS, linear_projection
from epimargin.etl.commons import download_data
from epimargin.models import SIR, NetworkedSIR
from epimargin.policy import simulate_PID_controller
from epimargin.smoothing import notched_smoothing
from epimargin.utils import days, setup
from mpl_toolkits.axes_grid1 import make_axes_locatable
from tqdm import tqdm

logger = getLogger("DKIJ")

# model/sim details
gamma     = 0.2
window    = 7
CI        = 0.95
smoothing = notched_smoothing(window = window)

dkij_drop_cols = [
    'age', 'sex', 'fever', 'temp', 'cough', 'flu', 'sore_throat',
    'shortness_breath', 'shivering', 'headache', 'malaise', 'muscle_pain',
    'nausea_vomiting', 'abdominal_pain', 'diarrhoea', 'date_recovered',
    'date_died', 'heart_disease', 'diabetes', 'pneumonia', 'hypertension',
    'malignant', 'immunology_disorder', 'chronic_kidney', 'chronic_liver',
    'copd', 'obesity', 'pregnant', 'tracing', 'otg', 'icu', 'intubation',
    'ecmo', 'criteria_cases', 'age_group', 'age_group2', 'date_discharge',
    'patient_status', 'death'
]

shp_drop_cols = [
    'GID_0', 'NAME_0', 'GID_1', 'NAME_1', 'NL_NAME_1', 'GID_2', 'VARNAME_2',
    'NL_NAME_2', 'TYPE_2', 'ENGTYPE_2', 'CC_2', 'HASC_2'
]

(data, figs) = setup(level = "INFO")
dkij = pd.read_stata(data/"dkijakarta_180820.dta")\
Code example #5
    state_cases = state_cases[state_cases.date_reported <= "2020-09-30"]
    state_ts = state_cases["date_reported"].value_counts().sort_index()
    district_ts = state_cases.groupby(
        ["geo_reported",
         "date_reported"])["date_reported"].count().sort_index()
    districts, pops, migrations = etl.district_migration_matrix(
        data / "Migration Matrix - District.csv")
    districts = sorted([etl.replacements.get(dn, dn) for dn in districts])

    R_mandatory = dict()
    for district in districts:  #district_ts.index.get_level_values(0).unique():
        try:
            (_, Rt,
             *_) = analytical_MPVS(district_ts.loc[district],
                                   CI=CI,
                                   smoothing=notched_smoothing(window=10),
                                   totals=False)
            Rm = np.mean(Rt)
        except ValueError:
            Rm = 1.5  # default estimate when the fit fails for this district
        R_mandatory[district] = Rm

    R_voluntary = {district: 1.2 * R for (district, R) in R_mandatory.items()}

    si, sf = 0, 10

    simulation_results = [
        run_policies(state_cases,
                     pops,
                     districts,
                     migrations,
Code example #6
File: mobility.py Project: COVID-IWG/epimargin
import epimargin.plots as plt
import numpy as np
import pandas as pd
import seaborn as sns
from epimargin.smoothing import notched_smoothing
from epimargin.etl.commons import download_data
from epimargin.etl.covid19india import data_path, load_all_data, get_time_series

sns.set_style("whitegrid", {'axes.grid' : False})

smoothed = notched_smoothing(window = 7)

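# combine the 2020 and 2021 Google Community Mobility Reports for India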
mobility = pd.concat([
    pd.read_csv("data/2020_IN_Region_Mobility_Report.csv", parse_dates=["date"]),
    pd.read_csv("data/2021_IN_Region_Mobility_Report.csv", parse_dates=["date"])
])
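# OxCGRT: the Oxford COVID-19 Government Response Tracker (policy stringency index)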
stringency = pd.read_csv("data/OxCGRT_latest.csv", parse_dates=["Date"])

def plot_mobility(series, label, stringency = None, until = None,
                  annotation = "Google Mobility Data; baseline mobility measured from Jan 3 - Feb 6"):
    plt.plot(series.date, smoothed(series.retail_and_recreation_percent_change_from_baseline), label = "Retail/Recreation")
    plt.plot(series.date, smoothed(series.grocery_and_pharmacy_percent_change_from_baseline),  label = "Grocery/Pharmacy")
    plt.plot(series.date, smoothed(series.parks_percent_change_from_baseline),                 label = "Parks")
    plt.plot(series.date, smoothed(series.transit_stations_percent_change_from_baseline),      label = "Transit Stations")
    plt.plot(series.date, smoothed(series.workplaces_percent_change_from_baseline),            label = "Workplaces")
    plt.plot(series.date, smoothed(series.residential_percent_change_from_baseline),           label = "Residential")
    if until:
        right = pd.Timestamp(until)
    elif stringency is not None:
        right = stringency.Date.max()
    else:
        right = series.date.iloc[-1]
Code example #7
File: TN_proj.py Project: COVID-IWG/epimargin
def setup(district) -> Tuple[Callable[[int], SIR], pd.DataFrame]:
    demographics = simulation_initial_conditions.loc[district]
    
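    # rebuild a contiguous daily recovery series, smooth it, and accumulate to get R0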
    dR_conf = ts.loc[district].dR
    dR_conf = dR_conf.reindex(pd.date_range(dR_conf.index.min(), dR_conf.index.max()), fill_value = 0)
    dR_conf_smooth = pd.Series(smooth(dR_conf), index = dR_conf.index).clip(0).astype(int)
    R_conf_smooth  = dR_conf_smooth.cumsum().astype(int)

    R0 = R_conf_smooth[data_recency]

    dD_conf = ts.loc[district].dD
    dD_conf = dD_conf.reindex(pd.date_range(dD_conf.index.min(), dD_conf.index.max()), fill_value = 0)
    dD_conf_smooth = pd.Series(smooth(dD_conf), index = dD_conf.index).clip(0).astype(int)
    D_conf_smooth  = dD_conf_smooth.cumsum().astype(int)
    D0 = D_conf_smooth[data_recency]

    dT_conf = ts.loc[district].dT
    dT_conf = dT_conf.reindex(pd.date_range(dT_conf.index.min(), dT_conf.index.max()), fill_value = 0)

    (
        dates,
        Rt_pred, Rt_CI_upper, Rt_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        *_
    ) = analytical_MPVS(ts.loc[district].dT, CI = CI, smoothing = notched_smoothing(window = smoothing), totals = False)
    Rt_estimates = pd.DataFrame(data = {
        "dates"       : dates,
        "Rt_pred"     : Rt_pred,
        "Rt_CI_upper" : Rt_CI_upper,
        "Rt_CI_lower" : Rt_CI_lower,
        "T_pred"      : T_pred,
        "T_CI_upper"  : T_CI_upper,
        "T_CI_lower"  : T_CI_lower,
        "total_cases" : total_cases[2:],
        "new_cases_ts": new_cases_ts,
    })

    dT_conf_smooth = pd.Series(smooth(dT_conf), index = dT_conf.index).clip(0).astype(int)
    T_conf_smooth  = dT_conf_smooth.cumsum().astype(int)
    T0 = T_conf_smooth[data_recency]
    dT0 = dT_conf_smooth[data_recency]

    S0 = max(0, demographics.N_tot - T0)
    I0 = max(0, T0 - R0 - D0)

    return ( 
        lambda seed = 0: SIR(
            name = district, 
            mortality = demographics[[f"N_{i}" for i in range(7)]] @ np.array(list(TN_IFRs.values()))/demographics.N_tot,
            population = demographics.N_tot, 
            random_seed = seed,
            infectious_period = 10, 
            S0  = S0,
            I0  = I0, 
            R0  = R0, 
            D0  = D0, 
            dT0 = dT0, 
            Rt0 = Rt_estimates.set_index("dates").loc[data_recency].Rt_pred * demographics.N_tot/S0), 
        Rt_estimates
    )
Code example #8
File: main.py Project: COVID-IWG/covid-metrics-infra
def assemble_data(request):
    state_code = get(request, 'state_code')
    state = state_code_lookup[state_code]

    print(f"Assembling initial conditions for {state_code} ({state}).")

    bucket = storage.Client().bucket(bucket_name)
    data = Path("/tmp")

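    # download serology, case time series, vaccination, and Rt estimates from the pipeline bucket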
    bucket.blob("pipeline/commons/refs/all_india_sero_pop.csv")\
        .download_to_filename(data / "all_india_sero_pop.csv")

    bucket.blob("pipeline/raw/state_case_timeseries.csv")\
        .download_to_filename(data / "state_case_timeseries.csv")

    bucket.blob("pipeline/raw/district_case_timeseries.csv")\
        .download_to_filename(data / "district_case_timeseries.csv")

    bucket.blob("pipeline/raw/vaccine_doses_statewise.csv")\
        .download_to_filename(data / "vaccine_doses_statewise.csv")

    bucket.blob(f"pipeline/est/{state_code}_district_Rt.csv")\
        .download_to_filename(data / f"{state_code}_district_Rt.csv")

    bucket.blob(f"pipeline/est/{state_code}_state_Rt.csv")\
        .download_to_filename(data / f"{state_code}_state_Rt.csv")

    print(f"Downloaded simulation input data for {state_code} ({state}).")

    district_age_pop = pd.read_csv(data / "all_india_sero_pop.csv").set_index(
        ["state", "district"])

    state_ts = pd.read_csv(data / "state_case_timeseries.csv")\
        .set_index(["detected_state", "status_change_date"])\
        .drop(columns = ["date", "time", "delta", "logdelta"])\
        .rename(columns = {
            "Deceased":     "dD",
            "Hospitalized": "dT",
            "Recovered":    "dR"
        })
    district_ts = pd.read_csv(data / "district_case_timeseries.csv")\
        .set_index(["detected_state", "detected_district", "status_change_date"]).loc[state]\
        .drop(columns = ["date", "time", "delta", "logdelta"])\
        .rename(columns = {
            "Deceased":     "dD",
            "Hospitalized": "dT",
            "Recovered":    "dR"
        })

    state_Rt = pd.read_csv(data / f"{state_code}_state_Rt.csv",    index_col = 0, parse_dates = ["dates"])\
        [["dates", "Rt_pred"]]\
        .assign(district = state)\
        .drop_duplicates(subset = "district", keep = "last")\
        [["district", "Rt_pred"]]\
        .set_index("district")
    district_Rt = pd.read_csv(data / f"{state_code}_district_Rt.csv", index_col = 0, parse_dates = ["dates"])\
        [["district", "dates", "Rt_pred"]]\
        .drop_duplicates(subset = "district", keep = "last")\
        [["district", "Rt_pred"]]\
        .set_index("district")

    vax = pd.read_csv(
        data / "vaccine_doses_statewise.csv").set_index("State").T.dropna()
    vax.columns = vax.columns.str.title()
    vax.set_index(pd.to_datetime(vax.index), inplace=True)

    smooth = notched_smoothing(window=window)
    simulation_start = pd.Timestamp.today() - pd.Timedelta(days=cutoff)

    districts_to_run = district_age_pop.loc[state]
    # if time series data not available at the district level, coalesce to state/UT level
    if state in coalesce_states:
        districts_to_run = districts_to_run\
            .assign(**{f"infected_{i}": (lambda i: lambda _: _[f"sero_{i}"] * _[f"N_{i}"])(i) for i in range(7)})\
            .drop(columns = [f"sero_{i}" for i in range(7)])\
            .sum(axis = 0)\
            .to_frame().T\
            .assign(**{f"sero_{i}": (lambda i: lambda _: _[f"infected_{i}"] / _[f"N_{i}"])(i) for i in range(7)})\
            [districts_to_run.columns]\
            .assign(district = state)\
            .set_index("district")
        ts = state_ts
        districts_to_run = districts_to_run.join(state_Rt)
    else:
        ts = district_ts
        districts_to_run = districts_to_run.join(district_Rt)

    print(f"Done reading input data for {state_code} ({state}).")
    print(f"Running seroprevalence scaling for districts.")

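    # assemble one row of simulation initial conditions per district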
    rows = []
    for row in districts_to_run.dropna().itertuples():
        (district, sero_0, sero_1, sero_2, sero_3, sero_4, sero_5, sero_6,
         N_0, N_1, N_2, N_3, N_4, N_5, N_6, N_tot, Rt) = row
        print(f"Scaling for {state_code}/{district}.")

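        # recoveries: smooth the daily series, accumulate, then rescale by the sero-implied ratio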
        dR_conf = ts.loc[district].dR
        dR_conf = dR_conf.reindex(pd.date_range(dR_conf.index.min(),
                                                dR_conf.index.max()),
                                  fill_value=0)
        if len(dR_conf) >= window + 1:
            dR_conf_smooth = pd.Series(smooth(dR_conf),
                                       index=dR_conf.index).clip(0).astype(int)
        else:
            dR_conf_smooth = dR_conf

        R_conf_smooth = dR_conf_smooth.cumsum().astype(int)
        R_conf = R_conf_smooth[survey_date if survey_date in
                               R_conf_smooth.index else -1]
        R_sero = (sero_0 * N_0 + sero_1 * N_1 + sero_2 * N_2 + sero_3 * N_3 +
                  sero_4 * N_4 + sero_5 * N_5 + sero_6 * N_6)
        R_ratio = R_sero / R_conf if R_conf != 0 else 1
        R0 = R_conf_smooth[simulation_start if simulation_start in
                           R_conf_smooth.index else -1] * R_ratio
        print("Scaled recoveries.")

        dD_conf = ts.loc[district].dD
        dD_conf = dD_conf.reindex(pd.date_range(dD_conf.index.min(),
                                                dD_conf.index.max()),
                                  fill_value=0)
        if len(dD_conf) >= window + 1:
            dD_conf_smooth = pd.Series(smooth(dD_conf),
                                       index=dD_conf.index).clip(0).astype(int)
        else:
            dD_conf_smooth = dD_conf
        D_conf_smooth = dD_conf_smooth.cumsum().astype(int)
        D0 = D_conf_smooth[simulation_start if simulation_start in
                           D_conf_smooth.index else -1]
        print("Scaled deaths.")

        dT_conf = ts.loc[district].dT
        pandemic_start = dT_conf.index.min()
        dT_conf = dT_conf.reindex(pd.date_range(dT_conf.index.min(),
                                                dT_conf.index.max()),
                                  fill_value=0)
        if len(dT_conf) >= window + 1:
            dT_conf_smooth = pd.Series(smooth(dT_conf),
                                       index=dT_conf.index).clip(0).astype(int)
        else:
            dT_conf_smooth = dT_conf
        T_conf_smooth = dT_conf_smooth.cumsum().astype(int)
        T_conf = T_conf_smooth[survey_date if survey_date in
                               T_conf_smooth.index else -1]
        T_sero = R_sero + D0
        T_ratio = T_sero / T_conf if T_conf != 0 else 1
        T0 = T_conf_smooth[simulation_start if simulation_start in
                           T_conf_smooth.index else -1] * T_ratio
        print("Scaled cases.")

        S0 = max(0, N_tot - T0)
        dD0 = dD_conf_smooth[simulation_start if simulation_start in
                             dD_conf_smooth.index else -1]
        dT0 = dT_conf_smooth[simulation_start if simulation_start in
                             dT_conf_smooth.index else -1] * T_ratio
        I0 = max(0, (T0 - R0 - D0))

        V0 = vax[state][simulation_start if simulation_start in vax.index
                        else -1] * N_tot / districts_to_run.N_tot.sum()
        print("Resolved vaccination data.")

        rows.append(
            (state_code, state, district, sero_0, N_0, sero_1, N_1, sero_2,
             N_2, sero_3, N_3, sero_4, N_4, sero_5, N_5, sero_6, N_6, N_tot,
             Rt, S0, I0, R0, D0, dT0, dD0, V0, pandemic_start))

    pd.DataFrame(rows, columns=columns).to_csv(
        data / f"{state_code}_simulation_initial_conditions.csv")
    bucket.blob(f"pipeline/sim/input/{state_code}_simulation_initial_conditions.csv")\
        .upload_from_filename(str(data / f"{state_code}_simulation_initial_conditions.csv"), content_type = "text/csv")

    return "OK!"
Code example #9
File: tutorial.py Project: COVID-IWG/epimargin
# a snapshot of this csv is checked into the repo at data/tutorial_timeseries.csv in case you run into download problems
download_data(data, "districts.csv",
              "https://api.covid19india.org/csv/latest/")

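# reduce the all-India daily reports to a Mumbai confirmed-case series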
daily_reports = pd.read_csv(data / "districts.csv", parse_dates = ["Date"])\
    .rename(str.lower, axis = 1)\
    .set_index(["state", "district", "date"])\
    .sort_index()\
    .loc["Maharashtra", "Mumbai"]
daily_cases = daily_reports["confirmed"]\
    .diff()\
    .clip(lower = 0)\
    .dropna()

smoother = notched_smoothing(window=5)
smoothed_cases = pd.Series(data=smoother(daily_cases), index=daily_cases.index)

# plot raw and cleaned data
beg = "December 15, 2020"
end = "March 1, 2021"
training_cases = smoothed_cases[beg:end]

plt.scatter(daily_cases[beg:end].index,
            daily_cases[beg:end].values,
            color="black",
            s=5,
            alpha=0.5,
            label="raw case count data")
plt.plot(training_cases.index,
         training_cases.values,
Code example #10
import pandas as pd

from epimargin.smoothing import notched_smoothing
from epimargin.estimators import analytical_MPVS
import epimargin.plots as plt

CI = 0.95
gamma = 0.2
window = 3
smoothing = notched_smoothing(window=window)

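# map the raw line-list column headers (Indonesian data) to clean identifiers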
schema = {
    "Date Symptom Onset": "symptom_onset",
    "Date of Hospital  Admissions": "admission",
    "Date tested": "tested",
    "Date of positive test result": "confirmed",
    "Date Recovered": "recovered",
    "Date Died": "died",
    "Kebupaten/Kota": "regency",
    "Kecamatan": "district",
    "age ": "age"
}

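# normalize regency name spellings to match reference data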
regency_names = {
    'Pangkep': 'Pangkajene Dan Kepulauan',
    'Pare-Pare': 'Parepare',
    'Selayar': 'Kepulauan Selayar',
    'Sidrap': 'Sidenreng Rappang'
}