import json
import logging

import flat_table
import numpy as np
import pandas as pd
from scipy.signal import convolve, filtfilt, iirnotch

from adaptive.etl.commons import download_data
from adaptive.etl.covid19india import (data_path, get_time_series,
                                       load_all_data, load_statewise_data,
                                       state_code_lookup)
from adaptive.utils import cwd, days, mkdir, setup

data, _ = setup()

paths = {
    "v3": [data_path(i) for i in (1, 2)],
    "v4": [data_path(i) for i in range(3, 18)]
}

for target in paths['v3'] + paths['v4']:
    download_data(data, target)

df = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']],
                   v4_paths=[data / filepath for filepath in paths['v4']])

schema = {"Deceased": "dD", "Recovered": "dR", "Hospitalized": "dT"}


def assemble_time_series(df):
    ts = get_time_series(df)
    deltas = ts[schema.keys()]\
        .rename(columns=schema)
    # fill calendar gaps so every date appears, with zero new counts
    deltas = deltas.reindex(pd.date_range(deltas.index.min(),
                                          deltas.index.max()),
                            fill_value=0)
    # append cumulative columns ("dD" -> "D", "dR" -> "R", "dT" -> "T")
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]),
                          left_index=True,
                          right_index=True).astype(int)
    return merged
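
# usage sketch: assemble the national daily/cumulative series from the
# line-list data loaded above ("natl_ts" is an illustrative name)
natl_ts = assemble_time_series(df)
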
# age distribution of COVID-19 cases (8 age bins), estimated from Karnataka data
COVID_age_ratios = np.array([
    0.01618736, 0.07107746, 0.23314877, 0.22946212, 0.18180406, 0.1882451,
    0.05852026, 0.02155489
])
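
# usage sketch: the ratios sum to 1, so they can split an aggregate case count
# across the 8 age bins (illustrative count only)
cases_by_age = 10_000 * COVID_age_ratios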

india_pop = pd.read_csv(data / "india_pop.csv",
                        names=["state", "population"],
                        index_col="state").to_dict()["population"]
india_pop["Odisha"] = india_pop["Orissa"]
india_pop["Puducherry"] = india_pop["Pondicherry"]
india_pop["Uttarakhand"] = india_pop["Uttaranchal"]

# load covid19 india data
download_data(data, 'timeseries.json', "https://api.covid19india.org/v3/")
with (data / 'timeseries.json').open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)
df.columns = df.columns.str.split('.', expand=True)
dates = np.squeeze(df["index"][None].values)
# index by date, move the (series, measure) column levels into the row index,
# and drop the unassigned ("UN") state column
df = df.drop(columns="index").set_index(dates).stack([1, 2]).drop("UN", axis=1)
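
# example: the stacked frame has one column per state code and a
# (date, series, measure) row index; e.g., cumulative confirmed cases for "TN":
tn_confirmed = df["TN"].loc[:, "total", "confirmed"]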

# load Rt data
# Rt = pd.read_csv("data/Rt_timeseries_india.csv")
# date = "2020-12-24"
# for state in set(df.loc[date, :, :].columns) - {"TT", "LA", "SK", "NL"}:
# for state in ["TN"]:
#     N = india_pop[state_code_lookup[state].replace("&", "and")]
#     T = df[state].loc[date, "total", "confirmed"]
#     R = df[state].loc[date, "total", "recovered"]
#     D = df[state].loc[date, "total", "deceased"]

def notch_filter(ts):
    "remove the weekly reporting cycle from a daily time series"
    fs, f0, Q = 1, 1/7, 1  # daily sampling; notch at the 7-day frequency
    b1, a1 = iirnotch(f0, Q, fs)
    b2, a2 = iirnotch(2*f0, 2*Q, fs)  # second notch at the first harmonic
    b = convolve(b1, b2)
    a = convolve(a1, a2)
    notched = pd.Series(filtfilt(b, a, ts))
    notched.index = ts.index
    return notched
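
# usage sketch on synthetic data: the double notch strips the 7-day reporting
# cycle from a daily series (toy input, not the case data)
_weekly = pd.Series(10 + np.sin(2 * np.pi * np.arange(60) / 7),
                    index=pd.date_range("2020-06-01", periods=60))
_smoothed = notch_filter(_weekly)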

root = cwd()
data = mkdir(root/"data")
figs = mkdir(root/"figs")

###########################################################
# download latest case data
download_data(data, 'state_wise_daily.csv')
df = load_statewise_data(data/"state_wise_daily.csv")
ts = get_time_series(df, "state")

###########################################################
# load delay data
api_diff = pd.read_csv(data/"daily_diff.csv",
                       parse_dates=["status_change_date", "report_date"],
                       dayfirst=True)
delay = api_diff[(api_diff.current_status == "Hospitalized") &
                 (api_diff.report_date > "2020-08-02")].copy()
delay = delay.drop(columns=[col for col in delay.columns
                            if col.startswith("Unnamed")] + ["rowhash"])
# re-hash the identifying fields and drop duplicate records
delay["newhash"] = delay[[
    "patient_number", "date_announced", "detected_district", "detected_state",
    "current_status", "status_change_date", "num_cases"
]].apply(lambda x: hash(tuple(x)), axis=1)
delay = delay.drop_duplicates(subset=["newhash"], keep="first")
delay["delay"] = (delay.report_date - delay.status_change_date).dt.days
state_hist = delay[["detected_state", "num_cases", "delay"]]\
    .set_index(["detected_state", "delay"])["num_cases"]\
    .groupby(level=[0, 1]).sum().sort_index()
state_dist = state_hist/state_hist.groupby(level=0).transform("sum")

delay_hist = delay.groupby("delay")["num_cases"].sum()
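
# sketch: mean reporting delay per state implied by state_dist (a probability
# mass over integer day delays, indexed by (detected_state, delay))
mean_delay = (state_dist * state_dist.index.get_level_values("delay"))\
    .groupby(level=0).sum()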

###########################################################
# load Indonesia province-level case data

def load_province_timeseries(data, province, start_date=None):
    # NB: "filename", "date", "total_cases", "timeseries", and "date_scale" are
    # constants defined elsewhere in the original source; the signature here is
    # assumed, matching the call below
    with (data / filename(province)).open() as fp:
        top_level = json.load(fp)
    df = pd.DataFrame([(_[date], _[total_cases])
                       for _ in top_level[timeseries]],
                      columns=["date", "total_cases"])
    df["date"] = (date_scale * df["date"]).apply(pd.Timestamp)
    df.set_index("date", inplace=True)
    if start_date:
        return df[df.index >= start_date]
    return df


(data, figs) = setup(level="INFO")
logger = logging.getLogger(__name__)  # assumed; the original logger binding is elided
# "provinces" (the list of province names to fetch) is defined elsewhere in the original
for province in provinces:
    logger.info("downloading data for %s", province)
    download_data(data,
                  filename(province),
                  base_url="https://data.covid19.go.id/public/api/")

province_cases = {
    province: load_province_timeseries(data, province, "Apr 1, 2020")
    for province in provinces
}
bgn = min(cases.index.min() for cases in province_cases.values())
end = max(cases.index.max() for cases in province_cases.values())
idx = pd.date_range(bgn, end)
province_cases = {
    province: cases.reindex(idx, method="pad").fillna(0)
    for (province, cases) in province_cases.items()
}
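
# sketch: the API reports cumulative totals, so day-over-day differences give
# daily new cases (clipped at zero to absorb downward revisions)
daily_cases = {
    province: cases.diff().clip(lower=0).fillna(0)
    for (province, cases) in province_cases.items()
}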

prediction_period = 14 * days
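
# sketch: a forward date index for the projection horizon ("days" is assumed to
# be the day-unit constant from adaptive.utils, so prediction_period is in days)
projection_index = pd.date_range(end + pd.Timedelta(days=1),
                                 periods=prediction_period)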