def test_onset_wk():
    """Check the ``onset_wk`` target for per-season consistency and known values.

    Builds the training data for the ``onset_wk`` target from the actual data
    under ``./data`` and asserts that:

    * within every (season, region) group the onset week is either missing
      on every row or is one single value;
    * a few hand-picked (season, region) pairs have the expected onset week.
    """
    adl = udata.ActualDataLoader("./data")
    # The returned index/data are not needed here; the call itself is kept
    # in case loading has side effects the target computation relies on.
    adl.get()

    target = udata.Target("onset_wk")
    # NOTE(review): 222222 looks like an "include everything" epiweek cutoff --
    # confirm against Target.get_training_data.
    y, _, yi = target.get_training_data(adl, [], None, 222222)

    # Compute the season of every row once and reuse it for the frame.
    epiweeks = [i[0] for i in yi]
    seasons = [u.epiweek_to_season(epiweek) for epiweek in epiweeks]
    df = pd.DataFrame({
        "epiweek": epiweeks,
        "season": seasons,
        "region": [i[1] for i in yi],
        "onset_wk": y,
    })

    # Check if all onset weeks of same season are the same
    for (season, region), group in df.groupby(["season", "region"]):
        onset = group["onset_wk"]
        assert onset.isnull().all() or len(set(onset)) == 1, (
            f"inconsistent onset_wk within season {season}, region {region}"
        )

    # Random checks: (season, region, expected onset week)
    expected_onsets = [(1997, "nat", 7), (2007, "nat", 12)]
    for season, region, expected in expected_onsets:
        rows = df[(df["season"] == season) & (df["region"] == region)]
        # Fail with a readable message instead of an IndexError from iloc.
        assert not rows.empty, f"no rows for season {season}, region {region}"
        predicted_onset = rows["onset_wk"].iloc[0]
        assert int(predicted_onset) == expected
def test_peak():
    """Check the ``peak`` target against the actual data.

    The target's index must line up row-for-row with the actual data index
    (same epiweek and region), and no actual value may exceed the
    corresponding target value.
    """
    loader = udata.ActualDataLoader("./data")
    actual_index, actual_values = loader.get()

    # NOTE(review): 222222 looks like an "include everything" epiweek cutoff --
    # confirm against Target.get_training_data.
    peak_target = udata.Target("peak")
    peak_values, _, peak_index = peak_target.get_training_data(
        loader, [], None, 222222
    )

    # Each index entry is read as (epiweek, region, ...).
    target_epiweeks = [entry[0] for entry in peak_index]
    assert all(actual_index["epiweek"] == target_epiweeks)

    target_regions = [entry[1] for entry in peak_index]
    assert all(actual_index["region"] == target_regions)

    assert all(actual_values <= peak_values)
def test_peak_wk():
    """Check the ``peak_wk`` target's index against the actual data index.

    The target's index must line up row-for-row with the actual data index
    (same epiweek and region), and every paired epiweek must map to the same
    season via ``u.epiweek_to_season``.
    """
    loader = udata.ActualDataLoader("./data")
    actual_index, _ = loader.get()

    # NOTE(review): 222222 looks like an "include everything" epiweek cutoff --
    # confirm against Target.get_training_data.
    peak_wk_target = udata.Target("peak_wk")
    _, _, target_index = peak_wk_target.get_training_data(
        loader, [], None, 222222
    )

    # Each index entry is read as (epiweek, region, ...).
    target_epiweeks = [entry[0] for entry in target_index]
    assert all(actual_index["epiweek"] == target_epiweeks)

    target_regions = [entry[1] for entry in target_index]
    assert all(actual_index["region"] == target_regions)

    # Build the full list first so every pair is evaluated before checking.
    same_season = [
        u.epiweek_to_season(target_week) == u.epiweek_to_season(actual_week)
        for target_week, actual_week in zip(
            target_epiweeks, actual_index["epiweek"]
        )
    ]
    assert all(same_season)
import pandas as pd import utils.dists as udists import utils.data as udata import utils.misc as u import losses import os from tqdm import tqdm data_dir = snakemake.input.data_dir EXP_NAME = snakemake.config["EXP_NAME"] exp_dir = os.path.join(data_dir, "processed", EXP_NAME) TEST_SPLIT_THRESH = snakemake.config["TEST_SPLIT_THRESH"][EXP_NAME] COMPONENTS = [udata.Component(exp_dir, name) for name in u.available_models(exp_dir)] ACTUAL_DL = udata.ActualDataLoader(data_dir) REGIONS = ["nat", *[f"hhs{i}" for i in range(1, 11)], None] TARGETS = [udata.Target(t) for t in [1, 2, 3, 4, "peak", "peak_wk", "onset_wk"]] # Entry point for target in tqdm(TARGETS): output_dir = u.ensure_dir(f"./results/{EXP_NAME}/{target.name}") for idx, cmp in enumerate(COMPONENTS): eval_df = { "region": [], "score": [] } for region in REGIONS: