Beispiel #1
0
def test_onset_wk():
    """Check the "onset_wk" training target for per-season consistency.

    Two properties are verified:

    1. Within every (season, region) group the onset week is either
       entirely null or a single repeated value.
    2. A few hand-picked (season, region) pairs carry the expected onset
       week.
    """
    adl = udata.ActualDataLoader("./data")
    # The returned index/data are not needed in this test; the call itself
    # is kept because the test has always issued it before requesting the
    # training data (whether it has side effects is not visible from here).
    adl.get()

    target = udata.Target("onset_wk")
    y, _, yi = target.get_training_data(adl, [], None, 222222)

    # Each entry of `yi` is indexed as (epiweek, region, ...).
    epiweeks = [entry[0] for entry in yi]
    seasons = [u.epiweek_to_season(week) for week in epiweeks]
    df = pd.DataFrame({
        "epiweek": epiweeks,
        "season": seasons,
        "region": [entry[1] for entry in yi],
        "onset_wk": y,
    })

    # Check if all onset weeks of same season are the same
    for key, group in df.groupby(["season", "region"]):
        onsets = group["onset_wk"]
        assert onsets.isnull().all() or len(set(onsets)) == 1, (
            f"onset_wk is not constant within season/region group {key}"
        )

    # Random checks: (season, region, expected onset week)
    onset_map = [(1997, "nat", 7), (2007, "nat", 12)]

    for season, region, expected_onset in onset_map:
        rows = df[(df["season"] == season) & (df["region"] == region)]
        # Fail with a readable message instead of an IndexError from iloc.
        assert not rows.empty, (
            f"no rows found for season {season}, region {region}"
        )
        predicted_onset = rows.iloc[0, :]["onset_wk"]
        assert int(predicted_onset) == expected_onset, (
            f"season {season}, region {region}: expected onset week "
            f"{expected_onset}, got {predicted_onset}"
        )
Beispiel #2
0
def test_peak():
    """Check the "peak" training target against the actual data.

    The training index must line up entry-by-entry with the loader's own
    index (epiweek and region), and no actual value may exceed the
    corresponding peak target value.
    """
    loader = udata.ActualDataLoader("./data")
    aidx, adata = loader.get()

    peak_target = udata.Target("peak")
    y, Xs, yi = peak_target.get_training_data(loader, [], None, 222222)

    # Each entry of `yi` is indexed as (epiweek, region, ...).
    target_epiweeks = [entry[0] for entry in yi]
    assert all(aidx["epiweek"] == target_epiweeks)

    target_regions = [entry[1] for entry in yi]
    assert all(aidx["region"] == target_regions)

    # Every actual observation is bounded above by the peak target.
    assert all(adata <= y)
Beispiel #3
0
def test_peak_wk():
    """Check that the "peak_wk" training index matches the actual data index.

    The training index must line up entry-by-entry with the loader's own
    index (epiweek and region), and paired epiweeks must map to the same
    season.
    """
    loader = udata.ActualDataLoader("./data")
    aidx, adata = loader.get()

    peak_wk_target = udata.Target("peak_wk")
    y, Xs, yi = peak_wk_target.get_training_data(loader, [], None, 222222)

    # Each entry of `yi` is indexed as (epiweek, region, ...).
    target_epiweeks = [entry[0] for entry in yi]
    assert all(aidx["epiweek"] == target_epiweeks)

    target_regions = [entry[1] for entry in yi]
    assert all(aidx["region"] == target_regions)

    # Pairwise season comparison between training and actual epiweeks.
    season_matches = [
        u.epiweek_to_season(target_wk) == u.epiweek_to_season(actual_wk)
        for target_wk, actual_wk in zip(target_epiweeks, aidx["epiweek"])
    ]
    assert all(season_matches)
import utils.misc as u
import losses
import os
from tqdm import tqdm


# Experiment settings are read from the global `snakemake` object, which is
# not defined in this file (presumably injected by Snakemake's `script:`
# directive -- confirm against the workflow).
data_dir = snakemake.input.data_dir
EXP_NAME = snakemake.config["EXP_NAME"]
exp_dir = os.path.join(data_dir, "processed", EXP_NAME)
TEST_SPLIT_THRESH = snakemake.config["TEST_SPLIT_THRESH"][EXP_NAME]

# One component per model reported as available in the experiment directory.
COMPONENTS = [
    udata.Component(exp_dir, model_name)
    for model_name in u.available_models(exp_dir)
]
ACTUAL_DL = udata.ActualDataLoader(data_dir)

# "nat", then "hhs1" .. "hhs10", then None.
# NOTE(review): None presumably selects all regions at once -- confirm.
REGIONS = ["nat"] + [f"hhs{n}" for n in range(1, 11)] + [None]
TARGETS = [
    udata.Target(target_id)
    for target_id in (1, 2, 3, 4, "peak", "peak_wk", "onset_wk")
]


# Entry point
for target in tqdm(TARGETS):
    output_dir = u.ensure_dir(f"./results/{EXP_NAME}/{target.name}")

    for idx, cmp in enumerate(COMPONENTS):
        eval_df = {
            "region": [],
            "score": []
        }
        for region in REGIONS:
            y, Xs, yi = target.get_testing_data(ACTUAL_DL, COMPONENTS, region, TEST_SPLIT_THRESH)
            y_one_hot = udists.actual_to_one_hot(y, bins=target.bins)
            output = Xs[idx]