Esempio n. 1
0
    def test_predict_and_explain(self, tmp_path, use_pred_months,
                                 predict_delta):
        """Train an EARecurrentNetwork on a tiny dataset, then check its
        predictions and its Morris explanations.

        The same x / y pair is written to a single "1980_1" folder in both
        the train and the test split, together with identity normalizing
        dictionaries (mean 0, std 1) for the dynamic and static features.

        Args:
            tmp_path: directory under which the feature files are written;
                also passed to the model as ``data_folder``.
            use_pred_months: not referenced anywhere in this test body.
                NOTE(review): confirm whether it should be forwarded to the
                model constructor.
            predict_delta: forwarded to the model; also decides which target
                value the test arrays are expected to hold (0 when truthy,
                1 otherwise).
        """
        folder_name = "1980_1"
        identity_norm = {"VHI": {"mean": 0.0, "std": 1.0}}

        x, _, _ = _make_dataset(size=(5, 5), const=True)
        # the target is the last time step of x
        y = x.isel(time=[-1])

        # dynamic features: identical data in the train and the test split
        forecast_dir = tmp_path / "features/one_month_forecast"
        for split in ("train", "test"):
            split_dir = forecast_dir / split / folder_name
            split_dir.mkdir(parents=True)
            x.to_netcdf(split_dir / "x.nc")
            y.to_netcdf(split_dir / "y.nc")

        with (forecast_dir / "normalizing_dict.pkl").open("wb") as f:
            pickle.dump(identity_norm, f)

        # static features
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        static_features = tmp_path / "features/static"
        static_features.mkdir(parents=True)
        x_static.to_netcdf(static_features / "data.nc")

        with (static_features / "normalizing_dict.pkl").open("wb") as f:
            pickle.dump(identity_norm, f)

        model = EARecurrentNetwork(
            hidden_size=128,
            dense_features=[10],
            rnn_dropout=0.25,
            data_folder=tmp_path,
            predict_delta=predict_delta,
            normalize_y=True,
        )
        model.train()
        test_arrays_dict, pred_dict = model.predict()

        # the foldername "1980_1" is the only one which should be in the
        # dictionaries
        assert set(test_arrays_dict) == {folder_name}
        assert set(pred_dict) == {folder_name}

        # _make_dataset with const=True returns all ones; with predict_delta
        # the target is instead a change of 0
        expected_y = 0 if predict_delta else 1
        assert (test_arrays_dict[folder_name]["y"] == expected_y).all()

        # test the Morris explanation works
        test_batch = next(
            iter(
                model.get_dataloader(mode="test",
                                     to_tensor=True,
                                     shuffle_data=False)))

        for val in test_batch.values():
            output_m = model.explain(val.x,
                                     save_explanations=True,
                                     method="morris")
            assert isinstance(output_m, TrainData)
            assert (model.model_dir /
                    "analysis/morris_value_historical.npy").exists()
Esempio n. 2
0
)
# test the training functionality
ealstm.train()

# test the prediction functionality
ealstm.predict()

# everything written to stdout so far must be exactly the warning below
# NOTE(review): this is an exact match against all captured output, so any
# extra print or trailing newline makes it fail - confirm this is intended.
# The text (including its spelling) has to mirror the emitted message, so it
# is left untouched here.
captured = capsys.readouterr()
expected_stdout = "`include_yearly_aggs` does not yet work for dynamic dataloder. Setting to False"
assert (
    captured.out == expected_stdout
), f"Expected stdout to be {expected_stdout}, got {captured.out}"

# test getting the dataloader
dl = ealstm.get_dataloader(mode='train')
X, y = next(iter(dl))

# the loaded dataloader is the dynamic one, and its train / test times are
# split on the test years
test_years = np.arange(2011, 2017)
assert isinstance(dl, DynamicDataLoader)
assert all(
    pd.to_datetime(t).year in test_years for t in dl.valid_test_times
), "Some test times fall outside the test years"
assert all(
    pd.to_datetime(t).year not in test_years for t in dl.valid_train_times
), "Some train times fall inside the test years"

# the dataloader data has the expected types and target shape
assert isinstance(X, tuple)
assert isinstance(X[0], np.ndarray)
assert isinstance(y, np.ndarray)
assert y.shape[-1] == 1