def test_predict(self, tmp_path, use_pred_months, predict_delta):
    x, _, _ = _make_dataset(size=(5, 5), const=True)
    y = x.isel(time=[-1])

    train_features = tmp_path / "features/one_month_forecast/train/1980_1"
    train_features.mkdir(parents=True)

    test_features = tmp_path / "features/one_month_forecast/test/1980_1"
    test_features.mkdir(parents=True)

    norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
    with (tmp_path / "features/one_month_forecast/normalizing_dict.pkl").open(
        "wb"
    ) as f:
        pickle.dump(norm_dict, f)

    x.to_netcdf(test_features / "x.nc")
    y.to_netcdf(test_features / "y.nc")

    x.to_netcdf(train_features / "x.nc")
    y.to_netcdf(train_features / "y.nc")

    # static
    x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
    static_features = tmp_path / "features/static"
    static_features.mkdir(parents=True)
    x_static.to_netcdf(static_features / "data.nc")

    static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
    with (tmp_path / "features/static/normalizing_dict.pkl").open("wb") as f:
        pickle.dump(static_norm_dict, f)

    dense_features = [10]
    hidden_size = 128
    rnn_dropout = 0.25

    model = RecurrentNetwork(
        hidden_size=hidden_size,
        dense_features=dense_features,
        rnn_dropout=rnn_dropout,
        data_folder=tmp_path,
        predict_delta=predict_delta,
    )
    model.train()
    test_arrays_dict, pred_dict = model.predict()

    # the foldername "1980_1" is the only one which should be in the dictionaries
    assert ("1980_1" in test_arrays_dict.keys()) and (len(test_arrays_dict) == 1)
    assert ("1980_1" in pred_dict.keys()) and (len(pred_dict) == 1)

    if not predict_delta:
        # _make_dataset with const=True returns all ones
        assert (test_arrays_dict["1980_1"]["y"] == 1).all()

def rnn( experiment="one_month_forecast", include_pred_month=True, surrounding_pixels=None, ignore_vars=None, include_static=True, ): # if the working directory is alread ml_drought don't need ../data if Path(".").absolute().as_posix().split("/")[-1] == "ml_drought": data_path = Path("data") else: data_path = Path("../data") predictor = RecurrentNetwork( hidden_size=128, data_folder=data_path, experiment=experiment, include_pred_month=include_pred_month, surrounding_pixels=surrounding_pixels, ignore_vars=ignore_vars, include_static=include_static, ) predictor.train(num_epochs=50, early_stopping=5) predictor.evaluate(save_preds=True) predictor.save_model()
def test_train(self, tmp_path, capsys, use_pred_months, predict_delta, static):
    x, _, _ = _make_dataset(size=(5, 5), const=True)
    y = x.isel(time=[-1])

    test_features = tmp_path / "features/one_month_forecast/train/1980_1"
    test_features.mkdir(parents=True)

    norm_dict = {"VHI": {"mean": 0, "std": 1}}
    with (tmp_path / "features/one_month_forecast/normalizing_dict.pkl").open(
        "wb"
    ) as f:
        pickle.dump(norm_dict, f)

    x.to_netcdf(test_features / "x.nc")
    y.to_netcdf(test_features / "y.nc")

    # static
    x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
    static_features = tmp_path / "features/static"
    static_features.mkdir(parents=True)
    x_static.to_netcdf(static_features / "data.nc")

    static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
    with (tmp_path / "features/static/normalizing_dict.pkl").open("wb") as f:
        pickle.dump(static_norm_dict, f)

    dense_features = [10]
    hidden_size = 128
    rnn_dropout = 0.25

    model = RecurrentNetwork(
        hidden_size=hidden_size,
        dense_features=dense_features,
        rnn_dropout=rnn_dropout,
        data_folder=tmp_path,
        include_monthly_aggs=True,
        predict_delta=predict_delta,
        static=static,
    )

    check_inversion = False
    model.train(check_inversion=check_inversion)
    captured = capsys.readouterr()
    expected_stdout = "Epoch 1, train smooth L1:"
    assert expected_stdout in captured.out
    assert type(model.model) == RNN, "Model attribute not an RNN!"

def run_models(data_path, experiment):
    # NOTE: this model is the same for all experiments
    print("Running persistence model")
    predictor = Persistence(data_path, experiment=experiment)
    predictor.evaluate(save_preds=True)

    # linear regression
    print(f"Running Linear Regression model: {experiment}")
    predictor = LinearRegression(
        data_path,
        experiment=experiment,
        include_pred_month=True,
        surrounding_pixels=1,
    )
    predictor.train(num_epochs=10, early_stopping=3)

    # linear network
    print(f"Running Linear Neural Network model: {experiment}")
    predictor = LinearNetwork(
        data_folder=data_path,
        experiment=experiment,
        layer_sizes=[100],
        include_pred_month=True,
        surrounding_pixels=1,
    )
    predictor.train(num_epochs=10, early_stopping=3)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    # recurrent network
    print(f"Running RNN (LSTM) model: {experiment}")
    predictor = RecurrentNetwork(
        data_folder=data_path,
        hidden_size=128,
        experiment=experiment,
        include_pred_month=True,
        surrounding_pixels=1,
    )
    predictor.train(num_epochs=10, early_stopping=3)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    # EA LSTM
    print(f"Running Entity Aware LSTM model: {experiment}")
    predictor = EARecurrentNetwork(
        data_folder=data_path,
        hidden_size=128,
        experiment=experiment,
        include_pred_month=True,
        surrounding_pixels=1,
    )
    predictor.train(num_epochs=10, early_stopping=3)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

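# A minimal usage sketch for run_models above. The "data/" path at the
# repository root and the list of experiment names are assumptions, not
# confirmed by the code in this section.
if __name__ == "__main__":
    from pathlib import Path

    data_path = Path("data")
    for experiment in ["one_month_forecast", "nowcast"]:
        run_models(data_path, experiment)
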
def rnn(experiment='one_month_forecast',
        include_pred_month=True,
        surrounding_pixels=1):
    # if the working directory is already ml_drought we don't need ../data
    if Path('.').absolute().as_posix().split('/')[-1] == 'ml_drought':
        data_path = Path('data')
    else:
        data_path = Path('../data')

    predictor = RecurrentNetwork(hidden_size=128,
                                 data_folder=data_path,
                                 experiment=experiment,
                                 include_pred_month=include_pred_month,
                                 surrounding_pixels=surrounding_pixels)
    predictor.train(num_epochs=50, early_stopping=5)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    _ = predictor.explain(save_shap_values=True)

def rnn( experiment="one_month_forecast", include_pred_month=True, surrounding_pixels=None, explain=False, static="features", ignore_vars=None, num_epochs=50, early_stopping=5, hidden_size=128, predict_delta=False, spatial_mask=None, include_latlons=False, normalize_y=True, include_prev_y=True, include_yearly_aggs=True, clear_nans=True, weight_observations=False, ): predictor = RecurrentNetwork( hidden_size=hidden_size, data_folder=get_data_path(), experiment=experiment, include_pred_month=include_pred_month, surrounding_pixels=surrounding_pixels, static=static, ignore_vars=ignore_vars, predict_delta=predict_delta, spatial_mask=spatial_mask, include_latlons=include_latlons, normalize_y=normalize_y, include_prev_y=include_prev_y, include_yearly_aggs=include_yearly_aggs, clear_nans=clear_nans, weight_observations=weight_observations, ) predictor.train(num_epochs=num_epochs, early_stopping=early_stopping) predictor.evaluate(save_preds=True) predictor.save_model() if explain: _ = predictor.explain(save_shap_values=True)
def rnn( experiment="one_month_forecast", include_pred_month=True, surrounding_pixels=None, ignore_vars=None, pretrained=True, ): predictor = RecurrentNetwork( hidden_size=128, data_folder=get_data_path(), experiment=experiment, include_pred_month=include_pred_month, surrounding_pixels=surrounding_pixels, ignore_vars=ignore_vars, ) predictor.train(num_epochs=50, early_stopping=5) predictor.evaluate(save_preds=True) predictor.save_model() _ = predictor.explain(save_shap_values=True)
def test_save(self, tmp_path, monkeypatch):
    features_per_month = 5
    dense_features = [10]
    input_dense_features = copy(dense_features)
    hidden_size = 128
    rnn_dropout = 0.25
    include_pred_month = True
    experiment = "one_month_forecast"
    ignore_vars = ["precip"]
    include_latlons = True
    include_prev_y = True
    normalize_y = False

    def mocktrain(self):
        self.model = RNN(
            features_per_month,
            dense_features,
            hidden_size,
            rnn_dropout,
            include_pred_month,
            include_latlons,
            experiment="one_month_forecast",
            include_prev_y=include_prev_y,
        )
        self.features_per_month = features_per_month

    monkeypatch.setattr(RecurrentNetwork, "train", mocktrain)

    model = RecurrentNetwork(
        hidden_size=hidden_size,
        dense_features=dense_features,
        rnn_dropout=rnn_dropout,
        data_folder=tmp_path,
        ignore_vars=ignore_vars,
        experiment=experiment,
        include_pred_month=include_pred_month,
        include_latlons=include_latlons,
        include_prev_y=include_prev_y,
        normalize_y=normalize_y,
    )
    model.train()
    model.save_model()

    assert (
        tmp_path / "models/one_month_forecast/rnn/model.pt"
    ).exists(), "Model not saved!"

    model_dict = torch.load(model.model_dir / "model.pt", map_location="cpu")

    for key, val in model_dict["model"]["state_dict"].items():
        assert (model.model.state_dict()[key] == val).all()

    assert model_dict["model"]["features_per_month"] == features_per_month
    assert model_dict["hidden_size"] == hidden_size
    assert model_dict["rnn_dropout"] == rnn_dropout
    assert model_dict["dense_features"] == input_dense_features
    assert model_dict["include_pred_month"] == include_pred_month
    assert model_dict["experiment"] == experiment
    assert model_dict["ignore_vars"] == ignore_vars
    assert model_dict["include_latlons"] == include_latlons
    assert model_dict["include_prev_y"] == include_prev_y
    assert model_dict["normalize_y"] == normalize_y

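# A sketch (not part of the original tests) of reconstructing the network from
# the dictionary saved by save_model(), using only the keys asserted in
# test_save above and the RNN constructor signature shown in mocktrain. The
# load path below is an assumption.
model_dict = torch.load("models/one_month_forecast/rnn/model.pt", map_location="cpu")
network = RNN(
    model_dict["model"]["features_per_month"],
    model_dict["dense_features"],
    model_dict["hidden_size"],
    model_dict["rnn_dropout"],
    model_dict["include_pred_month"],
    model_dict["include_latlons"],
    experiment=model_dict["experiment"],
    include_prev_y=model_dict["include_prev_y"],
)
network.load_state_dict(model_dict["model"]["state_dict"])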