Exemple #1
0
    def test_train(
        self, tmp_path, capsys, use_pred_months, experiment, monthly_agg, predict_delta
    ):
        """Train a LinearRegression on a tiny synthetic dataset and check its outputs.

        The test writes train/test ``x.nc``/``y.nc`` files, a static ``data.nc``
        and the pickled normalizing dicts under ``tmp_path``, trains the model,
        and then verifies the training log line, the underlying estimator type,
        the number of fitted coefficients, the prediction length and the files
        written by ``evaluate(save_preds=True)``.

        Args:
            tmp_path: Root directory under which all features are written and
                which is handed to the model as its data folder.
            capsys: Used to read what ``model.train()`` printed to stdout.
            use_pred_months: Forwarded as ``include_pred_month``; when truthy,
                12 additional coefficients are expected.
            experiment: Experiment name used in the feature paths; must be
                ``"nowcast"`` or ``"one_month_forecast"``.
            monthly_agg: Forwarded as ``include_monthly_aggs``; when truthy,
                the base coefficient count is doubled.
            predict_delta: Forwarded unchanged to the model.

        Raises:
            ValueError: If ``experiment`` is not one of the two known names.
        """
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        # The target is the final timestep of the (single-variable) input.
        y = x.isel(time=[-1])

        # Two extra dynamic variables, scaled so they differ from the first.
        x_add1, _, _ = _make_dataset(size=(5, 5), const=True, variable_name="precip")
        x_add1 = x_add1 * 2
        x_add2, _, _ = _make_dataset(size=(5, 5), const=True, variable_name="temp")
        x_add2 = x_add2 * 3
        x = xr.merge([x, x_add1, x_add2])

        # Identity normalization (mean 0, std 1) for every variable.
        norm_dict = {
            "VHI": {"mean": 0, "std": 1},
            "precip": {"mean": 0, "std": 1},
            "temp": {"mean": 0, "std": 1},
        }

        static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}

        test_features = tmp_path / f"features/{experiment}/train/2001_12"
        test_features.mkdir(parents=True)
        pred_features = tmp_path / f"features/{experiment}/test/2001_12"
        pred_features.mkdir(parents=True)
        static_features = tmp_path / "features/static"
        static_features.mkdir(parents=True)

        with (tmp_path / f"features/{experiment}/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(norm_dict, f)

        with (tmp_path / "features/static/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(static_norm_dict, f)

        # The same data is used for both the train and the test split.
        x.to_netcdf(test_features / "x.nc")
        x.to_netcdf(pred_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")
        y.to_netcdf(pred_features / "y.nc")
        x_static.to_netcdf(static_features / "data.nc")

        model = LinearRegression(
            tmp_path,
            include_pred_month=use_pred_months,
            experiment=experiment,
            include_monthly_aggs=monthly_agg,
            predict_delta=predict_delta,
            normalize_y=True,
        )
        model.train()

        captured = capsys.readouterr()
        expected_stdout = "Epoch 1, train RMSE: "
        assert (
            expected_stdout in captured.out
        ), f"Expected stdout to be {expected_stdout}, got {captured.out}"

        assert isinstance(
            model.model, linear_model.SGDRegressor
        ), "Model attribute not a linear regression!"

        # Expected number of coefficients contributed by the dynamic variables.
        if experiment == "nowcast":
            coef_size = (3 * 35) + 2
        elif experiment == "one_month_forecast":
            coef_size = 3 * 36
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(f"Unexpected experiment: {experiment!r}")
        if monthly_agg:
            # Doubled to account for the monthly aggregates.
            # NOTE(review): the previous comment also mentioned "tripled
            # including the std", but only a factor of 2 is applied - confirm.
            coef_size *= 2
        if use_pred_months:
            coef_size += 12

        coef_size += 3  # for the yearly aggs
        coef_size += 1  # for the static variable
        coef_size += 1  # for the prev_y_var

        assert model.model.coef_.size == coef_size, "Got unexpected coef size"

        test_arrays_dict, preds_dict = model.predict()
        assert (
            test_arrays_dict["2001_12"]["y"].size == preds_dict["2001_12"].shape[0]
        ), "Expected length of test arrays to be the same as the predictions"

        # test saving the model outputs
        model.evaluate(save_preds=True)

        save_path = model.data_path / "models" / experiment / "linear_regression"
        assert (save_path / "preds_2001_12.nc").exists()
        assert (save_path / "results.json").exists()

        # Open inside a context manager so the file handle is released.
        with xr.open_dataset(save_path / "preds_2001_12.nc") as pred_ds:
            assert np.isin(["lat", "lon", "time"], list(pred_ds.coords)).all()
            assert y.time == pred_ds.time
    def test_train(self, tmp_path, capsys, use_pred_months, experiment,
                   monthly_agg):
        """Train a LinearRegression on a tiny synthetic dataset and check its outputs.

        The test writes train/test ``x.nc``/``y.nc`` files (in a feature folder
        named ``hello``), a static ``data.nc`` and the pickled normalizing
        dicts under ``tmp_path``, trains the model, and then verifies the
        training log line, the underlying estimator type, the number of fitted
        coefficients, the prediction length and the files written by
        ``evaluate(save_preds=True)``.

        Args:
            tmp_path: Root directory under which all features are written and
                which is handed to the model as its data folder.
            capsys: Used to read what ``model.train()`` printed to stdout.
            use_pred_months: Forwarded as ``include_pred_month``; when truthy,
                12 additional coefficients are expected.
            experiment: Experiment name used in the feature paths; must be
                ``'nowcast'`` or ``'one_month_forecast'``.
            monthly_agg: Forwarded as ``include_monthly_aggs``; when truthy,
                the base coefficient count is doubled.

        Raises:
            ValueError: If ``experiment`` is not one of the two known names.
        """
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        # The target is the final timestep of the (single-variable) input.
        y = x.isel(time=[-1])

        # Two extra dynamic variables merged into the input dataset.
        x_add1, _, _ = _make_dataset(size=(5, 5),
                                     const=True,
                                     variable_name='precip')
        x_add2, _, _ = _make_dataset(size=(5, 5),
                                     const=True,
                                     variable_name='temp')
        x = xr.merge([x, x_add1, x_add2])

        # Identity normalization (mean 0, std 1) for every variable.
        norm_dict = {
            'VHI': {'mean': 0, 'std': 1},
            'precip': {'mean': 0, 'std': 1},
            'temp': {'mean': 0, 'std': 1},
        }

        static_norm_dict = {'VHI': {'mean': 0.0, 'std': 1.0}}

        test_features = tmp_path / f'features/{experiment}/train/hello'
        test_features.mkdir(parents=True)
        pred_features = tmp_path / f'features/{experiment}/test/hello'
        pred_features.mkdir(parents=True)
        static_features = tmp_path / 'features/static'
        static_features.mkdir(parents=True)

        with (tmp_path /
              f'features/{experiment}/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(norm_dict, f)

        with (tmp_path /
              'features/static/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(static_norm_dict, f)

        # The same data is used for both the train and the test split.
        x.to_netcdf(test_features / 'x.nc')
        x.to_netcdf(pred_features / 'x.nc')
        y.to_netcdf(test_features / 'y.nc')
        y.to_netcdf(pred_features / 'y.nc')
        x_static.to_netcdf(static_features / 'data.nc')

        model = LinearRegression(tmp_path,
                                 include_pred_month=use_pred_months,
                                 experiment=experiment,
                                 include_monthly_aggs=monthly_agg)
        model.train()

        captured = capsys.readouterr()
        expected_stdout = 'Epoch 1, train RMSE: '
        assert expected_stdout in captured.out, \
            f'Expected stdout to be {expected_stdout}, got {captured.out}'

        assert isinstance(model.model, linear_model.SGDRegressor), \
            'Model attribute not a linear regression!'

        # Expected number of coefficients contributed by the dynamic variables.
        if experiment == 'nowcast':
            coef_size = (3 * 35) + 2
        elif experiment == 'one_month_forecast':
            coef_size = (3 * 36)
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(f'Unexpected experiment: {experiment!r}')
        if monthly_agg:
            # Doubled to account for the monthly aggregates.
            # NOTE(review): the previous comment also mentioned "tripled
            # including the std", but only a factor of 2 is applied - confirm.
            coef_size *= 2
        if use_pred_months:
            coef_size += 12

        coef_size += 3  # for the yearly aggs
        coef_size += 1  # for the static variable

        assert model.model.coef_.size == coef_size, 'Got unexpected coef size'

        test_arrays_dict, preds_dict = model.predict()
        assert (
            test_arrays_dict['hello']['y'].size == preds_dict['hello'].shape[0]
        ), 'Expected length of test arrays to be the same as the predictions'

        # test saving the model outputs
        model.evaluate(save_preds=True)

        save_path = model.data_path / 'models' / experiment / 'linear_regression'
        assert (save_path / 'preds_hello.nc').exists()
        assert (save_path / 'results.json').exists()

        # Open inside a context manager so the file handle is released.
        with xr.open_dataset(save_path / 'preds_hello.nc') as pred_ds:
            assert np.isin(['lat', 'lon', 'time'],
                           list(pred_ds.coords)).all()
            assert y.time == pred_ds.time