Code Example #1
def linear_nn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    explain=False,
    static="features",
    ignore_vars=None,
    num_epochs=50,
    early_stopping=5,
    layer_sizes=[100],
    predict_delta=False,
    spatial_mask=None,
    include_latlons=False,
):
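    # Train a LinearNetwork for the given experiment, evaluate it
    # (saving predictions), persist the model, and optionally compute
    # SHAP explanations.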
    predictor = LinearNetwork(
        layer_sizes=layer_sizes,
        data_folder=get_data_path(),
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        static=static,
        ignore_vars=ignore_vars,
        predict_delta=predict_delta,
        spatial_mask=spatial_mask,
        include_latlons=include_latlons,
    )
    predictor.train(num_epochs=num_epochs, early_stopping=early_stopping)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    if explain:
        _ = predictor.explain(save_shap_values=True)
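
A hedged usage sketch, not part of the source: assuming linear_nn and its ml_drought imports (LinearNetwork, get_data_path) are in scope, the helper might be invoked like this.

# hypothetical invocation: train, evaluate, save, and explain in one call
linear_nn(experiment="one_month_forecast", explain=True)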
Code Example #2
def linear_nn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    ignore_vars=None,
    include_static=True,
):
    # if the working directory is already ml_drought, we don't need ../data
    if Path(".").absolute().as_posix().split("/")[-1] == "ml_drought":
        data_path = Path("data")
    else:
        data_path = Path("../data")

    predictor = LinearNetwork(
        layer_sizes=[100],
        data_folder=data_path,
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        ignore_vars=ignore_vars,
        include_static=include_static,
    )
    predictor.train(num_epochs=50, early_stopping=5)
    predictor.evaluate(save_preds=True)
    predictor.save_model()
Code Example #3
    def test_save(self, tmp_path, monkeypatch):

        layer_sizes = [10]
        input_layer_sizes = copy(layer_sizes)
        input_size = 10
        dropout = 0.25
        include_pred_month = True
        include_latlons = True
        include_monthly_aggs = True
        surrounding_pixels = 1
        ignore_vars = ['precip']
        include_yearly_aggs = True

        def mocktrain(self):
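            # stand-in for LinearNetwork.train: build the model directly
            # so the test needs no real training data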
            self.model = LinearModel(input_size,
                                     layer_sizes,
                                     dropout,
                                     include_pred_month,
                                     include_latlons,
                                     include_yearly_aggs,
                                     include_static=True)
            self.input_size = input_size

        monkeypatch.setattr(LinearNetwork, 'train', mocktrain)

        model = LinearNetwork(data_folder=tmp_path,
                              layer_sizes=layer_sizes,
                              dropout=dropout,
                              experiment='one_month_forecast',
                              include_pred_month=include_pred_month,
                              include_latlons=include_latlons,
                              include_monthly_aggs=include_monthly_aggs,
                              include_yearly_aggs=include_yearly_aggs,
                              surrounding_pixels=surrounding_pixels,
                              ignore_vars=ignore_vars)
        model.train()
        model.save_model()

        assert (tmp_path / 'models/one_month_forecast/linear_network/model.pt'
                ).exists(), 'Model not saved!'

        model_dict = torch.load(model.model_dir / 'model.pt',
                                map_location='cpu')

        for key, val in model_dict['model']['state_dict'].items():
            assert (model.model.state_dict()[key] == val).all()

        assert model_dict['dropout'] == dropout
        assert model_dict['layer_sizes'] == input_layer_sizes
        assert model_dict['model']['input_size'] == input_size
        assert model_dict['include_pred_month'] == include_pred_month
        assert model_dict['include_latlons'] == include_latlons
        assert model_dict['include_monthly_aggs'] == include_monthly_aggs
        assert model_dict['include_yearly_aggs'] == include_yearly_aggs
        assert model_dict['surrounding_pixels'] == surrounding_pixels
        assert model_dict['ignore_vars'] == ignore_vars
Code Example #4
def linear_nn(experiment='one_month_forecast',
              include_pred_month=True,
              surrounding_pixels=1):
    # if the working directory is already ml_drought, we don't need ../data
    if Path('.').absolute().as_posix().split('/')[-1] == 'ml_drought':
        data_path = Path('data')
    else:
        data_path = Path('../data')

    predictor = LinearNetwork(layer_sizes=[100],
                              data_folder=data_path,
                              experiment=experiment,
                              include_pred_month=include_pred_month,
                              surrounding_pixels=surrounding_pixels)
    predictor.train(num_epochs=50, early_stopping=5)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    _ = predictor.explain(save_shap_values=True)
Code Example #5
File: models.py  Project: Akumenyi/ml_drought
def linear_nn(
    experiment="one_month_forecast",
    include_pred_month=True,
    surrounding_pixels=None,
    ignore_vars=None,
    pretrained=False,
):
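    # NOTE: the pretrained argument is accepted but unused in this snippet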
    predictor = LinearNetwork(
        layer_sizes=[100],
        data_folder=get_data_path(),
        experiment=experiment,
        include_pred_month=include_pred_month,
        surrounding_pixels=surrounding_pixels,
        ignore_vars=ignore_vars,
    )
    predictor.train(num_epochs=50, early_stopping=5)
    predictor.evaluate(save_preds=True)
    predictor.save_model()

    _ = predictor.explain(save_shap_values=True)
Code Example #6
    def test_train(
        self,
        tmp_path,
        capsys,
        use_pred_months,
        use_latlons,
        experiment,
        monthly_agg,
        static,
        predict_delta,
    ):
        # make the x, y data (5*5 latlons, 36 timesteps, 3 features)
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        y = x.isel(time=[-1])

        x_add1, _, _ = _make_dataset(size=(5, 5),
                                     const=True,
                                     variable_name="precip")
        x_add2, _, _ = _make_dataset(size=(5, 5),
                                     const=True,
                                     variable_name="temp")
        x = xr.merge([x, x_add1, x_add2])

        norm_dict = {
            "VHI": {"mean": 0, "std": 1},
            "precip": {"mean": 0, "std": 1},
            "temp": {"mean": 0, "std": 1},
        }

        test_features = tmp_path / f"features/{experiment}/train/1980_1"
        test_features.mkdir(parents=True, exist_ok=True)

        # make the normalising dictionary
        with (tmp_path /
              f"features/{experiment}/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(norm_dict, f)

        x.to_netcdf(test_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")

        if static:
            x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
            static_features = tmp_path / "features/static"
            static_features.mkdir(parents=True)
            x_static.to_netcdf(static_features / "data.nc")

            static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
            with (tmp_path /
                  "features/static/normalizing_dict.pkl").open("wb") as f:
                pickle.dump(static_norm_dict, f)

        layer_sizes = [10]
        dropout = 0.25

        model = LinearNetwork(
            data_folder=tmp_path,
            layer_sizes=layer_sizes,
            dropout=dropout,
            experiment=experiment,
            include_pred_month=use_pred_months,
            include_latlons=use_latlons,
            include_monthly_aggs=monthly_agg,
            static="embeddings",
            predict_delta=predict_delta,
        )

        model.train()

        captured = capsys.readouterr()
        expected_stdout = "Epoch 1, train smooth L1: "
        assert expected_stdout in captured.out

        assert isinstance(
            model.model, LinearModel
        ), "Model attribute not a LinearModel!"
Code Example #7
    def test_predict(self, tmp_path, use_pred_months, use_latlons, experiment):
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        y = x.isel(time=[-1])

        train_features = tmp_path / f"features/{experiment}/train/1980_1"
        train_features.mkdir(parents=True)

        test_features = tmp_path / f"features/{experiment}/test/1980_1"
        test_features.mkdir(parents=True)

        # static
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        static_features = tmp_path / "features/static"
        static_features.mkdir(parents=True)
        x_static.to_netcdf(static_features / "data.nc")

        static_norm_dict = {"VHI": {"mean": 0.0, "std": 1.0}}
        with (tmp_path /
              "features/static/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(static_norm_dict, f)

        # if nowcast we need another x feature
        if experiment == "nowcast":
            x_add1, _, _ = _make_dataset(size=(5, 5),
                                         const=True,
                                         variable_name="precip")
            x_add2, _, _ = _make_dataset(size=(5, 5),
                                         const=True,
                                         variable_name="temp")
            x = xr.merge([x, x_add1, x_add2])

            norm_dict = {
                "VHI": {"mean": 0, "std": 1},
                "precip": {"mean": 0, "std": 1},
                "temp": {"mean": 0, "std": 1},
            }
        else:
            norm_dict = {"VHI": {"mean": 0, "std": 1}}

        with (tmp_path /
              f"features/{experiment}/normalizing_dict.pkl").open("wb") as f:
            pickle.dump(norm_dict, f)

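        # identical x/y files serve both the test and train directories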
        x.to_netcdf(test_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")

        x.to_netcdf(train_features / "x.nc")
        y.to_netcdf(train_features / "y.nc")

        layer_sizes = [10]
        dropout = 0.25

        model = LinearNetwork(
            data_folder=tmp_path,
            layer_sizes=layer_sizes,
            dropout=dropout,
            experiment=experiment,
            include_pred_month=use_pred_months,
            include_latlons=use_latlons,
        )
        model.train()
        test_arrays_dict, pred_dict = model.predict()

        # the foldername "1980_1" is the only one which should be in the dictionaries
        assert ("1980_1" in test_arrays_dict.keys()) and (len(test_arrays_dict)
                                                          == 1)
        assert ("1980_1" in pred_dict.keys()) and (len(pred_dict) == 1)

        # _make_dataset with const=True returns all ones
        assert (test_arrays_dict["1980_1"]["y"] == 1).all()
Code Example #8
    def test_save(self, tmp_path, monkeypatch):

        layer_sizes = [10]
        input_layer_sizes = copy(layer_sizes)
        input_size = 10
        dropout = 0.25
        include_pred_month = True
        include_latlons = True
        include_monthly_aggs = True
        surrounding_pixels = 1
        ignore_vars = ["precip"]
        include_yearly_aggs = True
        normalize_y = False

        def mocktrain(self):
            self.model = LinearModel(
                input_size,
                layer_sizes,
                dropout,
                include_pred_month,
                include_latlons,
                include_yearly_aggs,
                include_static=True,
                include_prev_y=True,
            )
            self.input_size = input_size

        monkeypatch.setattr(LinearNetwork, "train", mocktrain)

        model = LinearNetwork(
            data_folder=tmp_path,
            layer_sizes=layer_sizes,
            dropout=dropout,
            experiment="one_month_forecast",
            include_pred_month=include_pred_month,
            include_latlons=include_latlons,
            include_monthly_aggs=include_monthly_aggs,
            include_yearly_aggs=include_yearly_aggs,
            surrounding_pixels=surrounding_pixels,
            ignore_vars=ignore_vars,
            include_prev_y=True,
            normalize_y=normalize_y,
        )
        model.train()
        model.save_model()

        assert (tmp_path / "models/one_month_forecast/linear_network/model.pt"
                ).exists(), f"Model not saved!"

        model_dict = torch.load(model.model_dir / "model.pt",
                                map_location="cpu")

        for key, val in model_dict["model"]["state_dict"].items():
            assert (model.model.state_dict()[key] == val).all()

        assert model_dict["dropout"] == dropout
        assert model_dict["layer_sizes"] == input_layer_sizes
        assert model_dict["model"]["input_size"] == input_size
        assert model_dict["include_pred_month"] == include_pred_month
        assert model_dict["include_latlons"] == include_latlons
        assert model_dict["include_monthly_aggs"] == include_monthly_aggs
        assert model_dict["include_yearly_aggs"] == include_yearly_aggs
        assert model_dict["surrounding_pixels"] == surrounding_pixels
        assert model_dict["ignore_vars"] == ignore_vars
        assert model_dict["include_prev_y"] is True
        assert model_dict["normalize_y"] == normalize_y
Code Example #9
# train models
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from src.models import LinearRegression, LinearNetwork, Persistence
from src.models.data import DataLoader

data_path = Path("data")
lr = LinearRegression(data_path)
lr.train()

ln = LinearNetwork(layer_sizes=[100], data_folder=data_path)
ln.train(num_epochs=10)

# ------------------------------------------------------------------------------
# try to explain the LinearRegression model
# ------------------------------------------------------------------------------
test_arrays_loader = DataLoader(
    data_path=data_path, batch_file_size=1, shuffle_data=False, mode="test"
)
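# take the first batch from the test loader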
key, val = list(next(iter(test_arrays_loader)).items())[0]
explanations = lr.explain(val.x)

# plot the SHAP explanations

# 1. mean spatial and temporal response
mean_expl = explanations.mean(axis=0).mean(axis=0)
x_vars = val.x_vars
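
The snippet ends before the plot is drawn; below is a minimal sketch of the plotting step, assuming mean_expl is 1-D with one entry per variable in x_vars (these calls are illustrative, not from the source).

# hedged sketch: bar-plot the mean SHAP value per input variable
fig, ax = plt.subplots()
ax.bar(x_vars, mean_expl)
ax.set_ylabel("mean SHAP value")
fig.savefig("shap_mean_response.png")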
Code Example #10
    def test_train(self, tmp_path, capsys, use_pred_months, use_latlons,
                   experiment, monthly_agg, static):
        # make the x, y data (5*5 latlons, 36 timesteps, 3 features)
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        y = x.isel(time=[-1])

        x_add1, _, _ = _make_dataset(size=(5, 5),
                                     const=True,
                                     variable_name='precip')
        x_add2, _, _ = _make_dataset(size=(5, 5),
                                     const=True,
                                     variable_name='temp')
        x = xr.merge([x, x_add1, x_add2])

        norm_dict = {
            'VHI': {'mean': 0, 'std': 1},
            'precip': {'mean': 0, 'std': 1},
            'temp': {'mean': 0, 'std': 1},
        }

        test_features = tmp_path / f'features/{experiment}/train/hello'
        test_features.mkdir(parents=True, exist_ok=True)

        # make the normalising dictionary
        with (tmp_path /
              f'features/{experiment}/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(norm_dict, f)

        x.to_netcdf(test_features / 'x.nc')
        y.to_netcdf(test_features / 'y.nc')

        if static:
            x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
            static_features = tmp_path / 'features/static'
            static_features.mkdir(parents=True)
            x_static.to_netcdf(static_features / 'data.nc')

            static_norm_dict = {'VHI': {'mean': 0.0, 'std': 1.0}}
            with (tmp_path /
                  'features/static/normalizing_dict.pkl').open('wb') as f:
                pickle.dump(static_norm_dict, f)

        layer_sizes = [10]
        dropout = 0.25

        model = LinearNetwork(data_folder=tmp_path,
                              layer_sizes=layer_sizes,
                              dropout=dropout,
                              experiment=experiment,
                              include_pred_month=use_pred_months,
                              include_latlons=use_latlons,
                              include_monthly_aggs=monthly_agg,
                              include_static=static)

        model.train()

        # check the number of input features is properly initialised
        n_input_features = next(
            model.model.dense_layers.parameters()).shape[-1]

        # Expect to have 12 more features if use_pred_months
        if experiment == 'nowcast':
            n_expected = 107
        else:
            # NOTE: data hasn't been through `src.Engineer` therefore including
            #  current data (hence why more features than `nowcast`)
            n_expected = 108

        if monthly_agg:
            n_expected *= 2
        if use_pred_months:
            n_expected += 12
        if use_latlons:
            n_expected += 2

        n_expected += 3  # +3 for the yearly means

        if static:
            n_expected += 1  # for the static variable
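        # e.g. nowcast with every flag on: 107 * 2 + 12 + 2 + 3 + 1 = 232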

        assert n_input_features == n_expected, \
            f"Expected the number of input features to be: {n_expected}. " \
            f"Got: {n_input_features}"

        captured = capsys.readouterr()
        expected_stdout = 'Epoch 1, train smooth L1: '
        assert expected_stdout in captured.out

        assert isinstance(model.model, LinearModel), \
            'Model attribute not a LinearModel!'
Code Example #11
    def test_predict(self, tmp_path, use_pred_months, use_latlons, experiment):
        x, _, _ = _make_dataset(size=(5, 5), const=True)
        y = x.isel(time=[-1])

        train_features = tmp_path / f'features/{experiment}/train/hello'
        train_features.mkdir(parents=True)

        test_features = tmp_path / f'features/{experiment}/test/hello'
        test_features.mkdir(parents=True)

        # static
        x_static, _, _ = _make_dataset(size=(5, 5), add_times=False)
        static_features = tmp_path / 'features/static'
        static_features.mkdir(parents=True)
        x_static.to_netcdf(static_features / 'data.nc')

        static_norm_dict = {'VHI': {'mean': 0.0, 'std': 1.0}}
        with (tmp_path /
              'features/static/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(static_norm_dict, f)

        # if nowcast we need another x feature
        if experiment == 'nowcast':
            x_add1, _, _ = _make_dataset(size=(5, 5),
                                         const=True,
                                         variable_name='precip')
            x_add2, _, _ = _make_dataset(size=(5, 5),
                                         const=True,
                                         variable_name='temp')
            x = xr.merge([x, x_add1, x_add2])

            norm_dict = {
                'VHI': {'mean': 0, 'std': 1},
                'precip': {'mean': 0, 'std': 1},
                'temp': {'mean': 0, 'std': 1},
            }
        else:
            norm_dict = {'VHI': {'mean': 0, 'std': 1}}

        with (tmp_path /
              f'features/{experiment}/normalizing_dict.pkl').open('wb') as f:
            pickle.dump(norm_dict, f)

        x.to_netcdf(test_features / 'x.nc')
        y.to_netcdf(test_features / 'y.nc')

        x.to_netcdf(train_features / 'x.nc')
        y.to_netcdf(train_features / 'y.nc')

        layer_sizes = [10]
        dropout = 0.25

        model = LinearNetwork(data_folder=tmp_path,
                              layer_sizes=layer_sizes,
                              dropout=dropout,
                              experiment=experiment,
                              include_pred_month=use_pred_months,
                              include_latlons=use_latlons)
        model.train()
        test_arrays_dict, pred_dict = model.predict()

        # the foldername "hello" is the only one which should be in the dictionaries
        assert 'hello' in test_arrays_dict and len(test_arrays_dict) == 1
        assert 'hello' in pred_dict and len(pred_dict) == 1

        # _make_dataset with const=True returns all ones
        assert (test_arrays_dict['hello']['y'] == 1).all()