def test_predict(self, tmp_path):
        self._create_train_samples(tmp_path, 12)

        # test with a single random NaN value included
        x, _, _ = _make_dataset(size=(5, 5), random_nan=1)
        y = x.isel(time=[-1])

        test_features = tmp_path / "features/one_month_forecast/test/1980_1"
        test_features.mkdir(parents=True)

        x.to_netcdf(test_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")

        predictor = Climatology(tmp_path)

        test_arrays, preds = predictor.predict()

        assert (
            test_arrays["1980_1"]["y"].shape == preds["1980_1"].shape
        ), "Shape of climatology is incorrect!"

        # calculate climatology
        _, y_train = read_train_data(tmp_path)
        ds = y_train
        nan_mask = test_arrays["1980_1"]["nan_mask"]

        # check that exactly one value is masked (the random NaN we included)
        assert nan_mask.sum() == 1

        # the predictions should equal the training-data monthly mean for January
        expected = ds["VHI"].groupby("time.month").mean(dim="time").sel(month=1)
        assert (
            preds["1980_1"].flatten() == expected.values.flatten()[~nan_mask]
        ).all(), "Expect the month mean to be calculated from the training data"
Example #2
    def test_predict(self, tmp_path):
        self._create_train_samples(tmp_path, 12)

        x, _, _ = _make_dataset(size=(5, 5))
        y = x.isel(time=[-1])

        test_features = tmp_path / "features/one_month_forecast/test/1980_1"
        test_features.mkdir(parents=True)

        x.to_netcdf(test_features / "x.nc")
        y.to_netcdf(test_features / "y.nc")

        predictor = Climatology(tmp_path)

        test_arrays, preds = predictor.predict()

        assert (
            test_arrays["1980_1"]["y"].shape == preds["1980_1"].shape
        ), "Shape of climatology is incorrect!"

        # calculate climatology
        _, y_train = read_train_data(tmp_path)
        ds = y_train

        # the predictions should equal the training-data monthly mean for January
        expected = ds["VHI"].groupby("time.month").mean(dim="time").sel(month=1)
        assert (
            preds["1980_1"].reshape(5, 5) == expected.values
        ).all(), "Expect the month mean to be calculated from the training data"
Example #3
def read_all_data(data_dir: Path,
                  experiment: str = "one_month_forecast",
                  static: bool = False) -> Tuple[xr.Dataset, ...]:
    X_train, y_train = read_train_data(data_dir, experiment=experiment)
    X_test, y_test = read_test_data(data_dir, experiment=experiment)

    if static:
        # also return the static (time-invariant) features
        static_ds = xr.open_dataset(data_dir / "features/static/data.nc")
        return (X_train, y_train, X_test, y_test, static_ds)
    return (X_train, y_train, X_test, y_test)
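
# Usage sketch (illustrative, not from the original source): assumes "data"
# is a pipeline data directory containing features/static/data.nc.
data_dir = Path("data")
X_train, y_train, X_test, y_test, static_ds = read_all_data(
    data_dir, experiment="one_month_forecast", static=True
)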
Example #4
    def predict(
        self,
        all_data: bool = False
    ) -> Tuple[Dict[str, Dict[str, np.ndarray]], Dict[str, np.ndarray]]:

        _, y_train = read_train_data(self.data_path)
        ds = y_train

        if all_data:
            # if requested, calculate the climatology over train + test data
            _, y_test = read_test_data(self.data_path)
            ds = xr.merge([y_train, y_test]).sortby("time").sortby("lat")

        # assume the first data variable is the prediction target
        target_var = list(ds.data_vars)[0]

        # calculate climatology:
        monmean = ds.groupby("time.month").mean(dim=["time"])[target_var]

        test_arrays_loader = self.get_dataloader(mode="test",
                                                 shuffle_data=False,
                                                 normalize=False,
                                                 static=False)

        preds_dict: Dict[str, np.ndarray] = {}
        test_arrays_dict: Dict[str, Dict[str, np.ndarray]] = {}
        # iterate over the test batches, predicting the monthly mean for each
        for batch in test_arrays_loader:
            for key, val in batch.items():
                # the target variable must be among the input features
                try:
                    _ = val.x_vars.index(val.y_var)
                except ValueError as e:
                    print("Target variable not in prediction data!")
                    raise e

                preds_dict[key] = monmean.sel(
                    month=val.target_time.month).values.reshape(val.y.shape)

                test_arrays_dict[key] = {
                    "y": val.y,
                    "latlons": val.latlons,
                    "time": val.target_time,
                    "y_var": val.y_var,
                }

        return test_arrays_dict, preds_dict
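
# Illustrative sketch (not from the original source) of the climatology
# computation above: group a toy dataset by calendar month, average over
# time, then select the month of the forecast target.
import numpy as np
import pandas as pd
import xarray as xr

times = pd.date_range("1980-01-31", periods=24, freq="M")
toy = xr.Dataset(
    {"VHI": (("time", "lat", "lon"), np.random.rand(24, 5, 5))},
    coords={"time": times, "lat": np.arange(5), "lon": np.arange(5)},
)
monmean = toy.groupby("time.month").mean(dim="time")["VHI"]  # (month, lat, lon)
january_climatology = monmean.sel(month=1)  # prediction for any January target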
Example #5
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import sys

%load_ext autoreload
%autoreload 2
%matplotlib

# data_dir = data_path = Path('data')  # local alternative
data_dir = data_path = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')
sys.path.append('/Users/tommylees/github/ml_drought')


# load model
from src.models import load_model

model_path = data_dir / 'models/one_month_forecast/ealstm/model.pt'
assert model_path.exists()

ealstm = load_model(model_path)

# load X / Y data
from src.analysis import read_train_data, read_test_data
X_train, y_train = read_train_data(data_dir)
X_test, y_test = read_test_data(data_dir)
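
# Possible next step (an assumption, not in the original snippet): generate
# predictions with the loaded model, mirroring the predict() interface of the
# Climatology baseline in Example #4, which returns (test_arrays, preds).
# test_arrays, preds = ealstm.predict()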

Example #6
import xarray as xr
import pandas as pd
from geopandas import GeoDataFrame
import pickle
from pathlib import Path
data_dir = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data')

from src.analysis.region_analysis.groupby_region import KenyaGroupbyRegion
from src.analysis.region_analysis.groupby_region import GroupbyRegion
from src.analysis import read_train_data

# ------------------------
# Read the training data
# ------------------------
X, y = read_train_data(data_dir)

# extract mean values for each region for each variable
region_grouper = KenyaGroupbyRegion(data_dir=data_dir)
region_precip_df = region_grouper.analyze(X.precip, selection='level_2')
region_precip_gdf = region_grouper.gdf.rename(columns={'mean_value': 'precip'})

region_grouper = KenyaGroupbyRegion(data_dir=data_dir)
region_E_df = region_grouper.analyze(X.E, selection='level_2')
region_E_gdf = region_grouper.gdf.rename(columns={'mean_value': 'E'})

region_grouper = KenyaGroupbyRegion(data_dir=data_dir)
region_SMsurf_df = region_grouper.analyze(X.SMsurf, selection='level_2')
region_SMsurf_gdf = region_grouper.gdf.rename(columns={'mean_value': 'SMsurf'})

region_grouper = KenyaGroupbyRegion(data_dir=data_dir)
region_VCI_df = region_grouper.analyze(y.VCI, selection='level_2')

from src.engineer import Engineer

# e = Engineer(data_dir)
# data = e.engineer_class._make_dataset(static=False)

from src.analysis import read_train_data, read_test_data
from src.analysis.indices.utils import rolling_mean

boku = True

if boku:
    experiment = "one_month_forecast_BOKU_boku_VCI"
else:
    experiment = "one_month_forecast"

X_train, y_train = read_train_data(data_dir, experiment=experiment)
X_test, y_test = read_test_data(data_dir, experiment=experiment)
# combine the train and test periods into a single dataset
ds = xr.merge([y_train, y_test]).sortby("time").sortby("lat")
d_ = xr.merge([X_train, X_test]).sortby("time").sortby("lat")
ds = xr.merge([ds, d_])

# ----------------------------------------
# Create the features (pixel-by-pixel)
# ----------------------------------------
"""
NOTE: Nasty hack

the indices.spi computation sometimes collapses the dimensionality
of the groupby object

~/miniconda3/envs/crop/lib/python3.7/site-packages/xarray/core/computation.py in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, output_sizes, keep_attrs, *args)