def engineer(data_path, experiment='one_month_forecast', process_static=True,
             pred_months=12, expected_length=None):
    """Run the Engineer for test year 2018 with 'VHI' as the target variable.

    Args:
        data_path: passed to ``Engineer`` as its first positional argument.
        experiment: forwarded to ``Engineer(experiment=...)``.
        process_static: forwarded to ``Engineer(process_static=...)``.
        pred_months: forwarded to ``Engineer.engineer(pred_months=...)``.
        expected_length: forwarded to ``Engineer.engineer(expected_length=...)``.
            When left as ``None`` it falls back to ``pred_months``, which is
            the value that used to be passed unconditionally (the argument
            was previously accepted but ignored).
    """
    if expected_length is None:
        expected_length = pred_months

    # named differently from the enclosing function to avoid shadowing it
    feature_engineer = Engineer(
        data_path, experiment=experiment, process_static=process_static
    )
    feature_engineer.engineer(
        test_year=2018, target_variable='VHI',
        pred_months=pred_months, expected_length=expected_length,
    )
Exemple #2
0
def engineer(pred_months=3, target_var="VCI1M"):
    """Run the "one_month_forecast" Engineer with ``process_static=False``.

    The test years are 2016, 2017 and 2018. ``pred_months`` is passed both as
    ``pred_months`` and as ``expected_length``; ``target_var`` is passed as
    ``target_variable``.
    """
    feature_engineer = Engineer(
        get_data_path(), experiment="one_month_forecast", process_static=False
    )
    test_years = list(range(2016, 2019))
    feature_engineer.engineer(
        test_year=test_years,
        target_variable=target_var,
        pred_months=pred_months,
        expected_length=pred_months,
    )
Exemple #3
0
def eng_strato():
    """Run the 'strato' experiment Engineer for test year 2018, target 'u'.

    The data directory is chosen relative to the current working directory.
    """
    # When already inside ml_drought the data folder is a direct child;
    # otherwise it is looked up one level above the working directory.
    cwd_name = Path('.').absolute().as_posix().split('/')[-1]
    data_path = Path('data') if cwd_name == 'ml_drought' else Path('../data')

    strato_engineer = Engineer(data_path, experiment='strato')
    strato_engineer.engineer(test_year=2018, target_variable='u')
Exemple #4
0
def engineer(experiment="one_month_forecast",
             process_static=True,
             pred_months=12):
    """Run the Engineer on the path returned by ``get_data_path()``.

    The target variable is "VCI" and the test years are 2011 through 2018
    inclusive. ``pred_months`` is also used as ``expected_length``.
    """
    feature_engineer = Engineer(
        get_data_path(), experiment=experiment, process_static=process_static
    )
    test_years = list(range(2011, 2019))
    feature_engineer.engineer(
        test_year=test_years,
        target_variable="VCI",
        pred_months=pred_months,
        expected_length=pred_months,
    )
Exemple #5
0
def engineer(experiment='one_month_forecast', process_static=True,
             pred_months=12):
    """Run the Engineer for test year 2018 with 'VCI' as the target variable.

    The data directory is chosen relative to the current working directory,
    and ``pred_months`` is also used as ``expected_length``.
    """
    # When already inside ml_drought the data folder is a direct child;
    # otherwise it is looked up one level above the working directory.
    cwd_name = Path('.').absolute().as_posix().split('/')[-1]
    data_path = Path('data') if cwd_name == 'ml_drought' else Path('../data')

    feature_engineer = Engineer(
        data_path, experiment=experiment, process_static=process_static
    )
    feature_engineer.engineer(
        test_year=2018,
        target_variable='VCI',
        pred_months=pred_months,
        expected_length=pred_months,
    )
    def test_engineer(self, tmp_path):
        """Run the Engineer on ``tmp_path`` and check the files it writes.

        Checks, under ``features/one_month_forecast``:
        * every ``test/2001_<month>`` and ``train/2000_<month>`` folder holds
          an ``x.nc`` with variables "a" and "b" and ``expected_length`` time
          steps, and a ``y.nc`` without "b" and with exactly one time step;
        * no ``2001_*`` folder exists in the training directory;
        * ``normalizing_dict.pkl`` exists and every entry has mean 1, std 0.
        """
        # NOTE(review): _setup presumably writes the fixture data the Engineer
        # reads from tmp_path -- confirm against its definition.
        _setup(tmp_path)

        pred_months = expected_length = 11

        engineer = Engineer(tmp_path)
        engineer.engineer(
            test_year=2001,
            target_variable="a",
            pred_months=pred_months,
            expected_length=expected_length,
        )

        experiment_dir = tmp_path / "features/one_month_forecast"

        def check_folder(folder_path):
            # Open the datasets as context managers so the file handles are
            # released even when one of the assertions fails.
            with xr.open_dataset(folder_path / "y.nc") as y:
                assert "b" not in set(
                    y.variables), "Got unexpected variables in test set"

                with xr.open_dataset(folder_path / "x.nc") as x:
                    for expected_var in {"a", "b"}:
                        assert expected_var in set(
                            x.variables
                        ), "Missing variables in testing input dataset"
                    assert (len(x.time.values) == expected_length
                            ), "Wrong number of months in the test x dataset"

                assert len(
                    y.time.values) == 1, "Wrong number of months in test y dataset"

        for month in range(1, 13):
            check_folder(experiment_dir / f"test/2001_{month}")
            check_folder(experiment_dir / f"train/2000_{month}")

        leaked = list((experiment_dir / "train").glob("2001_*"))
        assert len(leaked) == 0, "Test data in the training data!"

        norm_dict_path = experiment_dir / "normalizing_dict.pkl"
        assert norm_dict_path.exists(), "Normalizing dict not saved!"
        # The pickle was written by the Engineer call above, not external input.
        with norm_dict_path.open("rb") as f:
            norm_dict = pickle.load(f)

        for key, val in norm_dict.items():
            assert key in {"a", "b"}, "Unexpected key!"
            assert val["mean"] == 1, "Mean incorrectly calculated!"
            assert val["std"] == 0, "Std incorrectly calculated!"
def engineer(
    pred_months=3,
    target_var="boku_VCI",
    process_static=False,
    global_means: bool = True,
    log_vars: Optional[List[str]] = None,
):
    """Run the "one_month_forecast" Engineer for test years 2016 to 2018.

    Args:
        pred_months: passed as both ``pred_months`` and ``expected_length``.
        target_var: passed as ``target_variable``.
        process_static: forwarded to the ``Engineer`` constructor.
        global_means: forwarded to ``Engineer.engineer``.
        log_vars: accepted but not used anywhere in this function.
    """
    feature_engineer = Engineer(
        get_data_path(),
        experiment="one_month_forecast",
        process_static=process_static,
    )
    test_years = list(range(2016, 2019))
    feature_engineer.engineer(
        test_year=test_years,
        target_variable=target_var,
        pred_months=pred_months,
        expected_length=pred_months,
        global_means=global_means,
    )
Exemple #8
0
from collections import defaultdict
import calendar
from datetime import datetime, date
from pathlib import Path
import xarray as xr

from typing import cast, Dict, List, Optional, Union, Tuple
from typing import DefaultDict as DDict

from src.engineer import Engineer
from src.preprocess.base import BasePreProcessor

# Scratch script: run the Engineer once, then rebuild some of its intermediate
# objects by hand so they can be inspected.
data_path = Path("/Volumes/Lees_Extend/data/ecmwf_sowc/data")
engineer = Engineer(data_path)
engineer.engineer(
    test_year=1990, target_variable="VHI", pred_months=3, expected_length=3
)

# wrong shapes!
datasets = engineer._get_preprocessed_files()
ds_list = [xr.open_dataset(path) for path in datasets]
dims_list = [list(dataset.dims) for dataset in ds_list]
# for each dataset, take the first variable that is not one of its dimensions
variable_list = [
    [name for name in dataset.variables if name not in dims][0]
    for dataset, dims in zip(ds_list, dims_list)
]
da_list = [dataset[name] for dataset, name in zip(ds_list, variable_list)]

# build the dataset through the Engineer itself
ds = engineer._make_dataset()
years = [1990]
train_ds, test_dict
train_ds = engineer._train_test_split(ds,
Exemple #9
0
 def engineer(self, engineer_args: Dict) -> None:
     """Run the engineer on the data.

     Args:
         engineer_args: mapping with an "init_args" dict (keyword arguments
             for the ``Engineer`` constructor) and a "run_args" dict (keyword
             arguments for ``Engineer.engineer``). ``self.data`` is supplied
             as the ``data_folder`` constructor argument, overriding any
             value already present under that key.

     Raises:
         KeyError: if "init_args" or "run_args" is missing.
     """
     # Merge into a fresh dict so the caller's configuration is not mutated.
     init_args = {**engineer_args["init_args"], "data_folder": self.data}
     engineer = Engineer(**init_args)
     engineer.engineer(**engineer_args["run_args"])