def test_get_preprocessed(self, tmp_path, monkeypatch):
    """Check that ``_get_preprocessed_files`` finds the expected files.

    ``_setup(tmp_path)`` prepares the fixture and returns the expected file
    list (plus a second value this test does not need).
    ``Engineer.__init__`` is replaced with a stub that only sets ``name``
    and ``interim_folder``, so the real constructor does not run.
    """
    # Only the expected files matter here; the second value is discarded.
    expected_files, _ = _setup(tmp_path)

    def mock_init(self, data_folder):
        # Set only the two attributes assigned by this stub constructor.
        self.name = "dummy"
        self.interim_folder = data_folder / "interim"

    monkeypatch.setattr(Engineer, "__init__", mock_init)

    engineer = Engineer(tmp_path)
    files = engineer._get_preprocessed_files(static=False)

    # Compared as sets, so ordering and duplicates are ignored.
    assert set(expected_files) == set(files), "Did not retrieve expected files!"
from typing import cast, Dict, List, Optional, Union, Tuple
from typing import DefaultDict as DDict

from src.engineer import Engineer
from src.preprocess.base import BasePreProcessor

# Scratch script: run the Engineer once, then rebuild some of its intermediate
# objects by hand so they can be inspected.
# NOTE(review): `Path` and `xr` are used below but are not imported in the
# part of the file visible here -- confirm they are imported elsewhere.
data_path = Path("/Volumes/Lees_Extend/data/ecmwf_sowc/data")
engineer = Engineer(data_path)
engineer.engineer(
    test_year=1990, target_variable="VHI", pred_months=3, expected_length=3
)

# wrong shapes!
datasets = engineer._get_preprocessed_files()
ds_list = [xr.open_dataset(ds) for ds in datasets]
dims_list = [list(ds.dims) for ds in ds_list]
# For each dataset, take the first variable that is not one of its dimensions.
variable_list = [
    [var for var in ds.variables if var not in dims][0]
    for ds, dims in zip(ds_list, dims_list)
]
da_list = [ds[var] for ds, var in zip(ds_list, variable_list)]

# `ds` has to be bound at module level before the split below: the `ds` names
# used inside the comprehensions above do not leak out of them.
ds = engineer._make_dataset()
years = [1990]
# NOTE(review): assumes `_train_test_split` returns a (train, test) pair, as
# the original `train_ds, test_dict` names suggest -- confirm against Engineer.
train_ds, test_dict = engineer._train_test_split(
    ds, years, target_variable="VHI", pred_months=3, expected_length=3
)