def create_results_ensemble(run_dirs: List[Path],
                            best_k: int = None,
                            metrics: List[str] = None,
                            period: str = 'test',
                            epoch: int = None) -> dict:
    """Average the predictions of several runs for the specified period and calculate new metrics.

    If `best_k` is provided, only the k runs with the best validation NSE will be used in the generated ensemble.

    Parameters
    ----------
    run_dirs : List[Path]
        List of directories of the runs to be merged.
    best_k : int, optional
        If provided, will only merge the k best runs based on validation NSE.
    metrics : List[str], optional
        Use this parameter to override the metrics from the config files in the run directories.
    period : {'test', 'validation', 'train'}, optional
        One of train, validation, test. If best_k is used, only 'test' is allowed.
        The run directories must contain results files for the specified period.
    epoch : int, optional
        If provided, will ensemble the model predictions of this epoch, otherwise of the last epoch.

    Returns
    -------
    dict
        Dictionary of ensemble predictions and metrics per basin and frequency.
    """
    if len(run_dirs) < 2:
        raise ValueError('Need to provide at least two run directories to be merged.')

    if period not in ['train', 'validation', 'test']:
        raise ValueError(f'Unknown period {period}.')
    if best_k is not None:
        if period != 'test':
            raise ValueError('If best_k is specified, the period must be test.')
        print('Searching for best validation runs.')
        best_val_runs = _get_best_validation_runs(run_dirs, best_k, epoch)
        best_runs = [_get_results_file(run_dir, period, epoch) for run_dir in best_val_runs]
    else:
        best_runs = [_get_results_file(run_dir, period, epoch) for run_dir in run_dirs]

    config = Config(run_dirs[0] / 'config.yml')
    if metrics is not None:
        # override metrics from config
        config.metrics = metrics

    # get frequencies from a results file.
    # (they might not be stored in the config if the native data frequency was used)
    with open(best_runs[0], 'rb') as fp:
        run_results = pickle.load(fp)
    frequencies = list(run_results[list(run_results.keys())[0]].keys())

    return _create_ensemble(best_runs, frequencies, config)
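# A minimal usage sketch for create_results_ensemble. The run directory paths are
# hypothetical; the call assumes each directory holds a config.yml and pickled
# results files for the test and validation periods.
run_dirs = [Path('runs/lstm_seed0'), Path('runs/lstm_seed1'), Path('runs/lstm_seed2')]
ensemble = create_results_ensemble(run_dirs, best_k=2, metrics=['NSE', 'KGE'], period='test')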
def _get_config(name):
    config_file = Path(f'./test/test_configs/{name}.test.yml')
    if not config_file.is_file():
        raise ValueError(f'Test config file not found at {config_file}.')
    config = Config(config_file)
    config.run_dir = Path(tmpdir)  # tmpdir is expected at module scope (e.g., a pytest fixture value)
    return config
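# Usage sketch for the test helper above. The config name 'daily_regression' is
# illustrative and assumes a matching daily_regression.test.yml exists under
# test/test_configs.
config = _get_config('daily_regression')
assert config.run_dir == Path(tmpdir)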
def continue_run(run_dir: Path, config_file: Path = None, gpu: int = None):
    """Continue model training.

    Parameters
    ----------
    run_dir : Path
        Path to the run directory.
    config_file : Path, optional
        Path to an additional config file. Each config argument in this file will overwrite the original run config.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'. A value smaller than zero indicates CPU.
    """
    # load config from base run and overwrite all elements with an optional new config
    base_config = Config(run_dir / "config.yml")
    if config_file is not None:
        base_config.update_config(config_file)
    base_config.is_continue_training = True

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None and gpu >= 0:
        base_config.device = f"cuda:{gpu}"
    if gpu is not None and gpu < 0:
        base_config.device = "cpu"

    start_training(base_config)
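# Usage sketch (hypothetical run directory): resume training from the stored
# checkpoint of an earlier run, forcing execution on the CPU via gpu=-1.
continue_run(Path('runs/my_run_0110_1435'), gpu=-1)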
def finetune(config_file: Path = None, gpu: int = None):
    """Finetune a pre-trained model.

    Parameters
    ----------
    config_file : Path, optional
        Path to an additional config file. Each config argument in this file will overwrite the original run config.
        The config file for finetuning must contain the argument `base_run_dir`, pointing to the folder of the
        pre-trained model.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'.
    """
    # load finetune config, extract base run dir, load base run config and combine with the finetune arguments
    temp_config = Config(config_file)
    config = Config(temp_config.base_run_dir / "config.yml")
    config.force_update({'run_dir': None, 'experiment_name': None})
    config.update_config(config_file)
    config.is_finetuning = True

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None:
        config.device = f"cuda:{gpu}"

    start_training(config)
def __init__(self, mass_input_size: int, aux_input_size: int, hidden_size: int, cfg: Config):
    super(_MCLSTMCell, self).__init__()
    self.cfg = cfg
    self.mass_input_size = mass_input_size
    self.aux_input_size = aux_input_size
    self.hidden_size = hidden_size

    _temp_dict = cfg.as_dict()
    self._mass_in_gates = _temp_dict["mclstm_mass_in_gates"]
    self._subtract_outgoing_mass = _temp_dict["subtract_outgoing_mass"]

    gate_inputs = aux_input_size + hidden_size
    if self._mass_in_gates:
        gate_inputs += mass_input_size

    # initialize gates
    self.output_gate = _Gate(in_features=gate_inputs, out_features=hidden_size)
    self.input_gate = _NormalisedGate(in_features=gate_inputs,
                                      out_shape=(mass_input_size, hidden_size),
                                      normaliser=_temp_dict["mclstm_i_normaliser"])
    self.redistribution = _NormalisedGate(in_features=gate_inputs,
                                          out_shape=(hidden_size, hidden_size),
                                          normaliser=_temp_dict["mclstm_r_normaliser"])

    self._reset_parameters()
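# Construction sketch for _MCLSTMCell. The config values are illustrative and only
# cover the keys read from cfg.as_dict() above; the normaliser names are assumptions
# (check _NormalisedGate for the supported options).
cfg = Config({
    'mclstm_mass_in_gates': True,
    'subtract_outgoing_mass': True,
    'mclstm_i_normaliser': 'normalised_sigmoid',  # assumed normaliser name
    'mclstm_r_normaliser': 'normalised_sigmoid',  # assumed normaliser name
})
cell = _MCLSTMCell(mass_input_size=1, aux_input_size=2, hidden_size=10, cfg=cfg)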
def run_evaluation(run_dir: Path, epoch: Optional[int] = None, period: str = "test"):
    """Helper function to run the evaluation
    (same as def start_evaluation: neuralhydrology/evaluation/evaluate.py:L7)

    Args:
        run_dir (Path): Path of the experiment run.
        epoch (Optional[int], optional): Model epoch to evaluate. None finds the latest (highest) epoch.
            Defaults to None.
        period (str, optional): One of {"test", "train", "validation"}. Defaults to "test".
    """
    cfg = Config(run_dir / "config.yml")
    tester = Tester(cfg=cfg, run_dir=run_dir, period=period, init_model=True)

    if epoch is None:
        # get the highest epoch trained model
        all_trained_models = [d.name for d in run_dir.glob("model_epoch*.pt")]
        epoch = int(sorted(all_trained_models)[-1].replace(".pt", "").replace("model_epoch", ""))
    print(f"** EVALUATING MODEL EPOCH: {epoch} **")
    tester.evaluate(epoch=epoch, save_results=True, metrics=["NSE", "KGE"])
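# Usage sketch (hypothetical run directory): evaluate the latest stored epoch on the
# test period; pass an explicit epoch to evaluate a specific checkpoint instead.
run_evaluation(Path('runs/my_run_0110_1435'), epoch=None, period='test')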
def test_mass_conservation():
    torch.manual_seed(111)

    # create minimal config required for model initialization
    config = Config({
        'dynamic_inputs': ['tmin(C)', 'tmax(C)'],
        'hidden_size': 10,
        'initial_forget_bias': 0,
        'mass_inputs': ['prcp(mm/day)'],
        'model': 'mclstm',
        'target_variables': ['QObs(mm/d)']
    })
    model = MCLSTM(config)

    # create random inputs
    data = {'x_d': torch.rand((1, 25, 3))}  # [batch size, sequence length, total number of time series inputs]

    # get model outputs and intermediate states
    output = model(data)

    # the total mass within the system at each time step is the cumsum over the outgoing mass + the current cell state
    cumsum_system = output["m_out"].sum(-1).cumsum(-1) + output["c"].sum(-1)

    # the accumulated mass of the inputs at each time step
    cumsum_input = data["x_d"][:, :, 0].cumsum(-1)

    # check if the total mass is conserved at every timestep of the forward pass
    assert torch.allclose(cumsum_system, cumsum_input)
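# The test above needs no external data (inputs are random), so besides running it
# through pytest it can also be invoked directly:
test_mass_conservation()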
def start_run(config_file: Path, gpu: int = None):
    """Start training a model.

    Parameters
    ----------
    config_file : Path
        Path to a configuration file (.yml), defining the settings for the specific run.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'.
    """
    config = Config(config_file)

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None:
        config.device = f"cuda:{gpu}"

    start_training(config)
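# Usage sketch (hypothetical config path): train on GPU 0. Omitting gpu keeps the
# 'device' setting from the config file.
start_run(Path('configs/my_experiment.yml'), gpu=0)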
def __init__(self, cfg: Config):
    self._train = True
    self.log_interval = cfg.log_interval
    self.log_dir = cfg.run_dir
    self._img_log_dir = cfg.img_log_dir

    # get git commit hash if folder is a git repository
    current_dir = str(Path(__file__).absolute().parent)
    if subprocess.call(["git", "-C", current_dir, "branch"],
                       stderr=subprocess.DEVNULL,
                       stdout=subprocess.DEVNULL) == 0:
        git_output = subprocess.check_output(["git", "-C", current_dir, "describe", "--always"])
        cfg.update_config({'commit_hash': git_output.strip().decode('ascii')})

    # Additionally, the package version is stored in the config
    cfg.update_config({"package_version": __version__})

    # store a copy of the config into the run folder
    cfg.dump_config(folder=self.log_dir)

    self.epoch = 0
    self.update = 0
    self._metrics = defaultdict(list)
    self.writer = None
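# A hypothetical standalone helper distilling the git-lookup pattern used above:
# `git branch` probes whether the directory is inside a repository, and
# `git describe --always` then returns a short commit identifier. Returns None
# outside a repository.
def _git_hash(directory: str):
    if subprocess.call(["git", "-C", directory, "branch"],
                       stderr=subprocess.DEVNULL,
                       stdout=subprocess.DEVNULL) == 0:
        out = subprocess.check_output(["git", "-C", directory, "describe", "--always"])
        return out.strip().decode("ascii")
    return None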
def eval_run(run_dir: Path, period: str, epoch: int = None, gpu: int = None):
    """Start evaluating a trained model.

    Parameters
    ----------
    run_dir : Path
        Path to the run directory.
    period : {'train', 'validation', 'test'}
        The period to evaluate.
    epoch : int, optional
        Define a specific epoch to use. By default, the weights of the last epoch are used.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'.
    """
    config = Config(run_dir / "config.yml")

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None:
        config.device = f"cuda:{gpu}"

    start_evaluation(cfg=config, run_dir=run_dir, epoch=epoch, period=period)
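# Usage sketch (hypothetical run directory): evaluate the validation period with the
# weights of epoch 20 on GPU 0.
eval_run(Path('runs/my_run_0110_1435'), period='validation', epoch=20, gpu=0)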
def create_config_files(base_config_path: Path, modify_dict: Dict[str, list], output_dir: Path):
    """Create configs, given a base config and a dictionary of parameters to modify.

    This function will create one config file for each combination of parameters defined in the modify_dict.

    Parameters
    ----------
    base_config_path : Path
        Path to a base config file (.yml).
    modify_dict : dict
        Dictionary, mapping from parameter names to lists of possible parameter values.
    output_dir : Path
        Path to a folder where the generated configs will be stored.
    """
    if not output_dir.is_dir():
        output_dir.mkdir(parents=True)

    # load base config as dictionary
    base_config = Config(base_config_path)
    experiment_name = base_config.experiment_name
    option_names = list(modify_dict.keys())

    # iterate over each possible combination of hyperparameters
    for i, options in enumerate(itertools.product(*[val for val in modify_dict.values()])):

        for key, val in zip(option_names, options):
            base_config.force_update(key=key, value=val)

        # create a unique run name
        name = experiment_name
        for key, val in zip(option_names, options):
            name += f"_{key}{val}"
        base_config.force_update(key="experiment_name", value=name)

        base_config.dump_config(output_dir, f"config_{i+1}.yml")

    print(f"Finished. Configs are stored in {output_dir}")
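# Usage sketch: a grid over two hypothetical hyperparameters. With the lists below,
# 2 x 2 = 4 config files (config_1.yml ... config_4.yml) are written to configs/grid.
modify_dict = {'hidden_size': [64, 128], 'output_dropout': [0.0, 0.4]}
create_config_files(Path('configs/base.yml'), modify_dict, Path('configs/grid'))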
def __init__(self, cfg: Config):
    self._train = True
    self.log_interval = cfg.log_interval
    self.log_dir = cfg.run_dir
    self._img_log_dir = cfg.img_log_dir

    # get git commit hash if folder is a git repository
    cfg.update_config({'commit_hash': get_git_hash()})

    # save git diff to file if branch is dirty
    if cfg.save_git_diff:
        save_git_diff(cfg.run_dir)

    # Additionally, the package version is stored in the config
    cfg.update_config({"package_version": __version__})

    # store a copy of the config into the run folder
    cfg.dump_config(folder=self.log_dir)

    self.epoch = 0
    self.update = 0
    self._metrics = defaultdict(list)
    self.writer = None
def finetune(config_file: Path = None, gpu: int = None):
    """Finetune a pre-trained model.

    Parameters
    ----------
    config_file : Path, optional
        Path to an additional config file. Each config argument in this file will overwrite the original run config.
        The config file for finetuning must contain the argument `base_run_dir`, pointing to the folder of the
        pre-trained model, as well as 'finetune_modules' to indicate which model parts will be trained during
        fine-tuning.
    gpu : int, optional
        GPU id to use. Will override config argument 'device'. A value smaller than zero indicates CPU.
    """
    # load finetune config and check for a non-empty list of finetune_modules
    temp_config = Config(config_file)
    if not temp_config.finetune_modules:
        raise ValueError("For finetuning, at least one model part has to be specified by 'finetune_modules'.")

    # extract base run dir, load base run config and combine with the finetune arguments
    config = Config(temp_config.base_run_dir / "config.yml")
    config.update_config({'run_dir': None, 'experiment_name': None})
    config.update_config(config_file)
    config.is_finetuning = True

    # if the base run was a continue_training run, we need to override the continue_training flag from its config.
    config.is_continue_training = False

    # check if a GPU has been specified as command line argument. If yes, overwrite config
    if gpu is not None and gpu >= 0:
        config.device = f"cuda:{gpu}"
    if gpu is not None and gpu < 0:
        config.device = "cpu"

    start_training(config)
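# Usage sketch. The finetune config path and its contents are hypothetical; per the
# docstring it must name base_run_dir and finetune_modules, e.g. in finetune.yml:
#     base_run_dir: runs/my_run_0110_1435
#     finetune_modules: [head]
finetune(Path('finetune.yml'), gpu=0)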