Esempio n. 1
0
 def _get_latest_checkpoint(self,
                            experiment_checkpoint_path: str) -> List[str]:
     """Resolve a path to the experiment checkpoint(s) it refers to.

     Args:
         experiment_checkpoint_path: Either a directory to search for an
             experiment checkpoint, or the path of a checkpoint file.

     Returns:
         A list of checkpoints. For an existing file this is the path
         itself. For a directory it is whatever
         ``find_newest_experiment_checkpoint`` reports for the directory
         or, when that is empty, for each of its immediate entries.

     Raises:
         ValueError: If the directory yields no checkpoint, or if the
             path is neither an existing directory nor an existing file.
     """
     if not os.path.isdir(experiment_checkpoint_path):
         if os.path.isfile(experiment_checkpoint_path):
             # An existing file is taken as the checkpoint itself.
             return [experiment_checkpoint_path]
         # Neither a directory nor an existing file.
         raise ValueError(
             f"The file `{experiment_checkpoint_path}` does not "
             f"exist and cannot be loaded for experiment analysis.")

     # A directory was given: look for a checkpoint directly inside it.
     found = find_newest_experiment_checkpoint(experiment_checkpoint_path)
     if not found:
         # Nothing at the top level, so probe every entry one level down
         # and keep each non-empty result.
         per_entry = (
             find_newest_experiment_checkpoint(
                 os.path.join(experiment_checkpoint_path, entry))
             for entry in os.listdir(experiment_checkpoint_path))
         found = [ckpt for ckpt in per_entry if ckpt]
     if not found:
         raise ValueError(
             f"The directory `{experiment_checkpoint_path}` does not "
             f"contain a Ray Tune experiment checkpoint.")

     # Always hand back a list, even for a single top-level result.
     return found if isinstance(found, list) else [found]
Esempio n. 2
0
 def _get_latest_checkpoint(self,
                            experiment_checkpoint_path: Path) -> List[str]:
     """Resolve a path to the experiment checkpoint(s) it refers to.

     Args:
         experiment_checkpoint_path: Either a directory to search for an
             experiment checkpoint, or the path of a checkpoint file.

     Returns:
         A list of checkpoints. For an existing file this is
         ``[str(experiment_checkpoint_path)]``. For a directory it is
         whatever ``find_newest_experiment_checkpoint`` reports for the
         directory or, when that is empty, for each of its immediate
         entries (so several experiment folders under one root may each
         contribute an element).

     Raises:
         ValueError: If the directory yields no checkpoint, or if the
             path is neither an existing directory nor an existing file.
     """
     path_str = str(experiment_checkpoint_path)

     # Case 1: Dir specified, find latest checkpoint.
     if experiment_checkpoint_path.is_dir():
         latest_checkpoint = find_newest_experiment_checkpoint(path_str)
         # If there is no checkpoint in this folder, its immediate entries
         # are searched. Multiple experiment folders could exist in the
         # same root, in which case `latest_checkpoint` will hold more
         # than one element.
         if not latest_checkpoint:
             latest_checkpoint = []
             # `iterdir()` already yields paths that include the parent
             # directory. Joining them onto the parent again would
             # duplicate the prefix for relative paths
             # (`exp` + `exp/sub` -> `exp/exp/sub`), so use them as-is.
             for entry in experiment_checkpoint_path.iterdir():
                 latest_checkpoint_subdir = find_newest_experiment_checkpoint(
                     str(entry))
                 if latest_checkpoint_subdir:
                     latest_checkpoint.append(latest_checkpoint_subdir)
         if not latest_checkpoint:
             # Only one level below the root is searched, so nested
             # experiment directories of the form
             # `experiment_name1/experiment_name2/experiment_state.json`
             # are not picked up and end here.
             raise ValueError(
                 f"The directory `{path_str}` does not "
                 "contain a Ray Tune experiment checkpoint.")
     elif not experiment_checkpoint_path.is_file():
         # Case 2: File specified, but does not exist.
         raise ValueError(
             f"The file `{path_str}` does not "
             f"exist and cannot be loaded for experiment analysis.")
     else:
         # Case 3: File specified, use as latest checkpoint.
         latest_checkpoint = path_str
     # Always hand back a list, even for a single result.
     if not isinstance(latest_checkpoint, list):
         latest_checkpoint = [latest_checkpoint]
     return latest_checkpoint
Esempio n. 3
0
def load_experiment_checkpoint_from_state_file(
        experiment_dir: str) -> ExperimentStateCheckpoint:
    """Load the newest experiment state file found for ``experiment_dir``.

    The state file is parsed as JSON using ``TuneFunctionDecoder``. Each
    entry of its ``"checkpoints"`` list is itself a JSON string, which is
    decoded and expanded into keyword arguments for a ``TrialStub``.

    Args:
        experiment_dir: Directory passed to
            ``find_newest_experiment_checkpoint`` to locate the state file.

    Returns:
        An ``ExperimentStateCheckpoint`` built from the file's
        ``"runner_data"`` entry and the list of reconstructed trials.

    Raises:
        ValueError: If no experiment checkpoint is found for
            ``experiment_dir``.
    """
    newest_ckpt_path = find_newest_experiment_checkpoint(experiment_dir)
    if not newest_ckpt_path:
        # Fail with a clear message instead of letting `open()` raise an
        # opaque error on a missing/empty path.
        raise ValueError(
            f"The directory `{experiment_dir}` does not "
            "contain a Ray Tune experiment checkpoint.")

    with open(newest_ckpt_path, "r") as f:
        runner_state = json.load(f, cls=TuneFunctionDecoder)

    # Every checkpoint entry is a serialized JSON document of its own.
    trials = [
        TrialStub(**json.loads(trial_cp_str, cls=TuneFunctionDecoder))
        for trial_cp_str in runner_state["checkpoints"]
    ]

    return ExperimentStateCheckpoint(runner_state["runner_data"], trials)