def local_experiment(args: Namespace) -> None:
    """Run the experiment described by ``args`` locally in test mode.

    Only local *test* mode is implemented: when ``args.test_mode`` is falsy a
    ``NotImplementedError`` is raised. Otherwise the config file at
    ``args.config_file`` is parsed, the trial class named by its
    ``"entrypoint"`` key is loaded from inside the model definition directory,
    and ``experimental.test_one_batch`` is invoked with that class and config.

    Raises:
        ImportError: if the ``determined`` package cannot be imported.
        NotImplementedError: if ``args.test_mode`` is not set.
    """
    try:
        from determined import experimental, load
    except ImportError as err:
        print("--local requires that the `determined` package is installed.")
        raise err

    if not args.test_mode:
        unsupported_msg = (
            "Local training mode (--local mode without --test mode) is not yet supported. Please "
            "try local test mode by adding the --test flag or cluster training mode by removing "
            "the --local flag."
        )
        raise NotImplementedError(unsupported_msg)

    config = _parse_config_file_or_exit(args.config_file)
    model_def_dir = args.model_def.resolve()

    with experimental._local_execution_manager(model_def_dir):
        entrypoint = config["entrypoint"]
        trial_cls = load.load_trial_implementation(entrypoint)

        # Python typically initializes sys.path[0] as the empty string when
        # invoked interactively, which makes it search the current directory
        # for modules first. That does _not_ happen when this function is
        # invoked via the CLI, so the empty entry is added manually here so
        # that test_one_batch can import the entrypoint after the working
        # directory has been changed to model_def.
        #
        # Reference: https://docs.python.org/3/library/sys.html#sys.path
        sys.path = ["", *sys.path]

        experimental.test_one_batch(trial_class=trial_cls, config=config)
def _load_trial_on_local(
    context_dir: pathlib.Path,
    training: bool,
    config: Dict[str, Any],
    hparams: Dict[str, Any],
) -> Tuple[Type[det.Trial], det.TrialContext]:
    """Load a trial class and build its trial context for local execution.

    Args:
        context_dir: directory passed to ``det._local_execution_manager``
            while the trial is loaded and its context is built.
        training: forwarded to ``det._make_local_execution_env``.
        config: experiment config; its ``"entrypoint"`` key names the trial
            implementation to load.
        hparams: forwarded to ``det._make_local_execution_env``.

    Returns:
        A ``(trial_class, trial_context)`` pair, where the context is an
        instance of ``trial_class.trial_context_class``.
    """
    with det._local_execution_manager(context_dir):
        entrypoint = config["entrypoint"]
        loaded_class = load.load_trial_implementation(entrypoint)

        # The rendezvous info produced for the local environment is not
        # needed to construct the trial context, so it is discarded.
        local_env, _rendezvous, horovod_cfg = det._make_local_execution_env(
            training, config, hparams
        )

        context_cls = loaded_class.trial_context_class
        context = context_cls(local_env, horovod_cfg)

    return loaded_class, context
def _local_trial_from_context(
    context_path: pathlib.Path,
    config: Dict[str, Any],
    hparams: Dict[str, Any],
) -> det.Trial:
    """Instantiate the trial named by ``config["entrypoint"]`` locally.

    The trial class is loaded and instantiated while
    ``_local_execution_manager(context_path)`` is active. A temporary
    directory is handed to ``create_trial_instance`` as the checkpoint
    directory; it is removed before this function returns, so it no longer
    exists by the time the caller receives the trial.

    Args:
        context_path: directory passed to ``_local_execution_manager``.
        config: experiment config; must contain an ``"entrypoint"`` key.
        hparams: hyperparameters forwarded to ``create_trial_instance``.

    Returns:
        The trial instance produced by ``create_trial_instance``.
    """
    with _local_execution_manager(context_path):
        # Use the context-manager form so the directory is removed even when
        # loading or constructing the trial raises. The ``with`` target is the
        # directory path itself; ``str()`` of a ``TemporaryDirectory`` object
        # is its repr (``<TemporaryDirectory '...'>``), not a usable path.
        with tempfile.TemporaryDirectory() as checkpoint_dir:
            trial_class = load.load_trial_implementation(config["entrypoint"])
            trial = create_trial_instance(
                trial_class, checkpoint_dir, config=config, hparams=hparams
            )
    return trial
def load_trial_implementation_controller(
    env: det.EnvContext,
    workloads: workload.Stream,
    load_path: Optional[pathlib.Path],
    rendezvous_info: det.RendezvousInfo,
    hvd_config: horovod.HorovodContext,
) -> det.TrialController:
    """Build a trial controller for the entrypoint named in ``env``.

    The trial class is loaded from ``env.experiment_config["entrypoint"]``
    and handed, together with every argument of this function, to
    ``load_controller_from_trial``, whose result is returned unchanged.
    """
    entrypoint = env.experiment_config["entrypoint"]
    return load_controller_from_trial(
        trial_class=load.load_trial_implementation(entrypoint),
        env=env,
        workloads=workloads,
        load_path=load_path,
        rendezvous_info=rendezvous_info,
        hvd_config=hvd_config,
    )
def test_one_batch(
    context_path: pathlib.Path,
    trial_class: Optional[Type[det.Trial]] = None,
    config: Optional[Dict[str, Any]] = None,
) -> None:
    """Run a minimal test experiment locally.

    A test experiment environment is built from ``config`` (with
    ``batches_per_step`` forced to 1) using a temporary checkpoint directory,
    then a controller for the trial is created and run while
    ``local_execution_manager(context_path)`` is active. The temporary
    directory is removed when the run finishes, whether or not it succeeded.

    Args:
        context_path: directory passed to ``local_execution_manager``.
        trial_class: trial class to run. When falsy, the class is loaded from
            the ``"entrypoint"`` key of the generated experiment config.
        config: experiment config to base the test on; ``None`` is treated as
            an empty config. The caller's dict is not modified.
    """
    # Override the batches_per_step value to 1.
    # TODO(DET-2931): Make the validation step a single batch as well.
    config = {**(config or {}), "batches_per_step": 1}

    print("Running a minimal test experiment locally")

    # The context manager guarantees the checkpoint directory is deleted even
    # if environment setup, trial loading, or the run itself raises.
    with tempfile.TemporaryDirectory() as checkpoint_dir:
        env, workloads, rendezvous_info, hvd_config = make_test_experiment_env(
            checkpoint_dir=pathlib.Path(checkpoint_dir), config=config
        )
        print(f"Using hyperparameters: {env.hparams}")
        if util.debug_mode():
            print(f"Using a test experiment config: {env.experiment_config}")

        with local_execution_manager(context_path):
            if not trial_class:
                if util.debug_mode():
                    print("Loading trial class from experiment configuration")
                trial_class = load.load_trial_implementation(
                    env.experiment_config["entrypoint"]
                )

            controller = load.load_controller_from_trial(
                trial_class=trial_class,
                env=env,
                workloads=workloads,
                load_path=None,
                rendezvous_info=rendezvous_info,
                hvd_config=hvd_config,
            )
            controller.run()

    # Only reached when the run completed without raising.
    print(
        "Note: to submit an experiment to the cluster, change mode argument to Mode.CLUSTER"
    )
def local_experiment(args: Namespace) -> None:
    """Run the experiment described by ``args`` locally in test mode.

    Only local *test* mode is implemented: when ``args.test_mode`` is falsy a
    ``NotImplementedError`` is raised. Otherwise the config file at
    ``args.config_file`` is parsed, the logger is configured from the config's
    optional ``"debug"`` key, the trial class named by the ``"entrypoint"``
    key is loaded from inside the model definition directory, and
    ``experimental.test_one_batch`` is invoked with that class and config.

    Raises:
        ImportError: if the ``determined`` package cannot be imported.
        NotImplementedError: if ``args.test_mode`` is not set.
    """
    try:
        import determined as det
        from determined import experimental, load
    except ImportError as err:
        print("--local requires that the `determined` package is installed.")
        raise err

    if not args.test_mode:
        unsupported_msg = (
            "Local training mode (--local mode without --test mode) is not yet supported. Please "
            "try local test mode by adding the --test flag or cluster training mode by removing "
            "the --local flag."
        )
        raise NotImplementedError(unsupported_msg)

    config = _parse_config_file_or_exit(args.config_file)

    # A missing "debug" key is treated the same as debug being disabled.
    debug_enabled = bool(config.get("debug", False))
    determined_common.set_logger(debug_enabled)

    model_def_dir = args.model_def.resolve()
    with det._local_execution_manager(model_def_dir):
        entrypoint = config["entrypoint"]
        trial_cls = load.load_trial_implementation(entrypoint)
        experimental.test_one_batch(trial_class=trial_cls, config=config)