Ejemplo n.º 1
0
def local_experiment(args: Namespace) -> None:
    """Run a single-batch local test of the experiment described by ``args``.

    Reads ``args.test_mode``, ``args.config_file`` and ``args.model_def``.
    The experiment config is parsed from ``args.config_file``; the trial class
    named by its ``"entrypoint"`` key is loaded and handed to
    ``experimental.test_one_batch``.

    Raises:
        ImportError: if the ``determined`` package cannot be imported (a hint
            is printed first).
        NotImplementedError: if ``args.test_mode`` is falsy.
    """
    try:
        from determined import experimental, load
    except ImportError as e:
        print("--local requires that the `determined` package is installed.")
        raise e

    unsupported_message = (
        "Local training mode (--local mode without --test mode) is not yet supported. Please "
        "try local test mode by adding the --test flag or cluster training mode by removing "
        "the --local flag.")
    if not args.test_mode:
        raise NotImplementedError(unsupported_message)

    config = _parse_config_file_or_exit(args.config_file)
    model_dir = args.model_def.resolve()

    with experimental._local_execution_manager(model_dir):
        trial_cls = load.load_trial_implementation(config["entrypoint"])

        # Python typically initializes sys.path[0] as the empty string when
        # invoked interactively, which directs Python to search modules in the
        # current directory first. However, this is _not_ happening when this
        # Python function is invoked via the cli. We add it manually here so
        # that test_one_batch can import the entrypoint by changing the
        # directory to model_def.
        #
        # Reference: https://docs.python.org/3/library/sys.html#sys.path
        #
        # NOTE(review): the prepend happens after the trial class has already
        # been loaded and is never undone; confirm that this is intended.
        sys.path = ["", *sys.path]

        experimental.test_one_batch(trial_class=trial_cls, config=config)
Ejemplo n.º 2
0
def _load_trial_on_local(
        context_dir: pathlib.Path, training: bool, config: Dict[str, Any],
        hparams: Dict[str, Any]) -> Tuple[Type[det.Trial], det.TrialContext]:
    """Load the trial class named in ``config`` and build a context for it.

    Everything runs inside ``det._local_execution_manager(context_dir)``.

    Args:
        context_dir: directory handed to the local execution manager.
        training: forwarded to ``det._make_local_execution_env``.
        config: experiment config; its ``"entrypoint"`` key names the trial.
        hparams: forwarded to ``det._make_local_execution_env``.

    Returns:
        The loaded trial class and an instance of its ``trial_context_class``.
    """
    with det._local_execution_manager(context_dir):
        trial_cls = load.load_trial_implementation(config["entrypoint"])
        local_env = det._make_local_execution_env(training, config, hparams)
        # The rendezvous info (second element) is not needed to build the context.
        env, _rendezvous_info, hvd_config = local_env
        context = trial_cls.trial_context_class(env, hvd_config)
    return trial_cls, context
Ejemplo n.º 3
0
def _local_trial_from_context(
    context_path: pathlib.Path,
    config: Dict[str, Any],
    hparams: Dict[str, Any],
) -> det.Trial:
    """Instantiate the trial named in ``config`` from a local context directory.

    Args:
        context_path: directory handed to ``_local_execution_manager``.
        config: experiment config; its ``"entrypoint"`` key names the trial.
        hparams: hyperparameters forwarded to ``create_trial_instance``.

    Returns:
        The trial instance built by ``create_trial_instance``. The temporary
        checkpoint directory used during construction has already been removed
        when this function returns.
    """
    with _local_execution_manager(context_path):
        # Use the temporary directory as a context manager: the bound value is
        # the directory's path string (``str()`` of the TemporaryDirectory
        # object itself would be its repr, not a usable path), and the
        # directory is removed even if loading or instantiation raises.
        with tempfile.TemporaryDirectory() as checkpoint_dir:
            trial_class = load.load_trial_implementation(config["entrypoint"])
            return create_trial_instance(
                trial_class,
                checkpoint_dir,
                config=config,
                hparams=hparams,
            )
def load_trial_implementation_controller(
    env: det.EnvContext,
    workloads: workload.Stream,
    load_path: Optional[pathlib.Path],
    rendezvous_info: det.RendezvousInfo,
    hvd_config: horovod.HorovodContext,
) -> det.TrialController:
    """Build a trial controller for the entrypoint named in the experiment config.

    The trial class is loaded from ``env.experiment_config["entrypoint"]`` and
    passed, together with all other arguments unchanged, to
    ``load_controller_from_trial``.

    Returns:
        Whatever ``load_controller_from_trial`` returns.
    """
    entrypoint = env.experiment_config["entrypoint"]
    trial_cls = load.load_trial_implementation(entrypoint)
    controller = load_controller_from_trial(
        trial_class=trial_cls,
        env=env,
        workloads=workloads,
        load_path=load_path,
        rendezvous_info=rendezvous_info,
        hvd_config=hvd_config,
    )
    return controller
Ejemplo n.º 5
0
def test_one_batch(
    context_path: pathlib.Path,
    trial_class: Optional[Type[det.Trial]] = None,
    config: Optional[Dict[str, Any]] = None,
) -> None:
    """Run a minimal local test experiment with ``batches_per_step`` forced to 1.

    Args:
        context_path: directory handed to ``local_execution_manager``.
        trial_class: trial class to run. When falsy, the class is loaded from
            the ``"entrypoint"`` key of the test experiment config.
        config: experiment config. It is copied, never mutated; ``None`` is
            treated as an empty config.

    A temporary checkpoint directory is created for the run and removed when
    the run finishes, whether it succeeds or raises.
    """
    # Override the batches_per_step value to 1.
    # TODO(DET-2931): Make the validation step a single batch as well.
    config = {**(config or {}), "batches_per_step": 1}

    print("Running a minimal test experiment locally")
    # The context manager guarantees the checkpoint directory is removed even
    # when environment setup, trial loading, or the controller run fails.
    with tempfile.TemporaryDirectory() as checkpoint_dir:
        env, workloads, rendezvous_info, hvd_config = make_test_experiment_env(
            checkpoint_dir=pathlib.Path(checkpoint_dir), config=config)
        print(f"Using hyperparameters: {env.hparams}")
        if util.debug_mode():
            print(f"Using a test experiment config: {env.experiment_config}")

        with local_execution_manager(context_path):
            if not trial_class:
                if util.debug_mode():
                    print("Loading trial class from experiment configuration")
                trial_class = load.load_trial_implementation(
                    env.experiment_config["entrypoint"])

            controller = load.load_controller_from_trial(
                trial_class=trial_class,
                env=env,
                workloads=workloads,
                load_path=None,
                rendezvous_info=rendezvous_info,
                hvd_config=hvd_config,
            )
            controller.run()

    print(
        "Note: to submit an experiment to the cluster, change mode argument to Mode.CLUSTER"
    )
Ejemplo n.º 6
0
def local_experiment(args: Namespace) -> None:
    """Run a single-batch local test of the experiment described by ``args``.

    Reads ``args.test_mode``, ``args.config_file`` and ``args.model_def``.
    After parsing the config, the logger is configured from the config's
    ``"debug"`` key (default ``False``). The trial class named by the
    ``"entrypoint"`` key is then loaded and passed to
    ``experimental.test_one_batch``.

    Raises:
        ImportError: if the ``determined`` package cannot be imported (a hint
            is printed first).
        NotImplementedError: if ``args.test_mode`` is falsy.
    """
    try:
        import determined as det
        from determined import experimental, load
    except ImportError as e:
        print("--local requires that the `determined` package is installed.")
        raise e

    unsupported_message = (
        "Local training mode (--local mode without --test mode) is not yet supported. Please "
        "try local test mode by adding the --test flag or cluster training mode by removing "
        "the --local flag.")
    if not args.test_mode:
        raise NotImplementedError(unsupported_message)

    config = _parse_config_file_or_exit(args.config_file)

    debug_enabled = bool(config.get("debug", False))
    determined_common.set_logger(debug_enabled)

    model_dir = args.model_def.resolve()
    with det._local_execution_manager(model_dir):
        trial_cls = load.load_trial_implementation(config["entrypoint"])
        experimental.test_one_batch(trial_class=trial_cls, config=config)