def test_get_runner_by_mpi_with_extra_args(training_env):
    """Explicit arguments given to ``_runner.get`` are stored on the MPI runners.

    First a ``MasterRunner`` is requested with entry point, args, env vars and
    MPI options; then ``is_master`` is flipped to ``False`` and a
    ``WorkerRunner`` is requested. In both cases the mocked training
    environment must not be asked for values the caller already supplied.
    """
    training_env().num_gpus = 0

    master = _runner.get(_runner.MPIRunnerType, USER_SCRIPT, CMD_ARGS, ENV_VARS, MPI_OPTS)

    assert isinstance(master, _mpi.MasterRunner)
    assert master._user_entry_point == USER_SCRIPT
    assert master._args == CMD_ARGS
    assert master._env_vars == ENV_VARS
    assert master._process_per_host == 2
    assert master._num_processes == 4
    assert master._custom_mpi_options == NCCL_DEBUG_MPI_OPT

    # None of the environment-derived fallbacks may have been touched.
    for fallback in (
        "to_cmd_args",
        "to_env_vars",
        "user_entry_point",
        "additional_framework_parameters",
    ):
        getattr(training_env(), fallback).assert_not_called()

    # Same request on a non-master host yields a worker runner.
    training_env().is_master = False
    worker = _runner.get(_runner.MPIRunnerType, USER_SCRIPT, CMD_ARGS, ENV_VARS)

    assert isinstance(worker, _mpi.WorkerRunner)
    assert worker._user_entry_point == USER_SCRIPT
    assert worker._args == CMD_ARGS
    assert worker._env_vars == ENV_VARS

    for fallback in ("to_cmd_args", "to_env_vars", "user_entry_point"):
        getattr(training_env(), fallback).assert_not_called()
def test_get_runner_by_mpi_returns_runnner(training_env):
    """Without explicit arguments the MPI runners are built from the training env.

    Checks the master case first, then the worker case after setting
    ``is_master`` to ``False`` on the mocked environment.
    """
    master = _runner.get(_runner.MPIRunnerType)

    assert isinstance(master, _mpi.MasterRunner)
    training_env().to_cmd_args.assert_called()
    training_env().to_env_vars.assert_called()

    training_env().is_master = False
    worker = _runner.get(_runner.MPIRunnerType)

    assert isinstance(worker, _mpi.WorkerRunner)
    training_env().to_cmd_args.assert_called()
    training_env().to_env_vars.assert_called()
def test_runnner_with_default_gpu_processes_per_host(training_env):
    """With no framework parameters and two GPUs, the master runs 2 processes per host."""
    training_env().additional_framework_parameters = {}
    training_env().num_gpus = 2

    mpi_runner = _runner.get(_runner.MPIRunnerType)

    assert isinstance(mpi_runner, _mpi.MasterRunner)
    assert mpi_runner._process_per_host == 2
def run(
    uri,
    user_entry_point,
    args,
    env_vars=None,
    wait=True,
    capture_error=False,
    runner=_runner.ProcessRunnerType,
):
    # type: (str, str, List[str], Dict[str, str], bool, bool, _runner.RunnerType) -> None
    """Download, prepare and execute a compressed tar file from S3 or a provided
    directory as a user entry point.

    Runs the user entry point, passing env_vars as environment variables and
    args as command arguments.

    If the entry point is:
        - A Python package: executes the package as
            >>> env_vars python -m module_name + args
        - A Python script: executes the script as
            >>> env_vars python module_name + args
        - Any other: executes the command as
            >>> env_vars /bin/sh -c ./module_name + args

    Example:
        >>> import sagemaker_containers
        >>> from sagemaker_containers.beta.framework import entry_point
        >>> env = sagemaker_containers.training_env()
        {'channel-input-dirs': {'training': '/opt/ml/input/training'}, 'model_dir': '/opt/ml/model', ...}

        >>> hyperparameters = env.hyperparameters
        {'batch-size': 128, 'model_dir': '/opt/ml/model'}

        >>> args = mapping.to_cmd_args(hyperparameters)
        ['--batch-size', '128', '--model_dir', '/opt/ml/model']

        >>> env_vars = mapping.to_env_vars()
        {'SAGEMAKER_CHANNELS': 'training', 'SAGEMAKER_CHANNEL_TRAINING': '/opt/ml/input/training',
         'MODEL_DIR': '/opt/ml/model', ...}

        >>> entry_point.run(uri, 'user_script', args, env_vars)
        SAGEMAKER_CHANNELS=training SAGEMAKER_CHANNEL_TRAINING=/opt/ml/input/training
        SAGEMAKER_MODEL_DIR=/opt/ml/model python -m user_script --batch-size 128 --model_dir /opt/ml/model

    Args:
        uri (str): the location of the module.
        user_entry_point (str): name of the user provided entry point.
        args (list): A list of program arguments.
        env_vars (dict): A map containing the environment variables to be
            written (default: None, treated as an empty map).
        wait (bool): forwarded to the runner's ``run`` call (default: True).
        capture_error (bool): Default false. If True, the running process
            captures the stderr, and appends it to the returned Exception
            message in case of errors.
        runner: runner type, or runner object, handed to ``_runner.get``
            (default: ``_runner.ProcessRunnerType``).

    Returns:
        Whatever the selected runner's ``run(wait, capture_error)`` returns.
    """
    # Work on a private copy so the caller's mapping is not shared downstream.
    env_vars = (env_vars or {}).copy()

    _files.download_and_extract(uri, user_entry_point, _env.code_dir)
    install(user_entry_point, _env.code_dir, capture_error)
    _env.write_env_vars(env_vars)

    # Forward the caller's entry point, args and env vars; previously they were
    # dropped here and the runner was built from the training environment only.
    return _runner.get(runner, user_entry_point, args, env_vars).run(wait, capture_error)
def test_get_runner_by_process_with_extra_args(training_env):
    """Explicit entry point, args and env vars are stored on the ``ProcessRunner``.

    The mocked training environment must not be consulted for any of them.
    """
    process_runner = _runner.get(_runner.ProcessRunnerType, USER_SCRIPT, CMD_ARGS, ENV_VARS)

    assert isinstance(process_runner, _process.ProcessRunner)
    assert process_runner._user_entry_point == USER_SCRIPT
    assert process_runner._args == CMD_ARGS
    assert process_runner._env_vars == ENV_VARS

    for fallback in ("to_cmd_args", "to_env_vars", "user_entry_point"):
        getattr(training_env(), fallback).assert_not_called()
def run(
    uri,
    user_entry_point,
    args,
    env_vars=None,
    wait=True,
    capture_error=False,
    runner=_runner.ProcessRunnerType,
    extra_opts=None,
):
    # type: (str, str, List[str], Dict[str, str], bool, bool, _runner.RunnerType,Dict[str, str]) -> None # pylint: disable=line-too-long # noqa ignore=E501
    """Download, prepare and execute a compressed tar file from S3 or a provided
    directory as a user entry point.

    Runs the user entry point, passing env_vars as environment variables and
    args as command arguments.

    If the entry point is:
        - A Python package: executes the package as
            >>> env_vars python -m module_name + args
        - A Python script: executes the script as
            >>> env_vars python module_name + args
        - Any other: executes the command as
            >>> env_vars /bin/sh -c ./module_name + args

    Example:
        >>> import sagemaker_containers
        >>> from sagemaker_containers.beta.framework import entry_point
        >>> env = sagemaker_containers.training_env()
        {'channel-input-dirs': {'training': '/opt/ml/input/training'}, 'model_dir': '/opt/ml/model', ...}

        >>> hyperparameters = env.hyperparameters
        {'batch-size': 128, 'model_dir': '/opt/ml/model'}

        >>> args = mapping.to_cmd_args(hyperparameters)
        ['--batch-size', '128', '--model_dir', '/opt/ml/model']

        >>> env_vars = mapping.to_env_vars()
        {'SAGEMAKER_CHANNELS': 'training', 'SAGEMAKER_CHANNEL_TRAINING': '/opt/ml/input/training',
         'MODEL_DIR': '/opt/ml/model', ...}

        >>> entry_point.run(uri, 'user_script', args, env_vars)
        SAGEMAKER_CHANNELS=training SAGEMAKER_CHANNEL_TRAINING=/opt/ml/input/training
        SAGEMAKER_MODEL_DIR=/opt/ml/model python -m user_script --batch-size 128 --model_dir /opt/ml/model

    Args:
        uri (str): the location of the module.
        user_entry_point (str): name of the user provided entry point.
        args (list): A list of program arguments.
        env_vars (dict): A map containing the environment variables to be
            written (default: None).
        wait (bool): If the user entry point should be run to completion
            before this method returns (default: True).
        capture_error (bool): Default false. If True, the running process
            captures the stderr, and appends it to the returned Exception
            message in case of errors.
        runner (sagemaker_containers.beta.framework.runner.RunnerType): the
            type of runner object to be created (default:
            sagemaker_containers.beta.framework.runner.ProcessRunnerType).
        extra_opts (dict): Additional options for running the entry point
            (default: None). Currently, this only applies for MPI.

    Returns:
        The value returned by the selected runner's ``run(wait, capture_error)``
        call.
    """
    # Work on a private copy so the caller's mapping is not shared downstream.
    env_vars = (env_vars or {}).copy()

    # Preparation steps, in order: fetch the code, install it, persist the
    # environment variables, then wait for hostname resolution.
    _files.download_and_extract(uri, _env.code_dir)
    install(user_entry_point, _env.code_dir, capture_error)
    _env.write_env_vars(env_vars)
    _wait_hostname_resolution()

    entry_point_runner = _runner.get(runner, user_entry_point, args, env_vars, extra_opts)
    return entry_point_runner.run(wait, capture_error)
def test_get_runner_by_process_returns_runnner(training_env):
    """Without explicit arguments a ``ProcessRunner`` is built from the training env."""
    process_runner = _runner.get(_runner.ProcessRunnerType)

    assert isinstance(process_runner, _process.ProcessRunner)
    training_env().to_cmd_args.assert_called()
    training_env().to_env_vars.assert_called()
def test_get_runner_returns_runnner_itself(runner_class):
    """An object that already satisfies ``runner_class`` is handed back by ``_runner.get``."""
    existing_runner = MagicMock(spec=runner_class)

    resolved = _runner.get(existing_runner)

    assert resolved == existing_runner
def test_get_runner_invalid_identifier():
    """An identifier that is neither a runner type nor a runner raises ``ValueError``."""
    invalid_identifier = 42

    with pytest.raises(ValueError):
        _runner.get(invalid_identifier)