def test_git_clone_repo_codecommit_https_creds_not_stored_locally(mkdtemp, check_call):
    """A failing clone of the CodeCommit HTTPS repo propagates ``CalledProcessError``.

    NOTE(review): ``mkdtemp`` and ``check_call`` look like injected mocks
    (presumably from patch decorators that are not part of this block) -- confirm.
    """
    git_config = {"repo": CODECOMMIT_REPO, "branch": CODECOMMIT_BRANCH}
    entry_point = "entry_point"
    with pytest.raises(subprocess.CalledProcessError) as error:
        git_utils.git_clone_repo(git_config, entry_point)
    # Inspect the raised exception itself: ``str()`` of the ``ExceptionInfo``
    # wrapper is pytest's own representation and need not contain the message.
    assert "returned non-zero exit status" in str(error.value)
def test_git_clone_repo_codecommit_ssh_passphrase_required(mkdtemp, check_call):
    """A failing clone of the CodeCommit SSH repo propagates ``CalledProcessError``.

    NOTE(review): ``mkdtemp`` and ``check_call`` look like injected mocks
    (presumably from patch decorators that are not part of this block) -- confirm.
    """
    git_config = {"repo": CODECOMMIT_REPO_SSH, "branch": CODECOMMIT_BRANCH}
    entry_point = "entry_point"
    with pytest.raises(subprocess.CalledProcessError) as error:
        git_utils.git_clone_repo(git_config, entry_point)
    # Inspect the raised exception itself: ``str()`` of the ``ExceptionInfo``
    # wrapper is pytest's own representation and need not contain the message.
    assert "returned non-zero exit status" in str(error.value)
def test_git_clone_repo_branch_not_exist(exists, isdir, isfile, mkdtemp, check_call):
    """A git failure while cloning/checking out propagates ``CalledProcessError``.

    NOTE(review): all five parameters look like injected mocks (presumably from
    patch decorators that are not part of this block) -- confirm.
    """
    git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}
    entry_point = "entry_point"
    source_dir = "source_dir"
    dependencies = ["foo", "bar"]
    with pytest.raises(subprocess.CalledProcessError) as error:
        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
    # Inspect the raised exception itself: ``str()`` of the ``ExceptionInfo``
    # wrapper is pytest's own representation and need not contain the message.
    assert "returned non-zero exit status" in str(error.value)
def test_git_clone_repo_repo_not_provided(exists, isdir, isfile, mkdtemp, check_call):
    """A ``git_config`` without a ``repo`` key is rejected with ``ValueError``.

    NOTE(review): all five parameters look like injected mocks (presumably from
    patch decorators that are not part of this block) -- confirm.
    """
    git_config = {"branch": BRANCH, "commit": COMMIT}
    entry_point = "entry_point_that_does_not_exist"
    source_dir = "source_dir"
    dependencies = ["foo", "bar"]
    with pytest.raises(ValueError) as error:
        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
    # Check the exception's own message rather than the ``ExceptionInfo``
    # wrapper, whose string form is a pytest implementation detail.
    assert "Please provide a repo for git_config." in str(error.value)
def test_git_clone_repo_dependencies_not_exist(exists, isdir, isfile, mkdtemp, check_call):
    """A dependency path that is missing from the cloned repo raises ``ValueError``.

    NOTE(review): all five parameters look like injected mocks (presumably from
    patch decorators that are not part of this block) -- confirm.
    """
    git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}
    entry_point = "entry_point"
    source_dir = "source_dir"
    dependencies = ["foo", "dep_that_does_not_exist"]
    with pytest.raises(ValueError) as error:
        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
    # Check the exception's own message rather than the ``ExceptionInfo``
    # wrapper, whose string form is a pytest implementation detail.
    assert "does not exist in the repo." in str(error.value)
def test_git_clone_repo_with_and_token_2fa_wrong_creds(mkdtemp, check_call):
    """A failing clone with a token in ``git_config`` propagates ``CalledProcessError``.

    NOTE(review): ``mkdtemp`` and ``check_call`` look like injected mocks
    (presumably from patch decorators that are not part of this block) -- confirm.
    """
    git_config = {
        "repo": PRIVATE_GIT_REPO,
        "branch": PRIVATE_BRANCH,
        "commit": PRIVATE_COMMIT,
        "2FA_enabled": False,
        "token": "wrong-token",
    }
    entry_point = "entry_point"
    with pytest.raises(subprocess.CalledProcessError) as error:
        git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point)
    # Inspect the raised exception itself: ``str()`` of the ``ExceptionInfo``
    # wrapper is pytest's own representation and need not contain the message.
    assert "returned non-zero exit status" in str(error.value)
def test_git_clone_repo_git_argument_wrong_format():
    """A non-string ``token`` in ``git_config`` is rejected with ``ValueError``."""
    git_config = {
        "repo": PUBLIC_GIT_REPO,
        "branch": PUBLIC_BRANCH,
        "commit": PUBLIC_COMMIT,
        "token": 42,
    }
    entry_point = "entry_point"
    source_dir = "source_dir"
    dependencies = ["foo", "bar"]
    with pytest.raises(ValueError) as error:
        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
    # Check the exception's own message rather than the ``ExceptionInfo``
    # wrapper, whose string form is a pytest implementation detail.
    assert "'token' must be a string." in str(error.value)
def test_git_clone_repo_codecommit_https_with_username_and_password(
    isfile, mkdtemp, check_call
):
    """CodeCommit HTTPS clone with username/password puts both into the clone URL.

    Verifies the ``git clone`` and branch ``git checkout`` invocations and the
    paths returned for the entry point, source dir and dependencies.

    NOTE(review): ``isfile``, ``mkdtemp`` and ``check_call`` look like injected
    mocks (presumably from patch decorators outside this block) -- confirm.
    """
    git_config = {
        "repo": CODECOMMIT_REPO,
        "branch": CODECOMMIT_BRANCH,
        "username": "******",
        "password": "******",
    }
    # The clone is expected to run with the caller's environment plus
    # GIT_TERMINAL_PROMPT=0.
    expected_env = dict(os.environ, GIT_TERMINAL_PROMPT="0")

    result = git_utils.git_clone_repo(git_config=git_config, entry_point="entry_point")

    expected_clone_cmd = [
        "git",
        "clone",
        "https://*****:*****@git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/",
        REPO_DIR,
    ]
    check_call.assert_any_call(expected_clone_cmd, env=expected_env)
    check_call.assert_any_call(args=["git", "checkout", CODECOMMIT_BRANCH], cwd=REPO_DIR)

    assert result["entry_point"] == "/tmp/repo_dir/entry_point"
    assert result["source_dir"] is None
    assert result["dependencies"] is None
def test_git_clone_repo_with_token_2fa_unnecessary_creds_provided(
    isfile, mkdtemp, check_call
):
    """With 2FA enabled, the token is used and extra credentials trigger a warning.

    Verifies the emitted ``UserWarning``, the ``git clone`` / ``git checkout``
    invocations and the returned paths.

    NOTE(review): ``isfile``, ``mkdtemp`` and ``check_call`` look like injected
    mocks (presumably from patch decorators outside this block) -- confirm.
    """
    git_config = {
        "repo": PRIVATE_GIT_REPO,
        "branch": PRIVATE_BRANCH,
        "commit": PRIVATE_COMMIT,
        "2FA_enabled": True,
        "username": "******",
        "token": "my-token",
    }
    entry_point = "entry_point"
    env = os.environ.copy()
    env["GIT_TERMINAL_PROMPT"] = "0"
    with pytest.warns(UserWarning) as warn:
        ret = git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point)
    assert (
        "Using token for authentication, other credentials will be ignored."
        in warn[0].message.args[0]
    )
    # The token is expected as the userinfo part of the HTTPS clone URL.
    # NOTE(review): the host assumes PRIVATE_GIT_REPO points at github.com -- confirm.
    check_call.assert_any_call(
        ["git", "clone", "https://my-token@github.com/testAccount/private-repo.git", REPO_DIR],
        env=env,
    )
    check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR)
    check_call.assert_any_call(args=["git", "checkout", PRIVATE_COMMIT], cwd=REPO_DIR)
    assert ret["entry_point"] == "/tmp/repo_dir/entry_point"
    assert ret["source_dir"] is None
    assert ret["dependencies"] is None
def test_git_clone_repo_with_token_2fa(isfile, mkdtemp, check_call):
    """With 2FA enabled, the token is embedded in the HTTPS clone URL.

    Verifies the ``git clone`` / ``git checkout`` invocations and the returned
    paths.

    NOTE(review): ``isfile``, ``mkdtemp`` and ``check_call`` look like injected
    mocks (presumably from patch decorators outside this block) -- confirm.
    """
    git_config = {
        "repo": PRIVATE_GIT_REPO,
        "branch": PRIVATE_BRANCH,
        "commit": PRIVATE_COMMIT,
        "2FA_enabled": True,
        "username": "******",
        "token": "my-token",
    }
    entry_point = "entry_point"
    env = os.environ.copy()
    env["GIT_TERMINAL_PROMPT"] = "0"
    ret = git_utils.git_clone_repo(git_config=git_config, entry_point=entry_point)
    # The token is expected as the userinfo part of the HTTPS clone URL.
    # NOTE(review): the host assumes PRIVATE_GIT_REPO points at github.com -- confirm.
    check_call.assert_any_call(
        ["git", "clone", "https://my-token@github.com/testAccount/private-repo.git", REPO_DIR],
        env=env,
    )
    check_call.assert_any_call(args=["git", "checkout", PRIVATE_BRANCH], cwd=REPO_DIR)
    check_call.assert_any_call(args=["git", "checkout", PRIVATE_COMMIT], cwd=REPO_DIR)
    assert ret["entry_point"] == "/tmp/repo_dir/entry_point"
    assert ret["source_dir"] is None
    assert ret["dependencies"] is None
def test_git_clone_repo_ssh(isfile, mkdtemp, check_call):
    """Cloning via the SSH repo URL returns the entry point inside the clone dir.

    Only ``entry_point`` is supplied, so ``source_dir`` and ``dependencies``
    are expected to come back as ``None``.

    NOTE(review): ``isfile``, ``mkdtemp`` and ``check_call`` look like injected
    mocks (presumably from patch decorators outside this block) -- confirm.
    """
    ssh_config = {
        "repo": PRIVATE_GIT_REPO_SSH,
        "branch": PRIVATE_BRANCH,
        "commit": PRIVATE_COMMIT,
    }

    result = git_utils.git_clone_repo(ssh_config, "entry_point")

    assert result["entry_point"] == "/tmp/repo_dir/entry_point"
    assert result["source_dir"] is None
    assert result["dependencies"] is None
def test_git_clone_repo_succeed(exists, isdir, isfile, mkdtemp, check_call):
    """Happy path: clone, check out branch and commit, resolve paths in the clone.

    NOTE(review): a second ``test_git_clone_repo_succeed`` follows this one in
    the visible source; the later definition rebinds the name, so this variant
    (which expects ``git clone`` to be called without ``env``) is never
    collected and appears stale -- confirm and reconcile.

    NOTE(review): all five parameters look like injected mocks (presumably from
    patch decorators outside this block) -- confirm.
    """
    git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}
    requested_deps = ["foo", "bar"]

    result = git_utils.git_clone_repo(git_config, "entry_point", "source_dir", requested_deps)

    # One clone into the temp dir, then branch and commit checkouts inside it.
    clone_cmd = ["git", "clone", git_config["repo"], REPO_DIR]
    check_call.assert_any_call(clone_cmd)
    check_call.assert_any_call(args=["git", "checkout", BRANCH], cwd=REPO_DIR)
    check_call.assert_any_call(args=["git", "checkout", COMMIT], cwd=REPO_DIR)
    mkdtemp.assert_called_once()

    assert result["entry_point"] == "entry_point"
    assert result["source_dir"] == "/tmp/repo_dir/source_dir"
    assert result["dependencies"] == ["/tmp/repo_dir/foo", "/tmp/repo_dir/bar"]
def test_git_clone_repo_succeed(exists, isdir, isfile, mkdtemp, check_call):
    """Happy path for the public repo: clone, check out, resolve paths in the clone.

    Verifies that ``git clone`` runs with ``GIT_TERMINAL_PROMPT=0`` in its
    environment, that branch and commit are checked out in the clone dir, and
    that ``source_dir`` and ``dependencies`` come back rooted in the clone dir
    while ``entry_point`` stays as given.

    NOTE(review): all five parameters look like injected mocks (presumably from
    patch decorators outside this block) -- confirm.
    """
    git_config = {
        "repo": PUBLIC_GIT_REPO,
        "branch": PUBLIC_BRANCH,
        "commit": PUBLIC_COMMIT,
    }
    requested_deps = ["foo", "bar"]
    expected_env = dict(os.environ, GIT_TERMINAL_PROMPT="0")

    result = git_utils.git_clone_repo(git_config, "entry_point", "source_dir", requested_deps)

    clone_cmd = ["git", "clone", git_config["repo"], REPO_DIR]
    check_call.assert_any_call(clone_cmd, env=expected_env)
    check_call.assert_any_call(args=["git", "checkout", PUBLIC_BRANCH], cwd=REPO_DIR)
    check_call.assert_any_call(args=["git", "checkout", PUBLIC_COMMIT], cwd=REPO_DIR)
    mkdtemp.assert_called_once()

    assert result["entry_point"] == "entry_point"
    assert result["source_dir"] == "/tmp/repo_dir/source_dir"
    assert result["dependencies"] == ["/tmp/repo_dir/foo", "/tmp/repo_dir/bar"]
def __init__(
    self,
    model_data,
    image,
    role,
    entry_point,
    source_dir=None,
    predictor_cls=None,
    env=None,
    name=None,
    enable_cloudwatch_metrics=False,
    container_log_level=logging.INFO,
    code_location=None,
    sagemaker_session=None,
    dependencies=None,
    git_config=None,
    **kwargs
):
    """Initialize a ``FrameworkModel``.

    Args:
        model_data (str): The S3 location of a SageMaker model data
            ``.tar.gz`` file.
        image (str): A Docker image URI.
        role (str): An IAM role name or ARN for SageMaker to access AWS
            resources on your behalf.
        entry_point (str): Path (absolute or relative) to the Python source
            file which should be executed as the entry point to model
            hosting. This should be compatible with either Python 2.7 or
            Python 3.5. If 'git_config' is provided, 'entry_point' should be
            a relative location to the Python source file in the Git repo.
            Example:

                With the following GitHub repo directory structure:

                >>> |----- README.md
                >>> |----- src
                >>>         |----- inference.py
                >>>         |----- test.py

                You can assign entry_point='src/inference.py'.
        source_dir (str): Path (absolute or relative) to a directory with
            any other training source code dependencies aside from the entry
            point file (default: None). Structure within this directory will
            be preserved when training on SageMaker. If 'git_config' is
            provided, 'source_dir' should be a relative location to a
            directory in the Git repo. If the directory points to S3, no
            code will be uploaded and the S3 location will be used instead.
            Example:

                With the following GitHub repo directory structure:

                >>> |----- README.md
                >>> |----- src
                >>>         |----- inference.py
                >>>         |----- test.py

                You can assign entry_point='inference.py', source_dir='src'.
        predictor_cls (callable[string, sagemaker.session.Session]): A
            function to call to create a predictor (default: None). If not
            None, ``deploy`` will return the result of invoking this
            function on the created endpoint name.
        env (dict[str, str]): Environment variables to run with ``image``
            when hosted in SageMaker (default: None).
        name (str): The model name. If None, a default model name will be
            selected on each ``deploy``.
        enable_cloudwatch_metrics (bool): Whether training and hosting
            containers will generate CloudWatch metrics under the
            AWS/SageMakerContainer namespace (default: False).
        container_log_level (int): Log level to use within the container
            (default: logging.INFO). Valid values are defined in the Python
            logging module.
        code_location (str): Name of the S3 bucket where custom code is
            uploaded (default: None). If not specified, default bucket
            created by ``sagemaker.session.Session`` is used.
        sagemaker_session (sagemaker.session.Session): A SageMaker Session
            object, used for SageMaker interactions (default: None). If not
            specified, one is created using the default AWS configuration
            chain.
        dependencies (list[str]): A list of paths to directories (absolute
            or relative) with any additional libraries that will be exported
            to the container (default: []). The library folders will be
            copied to SageMaker in the same folder where the entrypoint is
            copied. If 'git_config' is provided, 'dependencies' should be a
            list of relative locations to directories with any additional
            libraries needed in the Git repo. If the ``source_dir`` points
            to S3, code will be uploaded and the S3 location will be used
            instead. Example:

                The following call

                >>> Estimator(entry_point='inference.py',
                >>>           dependencies=['my/libs/common', 'virtual-env'])

                results in the following inside the container:

                >>> $ ls
                >>> opt/ml/code
                >>>     |------ inference.py
                >>>     |------ common
                >>>     |------ virtual-env
        git_config (dict[str, str]): Git configurations used for cloning
            files, including ``repo``, ``branch``, ``commit``,
            ``2FA_enabled``, ``username``, ``password`` and ``token``. The
            ``repo`` field is required. All other fields are optional.
            ``repo`` specifies the Git repository where your training script
            is stored. If you don't provide ``branch``, the default value
            'master' is used. If you don't provide ``commit``, the latest
            commit in the specified branch is used. Example:

                The following config:

                >>> git_config = {'repo': 'https://github.com/aws/sagemaker-python-sdk.git',
                >>>               'branch': 'test-branch-git-config',
                >>>               'commit': '329bfcf884482002c05ff7f44f62599ebc9f445a'}

                results in cloning the repo specified in 'repo', then
                checking out the specified branch
                ('test-branch-git-config'), and then checking out the
                specified commit.

            ``2FA_enabled``, ``username``, ``password`` and ``token`` are
            used for authentication.

            For GitHub (or other Git) accounts, set ``2FA_enabled`` to
            'True' if two-factor authentication is enabled for the account,
            otherwise set it to 'False'. If you do not provide a value for
            ``2FA_enabled``, a default value of 'False' is used. CodeCommit
            does not support two-factor authentication, so do not provide
            "2FA_enabled" with CodeCommit repositories.

            For GitHub and other Git repos, when SSH URLs are provided, it
            doesn't matter whether 2FA is enabled or disabled; you should
            either have no passphrase for the SSH key pairs, or have the
            ssh-agent configured so that you will not be prompted for SSH
            passphrase when you do 'git clone' command with SSH URLs. When
            HTTPS URLs are provided: if 2FA is disabled, then either token
            or username+password will be used for authentication if provided
            (token prioritized); if 2FA is enabled, only token will be used
            for authentication if provided. If required authentication info
            is not provided, python SDK will try to use local credentials
            storage to authenticate. If that also fails, an error message
            will be thrown.

            For CodeCommit repos, 2FA is not supported, so '2FA_enabled'
            should not be provided. There is no token in CodeCommit, so
            'token' should not be provided either. When 'repo' is an SSH
            URL, the requirements are the same as GitHub-like repos. When
            'repo' is an HTTPS URL, username+password will be used for
            authentication if they are provided; otherwise, python SDK will
            try to use either CodeCommit credential helper or local
            credential storage for authentication.
        **kwargs: Keyword arguments passed to the ``Model`` initializer.
    """
    super(FrameworkModel, self).__init__(
        model_data,
        image,
        role,
        predictor_cls=predictor_cls,
        env=env,
        name=name,
        sagemaker_session=sagemaker_session,
        **kwargs
    )

    self.entry_point = entry_point
    self.source_dir = source_dir
    self.dependencies = dependencies or []
    self.git_config = git_config
    self.enable_cloudwatch_metrics = enable_cloudwatch_metrics
    self.container_log_level = container_log_level

    # Split an explicit code location into bucket and key prefix; without
    # one, both stay unset.
    self.bucket, self.key_prefix = (
        fw_utils.parse_s3_url(code_location) if code_location else (None, None)
    )

    if self.git_config:
        # Replace the user-supplied paths with the ones returned by the
        # clone helper.
        cloned_paths = git_utils.git_clone_repo(
            self.git_config, self.entry_point, self.source_dir, self.dependencies
        )
        self.entry_point = cloned_paths["entry_point"]
        self.source_dir = cloned_paths["source_dir"]
        self.dependencies = cloned_paths["dependencies"]

    self.uploaded_code = None
    self.repacked_model_data = None