Ejemplo n.º 1
0
def test_git_clone_repo_codecommit_https_creds_not_stored_locally(
        mkdtemp, check_call):
    """A CodeCommit HTTPS clone with no credentials in ``git_config`` must fail.

    The config carries only ``repo`` and ``branch``. ``mkdtemp`` and
    ``check_call`` are presumably mocks injected by ``patch`` decorators that
    are not visible here; the test itself only requires that
    ``git_clone_repo`` raises ``subprocess.CalledProcessError``.
    """
    git_config = {"repo": CODECOMMIT_REPO, "branch": CODECOMMIT_BRANCH}
    entry_point = "entry_point"
    with pytest.raises(subprocess.CalledProcessError) as excinfo:
        git_utils.git_clone_repo(git_config, entry_point)
    # Check the raised exception itself: str() of the ExceptionInfo wrapper is
    # its repr on pytest >= 5 and does not include the exception message.
    assert "returned non-zero exit status" in str(excinfo.value)
Ejemplo n.º 2
0
def test_git_clone_repo_codecommit_ssh_passphrase_required(
        mkdtemp, check_call):
    """A CodeCommit SSH clone is expected to raise ``CalledProcessError``.

    ``mkdtemp`` and ``check_call`` are presumably mocks injected by ``patch``
    decorators that are not visible here (the failure is likely simulated
    through ``check_call`` -- confirm against the decorators).
    """
    git_config = {"repo": CODECOMMIT_REPO_SSH, "branch": CODECOMMIT_BRANCH}
    entry_point = "entry_point"
    with pytest.raises(subprocess.CalledProcessError) as excinfo:
        git_utils.git_clone_repo(git_config, entry_point)
    # Check the raised exception itself: str() of the ExceptionInfo wrapper is
    # its repr on pytest >= 5 and does not include the exception message.
    assert "returned non-zero exit status" in str(excinfo.value)
def test_git_clone_repo_branch_not_exist(exists, isdir, isfile, mkdtemp, check_call):
    """``git_clone_repo`` must surface the git failure as ``CalledProcessError``.

    All parameters are presumably mocks injected by ``patch`` decorators that
    are not visible here; the missing-branch failure is likely simulated
    through ``check_call`` -- confirm against the decorators.
    """
    git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}
    entry_point = "entry_point"
    source_dir = "source_dir"
    dependencies = ["foo", "bar"]
    with pytest.raises(subprocess.CalledProcessError) as excinfo:
        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
    # Check the raised exception itself: str() of the ExceptionInfo wrapper is
    # its repr on pytest >= 5 and does not include the exception message.
    assert "returned non-zero exit status" in str(excinfo.value)
def test_git_clone_repo_repo_not_provided(exists, isdir, isfile, mkdtemp, check_call):
    """A ``git_config`` without the ``repo`` key must be rejected with ``ValueError``.

    All parameters are presumably mocks injected by ``patch`` decorators that
    are not visible here.
    """
    git_config = {"branch": BRANCH, "commit": COMMIT}
    entry_point = "entry_point_that_does_not_exist"
    source_dir = "source_dir"
    dependencies = ["foo", "bar"]
    with pytest.raises(ValueError) as excinfo:
        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
    # Assert on the exception message, not on the ExceptionInfo wrapper whose
    # string form differs between pytest versions.
    assert "Please provide a repo for git_config." in str(excinfo.value)
def test_git_clone_repo_dependencies_not_exist(exists, isdir, isfile, mkdtemp, check_call):
    """A dependency path that is absent from the repo must raise ``ValueError``.

    All parameters are presumably mocks injected by ``patch`` decorators that
    are not visible here; which paths "exist" is likely controlled by those
    mocks -- confirm against the decorators.
    """
    git_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}
    entry_point = "entry_point"
    source_dir = "source_dir"
    dependencies = ["foo", "dep_that_does_not_exist"]
    with pytest.raises(ValueError) as excinfo:
        git_utils.git_clone_repo(git_config, entry_point, source_dir, dependencies)
    # Assert on the exception message, not on the ExceptionInfo wrapper whose
    # string form differs between pytest versions.
    assert "does not exist in the repo." in str(excinfo.value)
Ejemplo n.º 6
0
def test_git_clone_repo_with_and_token_2fa_wrong_creds(mkdtemp, check_call):
    """Cloning a private repo with a wrong token must raise ``CalledProcessError``.

    ``mkdtemp`` and ``check_call`` are presumably mocks injected by ``patch``
    decorators that are not visible here (the clone failure is likely
    simulated through ``check_call`` -- confirm against the decorators).
    """
    git_config = {
        "repo": PRIVATE_GIT_REPO,
        "branch": PRIVATE_BRANCH,
        "commit": PRIVATE_COMMIT,
        "2FA_enabled": False,
        "token": "wrong-token",
    }
    entry_point = "entry_point"
    with pytest.raises(subprocess.CalledProcessError) as excinfo:
        git_utils.git_clone_repo(git_config=git_config,
                                 entry_point=entry_point)
    # Check the raised exception itself: str() of the ExceptionInfo wrapper is
    # its repr on pytest >= 5 and does not include the exception message.
    assert "returned non-zero exit status" in str(excinfo.value)
Ejemplo n.º 7
0
def test_git_clone_repo_git_argument_wrong_format():
    """A non-string ``token`` in ``git_config`` must be rejected with ``ValueError``."""
    git_config = {
        "repo": PUBLIC_GIT_REPO,
        "branch": PUBLIC_BRANCH,
        "commit": PUBLIC_COMMIT,
        "token": 42,  # deliberately an int instead of a str
    }
    entry_point = "entry_point"
    source_dir = "source_dir"
    dependencies = ["foo", "bar"]
    with pytest.raises(ValueError) as excinfo:
        git_utils.git_clone_repo(git_config, entry_point, source_dir,
                                 dependencies)
    # Assert on the exception message, not on the ExceptionInfo wrapper whose
    # string form differs between pytest versions.
    assert "'token' must be a string." in str(excinfo.value)
Ejemplo n.º 8
0
def test_git_clone_repo_codecommit_https_with_username_and_password(
        isfile, mkdtemp, check_call):
    """Clone a CodeCommit HTTPS repo using username/password credentials.

    Verifies the ``git clone`` and ``git checkout`` invocations recorded on
    ``check_call`` and the paths returned by ``git_clone_repo``. The
    parameters are presumably mocks injected by ``patch`` decorators that are
    not visible here.
    """
    codecommit_config = {
        "repo": CODECOMMIT_REPO,
        "branch": CODECOMMIT_BRANCH,
        "username": "******",
        "password": "******",
    }
    # The clone is expected to run with interactive git prompts disabled.
    expected_env = os.environ.copy()
    expected_env["GIT_TERMINAL_PROMPT"] = "0"

    result = git_utils.git_clone_repo(git_config=codecommit_config,
                                      entry_point="entry_point")

    # NOTE(review): the masked credentials in this URL (and in the config
    # above) look like redaction artifacts -- confirm the intended values.
    expected_clone_cmd = [
        "git",
        "clone",
        "https://*****:*****@git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/",
        REPO_DIR,
    ]
    check_call.assert_any_call(expected_clone_cmd, env=expected_env)
    check_call.assert_any_call(args=["git", "checkout", CODECOMMIT_BRANCH],
                               cwd=REPO_DIR)

    assert result["entry_point"] == "/tmp/repo_dir/entry_point"
    for untouched_key in ("source_dir", "dependencies"):
        assert result[untouched_key] is None
Ejemplo n.º 9
0
def test_git_clone_repo_with_token_2fa_unnecessary_creds_provided(
        isfile, mkdtemp, check_call):
    """Token auth with an extra ``username`` must warn and still clone.

    Verifies the emitted ``UserWarning`` text, the ``git clone`` /
    ``git checkout`` invocations recorded on ``check_call`` and the returned
    paths. The parameters are presumably mocks injected by ``patch``
    decorators that are not visible here.
    """
    private_config = {
        "repo": PRIVATE_GIT_REPO,
        "branch": PRIVATE_BRANCH,
        "commit": PRIVATE_COMMIT,
        "2FA_enabled": True,
        "username": "******",
        "token": "my-token",
    }
    # The clone is expected to run with interactive git prompts disabled.
    expected_env = os.environ.copy()
    expected_env["GIT_TERMINAL_PROMPT"] = "0"

    with pytest.warns(UserWarning) as warning_records:
        result = git_utils.git_clone_repo(git_config=private_config,
                                          entry_point="entry_point")

    first_warning_text = warning_records[0].message.args[0]
    assert (
        "Using token for authentication, other credentials will be ignored."
        in first_warning_text)

    # NOTE(review): the "[email protected]" segment looks like an e-mail
    # obfuscation artifact -- confirm the intended clone URL.
    expected_clone_cmd = [
        "git", "clone",
        "https://[email protected]/testAccount/private-repo.git",
        REPO_DIR
    ]
    check_call.assert_any_call(expected_clone_cmd, env=expected_env)
    for git_ref in (PRIVATE_BRANCH, PRIVATE_COMMIT):
        check_call.assert_any_call(args=["git", "checkout", git_ref],
                                   cwd=REPO_DIR)

    assert result["entry_point"] == "/tmp/repo_dir/entry_point"
    for untouched_key in ("source_dir", "dependencies"):
        assert result[untouched_key] is None
Ejemplo n.º 10
0
def test_git_clone_repo_with_token_2fa(isfile, mkdtemp, check_call):
    """Clone a private repo with 2FA enabled, authenticating by token.

    Verifies the ``git clone`` / ``git checkout`` invocations recorded on
    ``check_call`` and the returned paths. The parameters are presumably
    mocks injected by ``patch`` decorators that are not visible here.
    """
    private_config = {
        "repo": PRIVATE_GIT_REPO,
        "branch": PRIVATE_BRANCH,
        "commit": PRIVATE_COMMIT,
        "2FA_enabled": True,
        "username": "******",
        "token": "my-token",
    }
    # The clone is expected to run with interactive git prompts disabled.
    expected_env = os.environ.copy()
    expected_env["GIT_TERMINAL_PROMPT"] = "0"

    result = git_utils.git_clone_repo(git_config=private_config,
                                      entry_point="entry_point")

    # NOTE(review): the "[email protected]" segment looks like an e-mail
    # obfuscation artifact -- confirm the intended clone URL.
    expected_clone_cmd = [
        "git", "clone",
        "https://[email protected]/testAccount/private-repo.git",
        REPO_DIR
    ]
    check_call.assert_any_call(expected_clone_cmd, env=expected_env)
    for git_ref in (PRIVATE_BRANCH, PRIVATE_COMMIT):
        check_call.assert_any_call(args=["git", "checkout", git_ref],
                                   cwd=REPO_DIR)

    assert result["entry_point"] == "/tmp/repo_dir/entry_point"
    for untouched_key in ("source_dir", "dependencies"):
        assert result[untouched_key] is None
Ejemplo n.º 11
0
def test_git_clone_repo_ssh(isfile, mkdtemp, check_call):
    """Clone a private repo over SSH and check the returned paths.

    Only ``entry_point`` is passed, so ``source_dir`` and ``dependencies`` are
    expected to come back as ``None``. The parameters are presumably mocks
    injected by ``patch`` decorators that are not visible here.
    """
    ssh_config = {
        "repo": PRIVATE_GIT_REPO_SSH,
        "branch": PRIVATE_BRANCH,
        "commit": PRIVATE_COMMIT
    }

    result = git_utils.git_clone_repo(ssh_config, "entry_point")

    assert result["entry_point"] == "/tmp/repo_dir/entry_point"
    for untouched_key in ("source_dir", "dependencies"):
        assert result[untouched_key] is None
def test_git_clone_repo_succeed(exists, isdir, isfile, mkdtemp, check_call):
    """Happy path: clone, check out branch and commit, and rewrite the paths.

    Verifies the git invocations recorded on ``check_call``, that ``mkdtemp``
    was called exactly once, and the paths returned by ``git_clone_repo``.
    The parameters are presumably mocks injected by ``patch`` decorators that
    are not visible here.
    """
    repo_config = {"repo": GIT_REPO, "branch": BRANCH, "commit": COMMIT}

    result = git_utils.git_clone_repo(repo_config, "entry_point", "source_dir", ["foo", "bar"])

    check_call.assert_any_call(["git", "clone", repo_config["repo"], REPO_DIR])
    # The branch is expected to be checked out as well as the pinned commit.
    for git_ref in (BRANCH, COMMIT):
        check_call.assert_any_call(args=["git", "checkout", git_ref], cwd=REPO_DIR)
    mkdtemp.assert_called_once()

    assert result["entry_point"] == "entry_point"
    assert result["source_dir"] == "/tmp/repo_dir/source_dir"
    assert result["dependencies"] == ["/tmp/repo_dir/foo", "/tmp/repo_dir/bar"]
Ejemplo n.º 13
0
def test_git_clone_repo_succeed(exists, isdir, isfile, mkdtemp, check_call):
    """Happy path for a public repo: clone, check out, and rewrite the paths.

    Verifies the git invocations recorded on ``check_call`` (the clone with
    ``GIT_TERMINAL_PROMPT`` set to "0"), that ``mkdtemp`` was called exactly
    once, and the paths returned by ``git_clone_repo``. The parameters are
    presumably mocks injected by ``patch`` decorators that are not visible
    here.
    """
    public_config = {
        "repo": PUBLIC_GIT_REPO,
        "branch": PUBLIC_BRANCH,
        "commit": PUBLIC_COMMIT
    }
    # The clone is expected to run with interactive git prompts disabled.
    expected_env = os.environ.copy()
    expected_env["GIT_TERMINAL_PROMPT"] = "0"

    result = git_utils.git_clone_repo(public_config, "entry_point",
                                      "source_dir", ["foo", "bar"])

    expected_clone_cmd = ["git", "clone", public_config["repo"], REPO_DIR]
    check_call.assert_any_call(expected_clone_cmd, env=expected_env)
    for git_ref in (PUBLIC_BRANCH, PUBLIC_COMMIT):
        check_call.assert_any_call(args=["git", "checkout", git_ref],
                                   cwd=REPO_DIR)
    mkdtemp.assert_called_once()

    assert result["entry_point"] == "entry_point"
    assert result["source_dir"] == "/tmp/repo_dir/source_dir"
    assert result["dependencies"] == ["/tmp/repo_dir/foo", "/tmp/repo_dir/bar"]
Ejemplo n.º 14
0
    def __init__(
        self,
        model_data,
        image,
        role,
        entry_point,
        source_dir=None,
        predictor_cls=None,
        env=None,
        name=None,
        enable_cloudwatch_metrics=False,
        container_log_level=logging.INFO,
        code_location=None,
        sagemaker_session=None,
        dependencies=None,
        git_config=None,
        **kwargs
    ):
        """Set up a ``FrameworkModel``, cloning its code from Git when requested.

        When ``git_config`` is given, ``git_utils.git_clone_repo`` is called and ``entry_point``,
        ``source_dir`` and ``dependencies`` are replaced by the values it returns.

        Args:
            model_data (str): S3 location of a SageMaker model data ``.tar.gz`` file.
            image (str): Docker image URI.
            role (str): IAM role name or ARN that SageMaker uses to access AWS resources on your behalf.
            entry_point (str): Absolute or relative path to the Python source file that is executed as the
                entry point to model hosting. It should be compatible with either Python 2.7 or Python 3.5.
                When 'git_config' is provided, 'entry_point' should be the location of the Python source
                file relative to the Git repo.
                Example:

                    Given this GitHub repo directory structure:

                    >>> |----- README.md
                    >>> |----- src
                    >>>         |----- inference.py
                    >>>         |----- test.py

                    you can pass entry_point='src/inference.py'.
            source_dir (str): Absolute or relative path to a directory holding any other training source
                code dependencies besides the entry point file (default: None). The structure inside this
                directory is preserved when training on SageMaker. When 'git_config' is provided,
                'source_dir' should be the location of a directory relative to the Git repo. If the
                directory points to S3, no code is uploaded and the S3 location is used instead.
                Example:

                    Given this GitHub repo directory structure:

                    >>> |----- README.md
                    >>> |----- src
                    >>>         |----- inference.py
                    >>>         |----- test.py

                    you can pass entry_point='inference.py', source_dir='src'.
            predictor_cls (callable[string, sagemaker.session.Session]): Function used to create a
                predictor (default: None). If not None, ``deploy`` returns the result of invoking this
                function on the created endpoint name.
            env (dict[str, str]): Environment variables to run with ``image`` when hosted in SageMaker
                (default: None).
            name (str): The model name. If None, a default model name is selected on each ``deploy``.
            enable_cloudwatch_metrics (bool): Whether training and hosting containers generate CloudWatch
                metrics under the AWS/SageMakerContainer namespace (default: False).
            container_log_level (int): Log level to use within the container (default: logging.INFO).
                Valid values are defined in the Python logging module.
            code_location (str): Name of the S3 bucket where custom code is uploaded (default: None).
                If not specified, the default bucket created by ``sagemaker.session.Session`` is used.
            sagemaker_session (sagemaker.session.Session): SageMaker Session object used for SageMaker
                interactions (default: None). If not specified, one is created using the default AWS
                configuration chain.
            dependencies (list[str]): Absolute or relative paths to directories with any additional
                libraries that will be exported to the container (default: []). The library folders are
                copied to SageMaker into the same folder the entry point is copied to. When 'git_config'
                is provided, 'dependencies' should be a list of locations, relative to the Git repo, of
                directories with any additional libraries needed. If the ``source_dir`` points to S3, code
                will be uploaded and the S3 location will be used instead.
                Example:

                    The following call
                    >>> Estimator(entry_point='inference.py', dependencies=['my/libs/common', 'virtual-env'])
                    results in the following inside the container:

                    >>> $ ls

                    >>> opt/ml/code
                    >>>     |------ inference.py
                    >>>     |------ common
                    >>>     |------ virtual-env

            git_config (dict[str, str]): Git settings used for cloning files. Recognized keys are ``repo``,
                ``branch``, ``commit``, ``2FA_enabled``, ``username``, ``password`` and ``token``; only
                ``repo`` is required. ``repo`` names the Git repository where your training script is
                stored. When ``branch`` is omitted, the default value 'master' is used. When ``commit`` is
                omitted, the latest commit in the specified branch is used.
                Example:

                    The following config:

                    >>> git_config = {'repo': 'https://github.com/aws/sagemaker-python-sdk.git',
                    >>>               'branch': 'test-branch-git-config',
                    >>>               'commit': '329bfcf884482002c05ff7f44f62599ebc9f445a'}

                    clones the repo given in 'repo', checks out the 'test-branch-git-config' branch, and
                    then checks out the specified commit.
                ``2FA_enabled``, ``username``, ``password`` and ``token`` are used for authentication. For
                GitHub (or other Git) accounts, set ``2FA_enabled`` to 'True' if two-factor authentication
                is enabled for the account, otherwise set it to 'False'. When ``2FA_enabled`` is not
                provided, a default value of 'False' is used. CodeCommit does not support two-factor
                authentication, so do not provide "2FA_enabled" with CodeCommit repositories.

                For GitHub and other Git repos given as SSH URLs, it does not matter whether 2FA is
                enabled or disabled; either the SSH key pair must have no passphrase, or the ssh-agent
                must be configured so that 'git clone' with SSH URLs does not prompt for a passphrase.
                For HTTPS URLs: with 2FA disabled, either token or username+password is used for
                authentication if provided (token takes priority); with 2FA enabled, only the token is
                used for authentication if provided. If the required authentication info is not provided,
                the Python SDK tries to authenticate with local credentials storage. If that also fails,
                an error message is thrown.

                For CodeCommit repos, 2FA is not supported, so '2FA_enabled' should not be provided.
                CodeCommit has no tokens, so 'token' should not be provided either. When 'repo' is an SSH
                URL, the requirements are the same as for GitHub-like repos. When 'repo' is an HTTPS URL,
                username+password is used for authentication if provided; otherwise, the Python SDK tries
                to use either the CodeCommit credential helper or local credential storage.
            **kwargs: Keyword arguments passed to the ``Model`` initializer.
        """
        super(FrameworkModel, self).__init__(
            model_data,
            image,
            role,
            predictor_cls=predictor_cls,
            env=env,
            name=name,
            sagemaker_session=sagemaker_session,
            **kwargs
        )

        self.entry_point = entry_point
        self.source_dir = source_dir
        # A missing or empty dependency list is stored as a fresh empty list.
        self.dependencies = dependencies if dependencies else []
        self.git_config = git_config
        self.enable_cloudwatch_metrics = enable_cloudwatch_metrics
        self.container_log_level = container_log_level

        # Without an explicit code location both S3 coordinates stay unset.
        if not code_location:
            self.bucket, self.key_prefix = None, None
        else:
            self.bucket, self.key_prefix = fw_utils.parse_s3_url(code_location)

        if self.git_config:
            # Replace the user-supplied paths with the ones git_clone_repo returns.
            cloned_paths = git_utils.git_clone_repo(
                self.git_config, self.entry_point, self.source_dir, self.dependencies
            )
            self.entry_point = cloned_paths["entry_point"]
            self.source_dir = cloned_paths["source_dir"]
            self.dependencies = cloned_paths["dependencies"]

        self.uploaded_code = None
        self.repacked_model_data = None