Example #1
0
def prepare_framework_container_def(model, instance_type, s3_operations):
    """Prepare the framework model container information. Specify related S3
    operations for Airflow to perform. (Upload `source_dir` )

    ``model.name`` and ``model.uploaded_code`` are updated in place.

    Args:
        model (sagemaker.model.FrameworkModel): The framework model
        instance_type (str): The EC2 instance type to deploy this Model to. For
            example, 'ml.p2.xlarge'.
        s3_operations (dict): The dict to specify S3 operations (upload
            `source_dir` ). An "S3Upload" entry is written into it unless
            ``model.source_dir`` already is an S3 URI.

    Returns:
        dict: The container information of this framework model.
    """
    deploy_image = model.image
    if not deploy_image:
        # No explicit image on the model: derive one from the framework
        # settings and the session's region.
        region_name = model.sagemaker_session.boto_session.region_name
        deploy_image = fw_utils.create_image_uri(
            region_name,
            model.__framework_name__,
            instance_type,
            model.framework_version,
            model.py_version,
        )

    base_name = utils.base_name_from_image(deploy_image)
    model.name = model.name or utils.name_from_base(base_name)

    bucket = model.bucket or model.sagemaker_session._default_bucket
    script = os.path.basename(model.entry_point)
    key = "{}/source/sourcedir.tar.gz".format(model.name)

    source_dir = model.source_dir
    already_on_s3 = bool(source_dir) and source_dir.lower().startswith("s3://")
    code_dir = source_dir if already_on_s3 else "s3://{}/{}".format(bucket, key)

    model.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir,
                                                script_name=script)
    if not already_on_s3:
        # The code is still local, so ask Airflow to tar and upload it.
        s3_operations["S3Upload"] = [{
            "Path": source_dir or script,
            "Bucket": bucket,
            "Key": key,
            "Tar": True
        }]

    deploy_env = dict(model.env)
    deploy_env.update(model._framework_env_vars())

    # A FrameworkModel which is not a SageMaker Deep Learning Framework Model
    # has no ``model_server_workers`` attribute. Only that lookup is allowed to
    # be missing; any other AttributeError must surface instead of being
    # swallowed.
    workers = getattr(model, "model_server_workers", None)
    if workers:
        deploy_env[
            sagemaker.model.MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(workers)

    return sagemaker.container_def(deploy_image, model.model_data, deploy_env)
def prepare_framework(estimator, s3_operations):
    """Prepare S3 operations (specify where to upload `source_dir` ) and
    environment variables related to framework.

    The upload location is taken, in order of preference, from
    ``estimator.code_location``, from the estimator's existing
    ``uploaded_code`` or from the session's default bucket.
    ``estimator.uploaded_code`` and ``estimator._hyperparameters`` are updated
    in place.

    Args:
        estimator (sagemaker.estimator.Estimator): The framework estimator to
            get information from and update.
        s3_operations (dict): The dict to specify s3 operations (upload
            `source_dir` ). An "S3Upload" entry is written into it unless
            ``estimator.source_dir`` already is an S3 URI.
    """
    # S3 keys always use "/" as separator; os.path.join would insert "\\" when
    # this runs on Windows. On POSIX hosts the result is unchanged.
    import posixpath

    if estimator.code_location is not None:
        bucket, key = fw_utils.parse_s3_url(estimator.code_location)
        key = posixpath.join(key, estimator._current_job_name, "source",
                             "sourcedir.tar.gz")
    elif estimator.uploaded_code is not None:
        bucket, key = fw_utils.parse_s3_url(estimator.uploaded_code.s3_prefix)
    else:
        bucket = estimator.sagemaker_session._default_bucket
        key = posixpath.join(estimator._current_job_name, "source",
                             "sourcedir.tar.gz")

    script = os.path.basename(estimator.entry_point)

    source_dir = estimator.source_dir
    already_on_s3 = bool(source_dir) and source_dir.lower().startswith("s3://")
    code_dir = source_dir if already_on_s3 else "s3://{}/{}".format(bucket, key)

    estimator.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir,
                                                    script_name=script)
    if not already_on_s3:
        # The code is still local, so ask Airflow to tar and upload it.
        s3_operations["S3Upload"] = [{
            "Path": source_dir or estimator.entry_point,
            "Bucket": bucket,
            "Key": key,
            "Tar": True,
        }]

    hyperparameters = estimator._hyperparameters
    hyperparameters[sagemaker.model.DIR_PARAM_NAME] = code_dir
    hyperparameters[sagemaker.model.SCRIPT_PARAM_NAME] = script
    hyperparameters[sagemaker.model.CLOUDWATCH_METRICS_PARAM_NAME] = (
        estimator.enable_cloudwatch_metrics)
    hyperparameters[sagemaker.model.CONTAINER_LOG_LEVEL_PARAM_NAME] = (
        estimator.container_log_level)
    hyperparameters[sagemaker.model.JOB_NAME_PARAM_NAME] = (
        estimator._current_job_name)
    hyperparameters[sagemaker.model.SAGEMAKER_REGION_PARAM_NAME] = (
        estimator.sagemaker_session.boto_region_name)
def test_tar_and_upload_dir_s3(sagemaker_session):
    """A ``directory`` that is an S3 URI is expected back as the code prefix."""
    uploaded = fw_utils.tar_and_upload_dir(
        sagemaker_session, 'mybucker', 'something/source', 'mnist.py', 's3://m')

    expected = fw_utils.UploadedCode('s3://m', 'mnist.py')
    assert uploaded == expected
def test_tar_and_upload_dir_not_s3(sagemaker_session):
    """A local directory is expected to be reported as ``sourcedir.tar.gz``
    under the given bucket and key prefix.
    """
    target_bucket = 'mybucket'
    key_prefix = 'something/source'
    entry_script = os.path.basename(__file__)

    # The directory holding this test module serves as the source directory.
    this_file = inspect.getfile(inspect.currentframe())
    local_dir = os.path.dirname(os.path.abspath(this_file))

    uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, target_bucket, key_prefix,
                                           entry_script, local_dir)

    expected_prefix = 's3://{}/{}/sourcedir.tar.gz'.format(target_bucket, key_prefix)
    assert uploaded == fw_utils.UploadedCode(expected_prefix, entry_script)
Example #5
0
def test_tar_and_upload_dir_s3(sagemaker_session):
    """A ``directory`` that is an S3 URI is expected back as the code prefix."""
    uploaded = fw_utils.tar_and_upload_dir(
        sagemaker_session, "mybucket", "something/source", "mnist.py", "s3://m")

    expected = fw_utils.UploadedCode("s3://m", "mnist.py")
    assert uploaded == expected
def test_tar_and_upload_dir_with_directories_and_files(sagemaker_session, tmpdir):
    """A source directory with nested sub-directories is packaged with all of
    its files; the entry point may live in a sub-directory.
    """
    layout = ['src-dir/a/b', 'src-dir/a/b2', 'src-dir/x/y', 'src-dir/x/y2', 'src-dir/z']
    file_tree(tmpdir, layout)
    code_root = os.path.join(str(tmpdir), 'src-dir')

    # shutil.rmtree is patched out for the duration of the upload call.
    with patch('shutil.rmtree'):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', 'a/b', code_root)

    expected = fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                     script_name='a/b')
    assert uploaded == expected

    expected_files = {'/a/b', '/a/b2', '/x/y', '/x/y2', '/z'}
    assert expected_files == list_source_dir_files(sagemaker_session, tmpdir)
def test_tar_and_upload_dir_with_directory_and_files(sagemaker_session, tmpdir):
    """A source directory holding plain files and a package is packaged with
    every file it contains.
    """
    layout = ['src-dir/train.py', 'src-dir/laucher', 'src-dir/module/__init__.py']
    file_tree(tmpdir, layout)
    code_root = os.path.join(str(tmpdir), 'src-dir')

    # shutil.rmtree is patched out for the duration of the upload call.
    with patch('shutil.rmtree'):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', 'train.py', code_root)

    expected = fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                     script_name='train.py')
    assert uploaded == expected

    expected_files = {'/laucher', '/module/__init__.py', '/train.py'}
    assert expected_files == list_source_dir_files(sagemaker_session, tmpdir)
def test_tar_and_upload_dir_no_directory_only_entrypoint(sagemaker_session, tmpdir):
    """Without a source directory only the entry point file is packaged;
    sibling files next to it are left out.
    """
    created_root = file_tree(tmpdir, ['train.py', 'not_me.py'])
    entry_path = os.path.join(created_root, 'train.py')

    # shutil.rmtree is patched out for the duration of the upload call.
    with patch('shutil.rmtree'):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', entry_path, None)

    expected = fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                     script_name='train.py')
    assert uploaded == expected

    expected_files = {'/train.py'}
    assert expected_files == list_source_dir_files(sagemaker_session, tmpdir)
def test_test_tar_and_upload_dir_with_subfolders(sagemaker_session, tmpdir):
    """A source directory plus one dependency directory are packaged together:
    source files sit at the archive root, the dependency keeps its folder name.
    """
    file_tree(tmpdir, ['a/b/c', 'a/b/c2'])
    base = file_tree(tmpdir, ['x/y/z', 'x/y/z2'])

    code_root = os.path.join(base, 'a')
    extra_dirs = [os.path.join(base, 'x')]

    # shutil.rmtree is patched out for the duration of the upload call.
    with patch('shutil.rmtree'):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', 'b/c',
                                               code_root, extra_dirs)

    expected = fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                     script_name='b/c')
    assert uploaded == expected

    expected_files = {'/b/c', '/b/c2', '/x/y/z', '/x/y/z2'}
    assert expected_files == list_source_dir_files(sagemaker_session, tmpdir)
def test_tar_and_upload_dir_s3_kms_enabled_by_default(utils, sagemaker_session):
    """With no KMS key and no settings supplied, the upload is expected to
    request ``aws:kms`` server-side encryption.
    """
    target_bucket = "mybucket"
    key_prefix = "something/source"
    entry_script = "inference.py"

    uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, target_bucket, key_prefix, entry_script)

    expected_prefix = "s3://{}/{}/sourcedir.tar.gz".format(target_bucket, key_prefix)
    assert uploaded == fw_utils.UploadedCode(expected_prefix, entry_script)

    s3_object = sagemaker_session.resource("s3").Object("", "")
    s3_object.upload_file.assert_called_with(
        utils.create_tar_file(), ExtraArgs={"ServerSideEncryption": "aws:kms"}
    )
def test_tar_and_upload_dir_with_many_folders(sagemaker_session, tmpdir):
    """A source directory is packaged together with two dependencies: a
    directory (kept under its own name) and a single file (placed at the root).
    """
    layout = ['src-dir/a/b', 'src-dir/a/b2', 'common/x/y', 'common/x/y2', 't/y/z']
    file_tree(tmpdir, layout)

    tmp_root = str(tmpdir)
    code_root = os.path.join(tmp_root, 'src-dir')
    extra_paths = [os.path.join(tmp_root, 'common'), os.path.join(tmp_root, 't', 'y', 'z')]

    # shutil.rmtree is patched out for the duration of the upload call.
    with patch('shutil.rmtree'):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix',
                                               'pipeline.py', code_root, extra_paths)

    expected = fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                     script_name='pipeline.py')
    assert uploaded == expected

    expected_files = {'/a/b', '/a/b2', '/common/x/y', '/common/x/y2', '/z'}
    assert expected_files == list_source_dir_files(sagemaker_session, tmpdir)
Example #12
0
def test_tar_and_upload_dir_with_directory(sagemaker_session, tmpdir):
    """A source directory holding only the entry point is packaged as a
    single-file archive.
    """
    file_tree(tmpdir, ["src-dir/train.py"])
    code_root = os.path.join(str(tmpdir), "src-dir")

    # shutil.rmtree is patched out for the duration of the upload call.
    with patch("shutil.rmtree"):
        uploaded = fw_utils.tar_and_upload_dir(
            sagemaker_session, "bucket", "prefix", "train.py", code_root)

    expected = fw_utils.UploadedCode(
        s3_prefix="s3://bucket/prefix/sourcedir.tar.gz",
        script_name="train.py")
    assert uploaded == expected

    expected_files = {"/train.py"}
    assert expected_files == list_source_dir_files(sagemaker_session, tmpdir)
def test_tar_and_upload_dir_s3_with_kms(utils, sagemaker_session):
    """An explicit ``kms_key`` is expected to be forwarded to the upload as
    ``SSEKMSKeyId`` alongside ``aws:kms`` server-side encryption.
    """
    uploaded = fw_utils.tar_and_upload_dir(
        sagemaker_session, 'mybucker', 'something/source', 'mnist.py', kms_key='kms-key')

    expected = fw_utils.UploadedCode('s3://mybucker/something/source/sourcedir.tar.gz',
                                     'mnist.py')
    assert uploaded == expected

    s3_object = sagemaker_session.resource('s3').Object('', '')
    s3_object.upload_file.assert_called_with(
        utils.create_tar_file(),
        ExtraArgs={'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'kms-key'})
Example #14
0
def test_tar_and_upload_dir_no_directory_only_entrypoint(
        sagemaker_session, tmpdir):
    """Without a source directory only the entry point file is packaged;
    sibling files next to it are left out.
    """
    created_root = file_tree(tmpdir, ["train.py", "not_me.py"])
    entry_path = os.path.join(created_root, "train.py")

    # shutil.rmtree is patched out for the duration of the upload call.
    with patch("shutil.rmtree"):
        uploaded = fw_utils.tar_and_upload_dir(
            sagemaker_session, "bucket", "prefix", entry_path, None)

    expected = fw_utils.UploadedCode(
        s3_prefix="s3://bucket/prefix/sourcedir.tar.gz",
        script_name="train.py")
    assert uploaded == expected

    expected_files = {"/train.py"}
    assert expected_files == list_source_dir_files(sagemaker_session, tmpdir)
Example #15
0
def prepare_framework(estimator, s3_operations):
    """Set up the S3 upload of `source_dir` and the framework hyperparameters.

    ``estimator.uploaded_code`` and ``estimator._hyperparameters`` are updated
    in place.

    Args:
        estimator (sagemaker.estimator.Estimator): The framework estimator to get information from and update.
        s3_operations (dict): The dict to specify s3 operations (upload `source_dir`). An 'S3Upload'
            entry is written into it unless ``estimator.source_dir`` already is an S3 URI.
    """
    # NOTE(review): code_location is used verbatim as the bucket name here;
    # confirm callers never pass an "s3://..." URI for it.
    bucket = estimator.code_location or estimator.sagemaker_session._default_bucket
    key = '{}/source/sourcedir.tar.gz'.format(estimator._current_job_name)
    script = os.path.basename(estimator.entry_point)

    source_dir = estimator.source_dir
    already_on_s3 = bool(source_dir) and source_dir.lower().startswith('s3://')
    code_dir = source_dir if already_on_s3 else 's3://{}/{}'.format(bucket, key)

    estimator.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir, script_name=script)
    if not already_on_s3:
        # The code is still local, so ask Airflow to tar and upload it.
        upload_spec = {
            'Path': source_dir or script,
            'Bucket': bucket,
            'Key': key,
            'Tar': True
        }
        s3_operations['S3Upload'] = [upload_spec]

    hyperparameters = estimator._hyperparameters
    hyperparameters[sagemaker.model.DIR_PARAM_NAME] = code_dir
    hyperparameters[sagemaker.model.SCRIPT_PARAM_NAME] = script
    hyperparameters[sagemaker.model.CLOUDWATCH_METRICS_PARAM_NAME] = (
        estimator.enable_cloudwatch_metrics)
    hyperparameters[sagemaker.model.CONTAINER_LOG_LEVEL_PARAM_NAME] = (
        estimator.container_log_level)
    hyperparameters[sagemaker.model.JOB_NAME_PARAM_NAME] = estimator._current_job_name
    hyperparameters[sagemaker.model.SAGEMAKER_REGION_PARAM_NAME] = (
        estimator.sagemaker_session.boto_region_name)
def test_tar_and_upload_dir_s3_without_kms_with_overridden_settings(utils, sagemaker_session):
    """With ``encrypt_repacked_artifacts=False`` in the session settings the
    upload is expected to pass no extra arguments at all.
    """
    target_bucket = "mybucket"
    key_prefix = "something/source"
    entry_script = "inference.py"
    no_encryption = SessionSettings(encrypt_repacked_artifacts=False)

    uploaded = fw_utils.tar_and_upload_dir(
        sagemaker_session, target_bucket, key_prefix, entry_script, settings=no_encryption
    )

    expected_prefix = "s3://{}/{}/sourcedir.tar.gz".format(target_bucket, key_prefix)
    assert uploaded == fw_utils.UploadedCode(expected_prefix, entry_script)

    s3_object = sagemaker_session.resource("s3").Object("", "")
    s3_object.upload_file.assert_called_with(utils.create_tar_file(), ExtraArgs=None)
Example #17
0
def test_tar_and_upload_dir_with_many_folders(sagemaker_session, tmpdir):
    """A source directory is packaged together with two dependencies: a
    directory (kept under its own name) and a single file (placed at the root).
    """
    layout = ["src-dir/a/b", "src-dir/a/b2", "common/x/y", "common/x/y2", "t/y/z"]
    file_tree(tmpdir, layout)

    tmp_root = str(tmpdir)
    code_root = os.path.join(tmp_root, "src-dir")
    extra_paths = [os.path.join(tmp_root, "common"), os.path.join(tmp_root, "t", "y", "z")]

    # shutil.rmtree is patched out for the duration of the upload call.
    with patch("shutil.rmtree"):
        uploaded = fw_utils.tar_and_upload_dir(
            sagemaker_session, "bucket", "prefix", "pipeline.py", code_root, extra_paths
        )

    expected = fw_utils.UploadedCode(
        s3_prefix="s3://bucket/prefix/sourcedir.tar.gz", script_name="pipeline.py"
    )
    assert uploaded == expected

    expected_files = {"/a/b", "/a/b2", "/common/x/y", "/common/x/y2", "/z"}
    assert expected_files == list_source_dir_files(sagemaker_session, tmpdir)
Example #18
0
def test_test_tar_and_upload_dir_with_subfolders(sagemaker_session, tmpdir):
    """A source directory plus one dependency directory are packaged together:
    source files sit at the archive root, the dependency keeps its folder name.
    """
    file_tree(tmpdir, ["a/b/c", "a/b/c2"])
    base = file_tree(tmpdir, ["x/y/z", "x/y/z2"])

    code_root = os.path.join(base, "a")
    extra_dirs = [os.path.join(base, "x")]

    # shutil.rmtree is patched out for the duration of the upload call.
    with patch("shutil.rmtree"):
        uploaded = fw_utils.tar_and_upload_dir(
            sagemaker_session, "bucket", "prefix", "b/c", code_root, extra_dirs
        )

    expected = fw_utils.UploadedCode(
        s3_prefix="s3://bucket/prefix/sourcedir.tar.gz", script_name="b/c")
    assert uploaded == expected

    expected_files = {"/b/c", "/b/c2", "/x/y/z", "/x/y/z2"}
    assert expected_files == list_source_dir_files(sagemaker_session, tmpdir)
Example #19
0
def update_submit_s3_uri(estimator, job_name):
    """Update the S3 URI of the framework source directory in given estimator.

    The path segment directly in front of ``/source/sourcedir.tar.gz`` in
    ``estimator.uploaded_code.s3_prefix`` is replaced by ``job_name`` and
    ``estimator.uploaded_code`` is rebuilt with the new URI. Nothing happens
    when the estimator has no uploaded code.

    Args:
        estimator (sagemaker.estimator.Framework): The Framework estimator to update.
        job_name (str): The new job name included in the submit S3 URI

    Returns:
        None: The estimator is updated in place.
    """
    if estimator.uploaded_code is None:
        return

    # The dots are escaped so that only the literal file name
    # "sourcedir.tar.gz" is matched, not any character in their place.
    pattern = r'(?<=/)[^/]+?(?=/source/sourcedir\.tar\.gz)'

    # update the S3 URI with the latest training job.
    # s3://path/old_job/source/sourcedir.tar.gz will become s3://path/new_job/source/sourcedir.tar.gz
    previous_code = estimator.uploaded_code
    # A callable replacement inserts job_name verbatim; a plain string would
    # have backslashes and group references interpreted by re.sub.
    submit_uri = re.sub(pattern, lambda _match: job_name, previous_code.s3_prefix)
    estimator.uploaded_code = fw_utils.UploadedCode(submit_uri, previous_code.script_name)
    def _upload_code(self, key_prefix, repack=False):
        """Stage the entry point code on S3, or repack it into the model archive.

        Sets ``self.uploaded_code``; when ``repack`` is true it also sets
        ``self.repacked_model_data``.

        Args:
            key_prefix: S3 key prefix under which the code archive, or the
                repacked ``model.tar.gz``, is placed.
            repack: If true, the code is repacked together with
                ``self.model_data`` via ``utils.repack_model`` instead of
                being uploaded on its own.
        """
        session = self.sagemaker_session
        use_local_code = utils.get_config_value("local.local_code", session.config)

        if session.local_mode and use_local_code:
            # Local mode with local code enabled: nothing is uploaded.
            self.uploaded_code = None
        elif not repack:
            target_bucket = self.bucket or self.sagemaker_session.default_bucket()
            self.uploaded_code = fw_utils.tar_and_upload_dir(
                session=self.sagemaker_session.boto_session,
                bucket=target_bucket,
                s3_key_prefix=key_prefix,
                script=self.entry_point,
                directory=self.source_dir,
                dependencies=self.dependencies,
            )

        if not repack:
            return

        # Repacking runs even after the local-mode branch above and then
        # overrides the ``uploaded_code`` set there.
        target_bucket = self.bucket or self.sagemaker_session.default_bucket()
        location_parts = [target_bucket, key_prefix, "model.tar.gz"]
        repacked_uri = "s3://" + "/".join(location_parts)

        utils.repack_model(
            inference_script=self.entry_point,
            source_directory=self.source_dir,
            dependencies=self.dependencies,
            model_uri=self.model_data,
            repacked_model_uri=repacked_uri,
            sagemaker_session=self.sagemaker_session,
            kms_key=self.model_kms_key,
        )

        self.repacked_model_data = repacked_uri
        self.uploaded_code = fw_utils.UploadedCode(
            s3_prefix=self.repacked_model_data,
            script_name=os.path.basename(self.entry_point))