def prepare_framework_container_def(model, instance_type, s3_operations):
    """Prepare the framework model container information. Specify related S3
    operations for Airflow to perform. (Upload `source_dir` )

    Args:
        model (sagemaker.model.FrameworkModel): The framework model
        instance_type (str): The EC2 instance type to deploy this Model to. For
            example, 'ml.p2.xlarge'.
        s3_operations (dict): The dict to specify S3 operations (upload
            `source_dir` ).

    Returns:
        dict: The container information of this framework model.
    """
    deploy_image = model.image
    if not deploy_image:
        # No explicit image: derive the default framework image URI for the
        # session's region, framework name, instance type and versions.
        region_name = model.sagemaker_session.boto_session.region_name
        deploy_image = fw_utils.create_image_uri(
            region_name,
            model.__framework_name__,
            instance_type,
            model.framework_version,
            model.py_version,
        )

    base_name = utils.base_name_from_image(deploy_image)
    # Generate a name if the caller did not set one; the name is also used
    # below to build the source-code S3 key, so this must happen first.
    model.name = model.name or utils.name_from_base(base_name)

    bucket = model.bucket or model.sagemaker_session._default_bucket
    script = os.path.basename(model.entry_point)
    key = "{}/source/sourcedir.tar.gz".format(model.name)

    if model.source_dir and model.source_dir.lower().startswith("s3://"):
        # Source already lives in S3 -- reference it directly, no upload needed.
        code_dir = model.source_dir
        model.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir, script_name=script)
    else:
        # Local source: record an S3Upload operation for Airflow to tar and
        # upload the directory (or the bare script) to the default location.
        code_dir = "s3://{}/{}".format(bucket, key)
        model.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir, script_name=script)
        s3_operations["S3Upload"] = [{
            "Path": model.source_dir or script,
            "Bucket": bucket,
            "Key": key,
            "Tar": True
        }]

    deploy_env = dict(model.env)
    deploy_env.update(model._framework_env_vars())

    try:
        if model.model_server_workers:
            deploy_env[
                sagemaker.model.MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(
                    model.model_server_workers)
    except AttributeError:
        # This applies to a FrameworkModel which is not SageMaker Deep Learning Framework Model
        pass

    return sagemaker.container_def(deploy_image, model.model_data, deploy_env)
def prepare_framework(estimator, s3_operations):
    """Prepare S3 operations (specify where to upload `source_dir` ) and
    environment variables related to framework.

    Args:
        estimator (sagemaker.estimator.Estimator): The framework estimator to
            get information from and update.
        s3_operations (dict): The dict to specify s3 operations (upload
            `source_dir` ).
    """
    if estimator.code_location is not None:
        bucket, key = fw_utils.parse_s3_url(estimator.code_location)
        # Build the S3 key with '/' explicitly: os.path.join would emit
        # backslashes on Windows, producing an invalid S3 key.
        key = "/".join(
            p for p in (key, estimator._current_job_name, "source", "sourcedir.tar.gz") if p
        )
    elif estimator.uploaded_code is not None:
        bucket, key = fw_utils.parse_s3_url(estimator.uploaded_code.s3_prefix)
    else:
        bucket = estimator.sagemaker_session._default_bucket
        key = "/".join((estimator._current_job_name, "source", "sourcedir.tar.gz"))

    script = os.path.basename(estimator.entry_point)

    if estimator.source_dir and estimator.source_dir.lower().startswith("s3://"):
        # Source already in S3: reference it directly; no upload operation.
        code_dir = estimator.source_dir
        estimator.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir, script_name=script)
    else:
        # Local source: Airflow will tar and upload it to the computed key.
        code_dir = "s3://{}/{}".format(bucket, key)
        estimator.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir, script_name=script)
        s3_operations["S3Upload"] = [{
            "Path": estimator.source_dir or estimator.entry_point,
            "Bucket": bucket,
            "Key": key,
            "Tar": True,
        }]

    # Mirror the script-mode hyperparameters the SageMaker containers expect.
    estimator._hyperparameters[sagemaker.model.DIR_PARAM_NAME] = code_dir
    estimator._hyperparameters[sagemaker.model.SCRIPT_PARAM_NAME] = script
    estimator._hyperparameters[
        sagemaker.model.CLOUDWATCH_METRICS_PARAM_NAME] = estimator.enable_cloudwatch_metrics
    estimator._hyperparameters[
        sagemaker.model.CONTAINER_LOG_LEVEL_PARAM_NAME] = estimator.container_log_level
    estimator._hyperparameters[
        sagemaker.model.JOB_NAME_PARAM_NAME] = estimator._current_job_name
    estimator._hyperparameters[
        sagemaker.model.SAGEMAKER_REGION_PARAM_NAME] = estimator.sagemaker_session.boto_region_name
def test_tar_and_upload_dir_s3(sagemaker_session):
    """An s3:// source dir is passed through as-is; nothing is uploaded.

    Fixed typo in the bucket fixture: 'mybucker' -> 'mybucket'. The bucket
    is irrelevant to the assertion (the S3 source dir wins), so the fix is
    purely cosmetic and cannot change the test outcome.
    """
    bucket = 'mybucket'
    s3_key_prefix = 'something/source'
    script = 'mnist.py'
    directory = 's3://m'
    result = fw_utils.tar_and_upload_dir(sagemaker_session, bucket, s3_key_prefix, script, directory)
    assert result == fw_utils.UploadedCode('s3://m', 'mnist.py')
def test_tar_and_upload_dir_not_s3(sagemaker_session):
    # Local (non-S3) source: expect a tarball at s3://<bucket>/<prefix>/sourcedir.tar.gz.
    target_bucket = 'mybucket'
    prefix = 'something/source'
    entry_script = os.path.basename(__file__)
    local_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))

    uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, target_bucket, prefix,
                                           entry_script, local_dir)

    expected = fw_utils.UploadedCode(
        's3://{}/{}/sourcedir.tar.gz'.format(target_bucket, prefix), entry_script)
    assert uploaded == expected
def test_tar_and_upload_dir_s3(sagemaker_session):
    # An s3:// source directory is referenced directly; nothing is uploaded.
    src_bucket = "mybucket"
    prefix = "something/source"
    entry = "mnist.py"
    s3_dir = "s3://m"

    uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, src_bucket, prefix, entry, s3_dir)

    assert uploaded == fw_utils.UploadedCode("s3://m", "mnist.py")
def test_tar_and_upload_dir_with_directories_and_files(sagemaker_session, tmpdir):
    # Nested directories and files all land in the tarball, rooted at src-dir.
    file_tree(tmpdir, ['src-dir/a/b', 'src-dir/a/b2', 'src-dir/x/y', 'src-dir/x/y2', 'src-dir/z'])
    src = os.path.join(str(tmpdir), 'src-dir')

    with patch('shutil.rmtree'):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', 'a/b', src)

    expected = fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                     script_name='a/b')
    assert uploaded == expected
    assert {'/a/b', '/a/b2', '/x/y', '/x/y2', '/z'} == list_source_dir_files(sagemaker_session, tmpdir)
def test_tar_and_upload_dir_with_directory_and_files(sagemaker_session, tmpdir):
    """A source dir with a module and extra files is tarred whole.

    Fixed typo in the fixture file name: 'laucher' -> 'launcher' (applied
    consistently to the created tree and the expected-contents assertion,
    so the test's semantics are unchanged).
    """
    file_tree(tmpdir, ['src-dir/train.py', 'src-dir/launcher', 'src-dir/module/__init__.py'])
    source_dir = os.path.join(str(tmpdir), 'src-dir')

    with patch('shutil.rmtree'):
        result = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', 'train.py',
                                             source_dir)

    assert result == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                           script_name='train.py')
    assert {'/launcher', '/module/__init__.py', '/train.py'} == list_source_dir_files(
        sagemaker_session, tmpdir)
def test_tar_and_upload_dir_no_directory_only_entrypoint(sagemaker_session, tmpdir):
    # With no source_dir, only the entry point file itself is packaged.
    tree_root = file_tree(tmpdir, ['train.py', 'not_me.py'])
    entry_path = os.path.join(tree_root, 'train.py')

    with patch('shutil.rmtree'):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix',
                                               entry_path, None)

    expected = fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                     script_name='train.py')
    assert uploaded == expected
    assert {'/train.py'} == list_source_dir_files(sagemaker_session, tmpdir)
def test_test_tar_and_upload_dir_with_subfolders(sagemaker_session, tmpdir):
    # NOTE(review): the doubled "test_test_" prefix looks like a typo, but the
    # name is kept so test selection and reporting are unchanged.
    # Dependencies listed alongside source_dir are merged into the archive.
    file_tree(tmpdir, ['a/b/c', 'a/b/c2'])
    tree_root = file_tree(tmpdir, ['x/y/z', 'x/y/z2'])

    with patch('shutil.rmtree'):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix', 'b/c',
                                               os.path.join(tree_root, 'a'),
                                               [os.path.join(tree_root, 'x')])

    assert uploaded == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                             script_name='b/c')
    assert {'/b/c', '/b/c2', '/x/y/z', '/x/y/z2'} == list_source_dir_files(sagemaker_session, tmpdir)
def test_tar_and_upload_dir_s3_kms_enabled_by_default(utils, sagemaker_session):
    # With no explicit settings, uploads must request SSE-KMS encryption.
    dest_bucket = "mybucket"
    prefix = "something/source"
    entry = "inference.py"

    uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, dest_bucket, prefix, entry)

    assert uploaded == fw_utils.UploadedCode(
        "s3://{}/{}/sourcedir.tar.gz".format(dest_bucket, prefix), entry
    )

    s3_object = sagemaker_session.resource("s3").Object("", "")
    s3_object.upload_file.assert_called_with(
        utils.create_tar_file(), ExtraArgs={"ServerSideEncryption": "aws:kms"}
    )
def test_tar_and_upload_dir_with_many_folders(sagemaker_session, tmpdir):
    # Directory dependencies keep their directory name in the archive; a file
    # dependency is copied in by basename only.
    file_tree(tmpdir, ['src-dir/a/b', 'src-dir/a/b2', 'common/x/y', 'common/x/y2', 't/y/z'])
    src = os.path.join(str(tmpdir), 'src-dir')
    deps = [os.path.join(str(tmpdir), 'common'), os.path.join(str(tmpdir), 't', 'y', 'z')]

    with patch('shutil.rmtree'):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, 'bucket', 'prefix',
                                               'pipeline.py', src, deps)

    assert uploaded == fw_utils.UploadedCode(s3_prefix='s3://bucket/prefix/sourcedir.tar.gz',
                                             script_name='pipeline.py')
    assert {'/a/b', '/a/b2', '/common/x/y', '/common/x/y2', '/z'} == list_source_dir_files(
        sagemaker_session, tmpdir)
def test_tar_and_upload_dir_with_directory(sagemaker_session, tmpdir):
    # A one-file source dir becomes a tarball containing exactly that file.
    file_tree(tmpdir, ["src-dir/train.py"])
    src = os.path.join(str(tmpdir), "src-dir")

    with patch("shutil.rmtree"):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, "bucket", "prefix",
                                               "train.py", src)

    assert uploaded == fw_utils.UploadedCode(
        s3_prefix="s3://bucket/prefix/sourcedir.tar.gz", script_name="train.py")
    assert {"/train.py"} == list_source_dir_files(sagemaker_session, tmpdir)
def test_tar_and_upload_dir_s3_with_kms(utils, sagemaker_session):
    """A kms_key is threaded through to the S3 upload ExtraArgs.

    Fixed typo'd bucket fixture: 'mybucker' -> 'mybucket', applied
    consistently to both the call and the expected S3 prefix, so the
    assertion semantics are unchanged.
    """
    result = fw_utils.tar_and_upload_dir(sagemaker_session,
                                         'mybucket',
                                         'something/source',
                                         'mnist.py',
                                         kms_key='kms-key')

    assert result == fw_utils.UploadedCode('s3://mybucket/something/source/sourcedir.tar.gz',
                                           'mnist.py')

    extra_args = {'ServerSideEncryption': 'aws:kms', 'SSEKMSKeyId': 'kms-key'}
    obj = sagemaker_session.resource('s3').Object('', '')
    obj.upload_file.assert_called_with(utils.create_tar_file(), ExtraArgs=extra_args)
def test_tar_and_upload_dir_no_directory_only_entrypoint(sagemaker_session, tmpdir):
    # Without a source_dir, only the entry point itself ends up in the tarball.
    tree_root = file_tree(tmpdir, ["train.py", "not_me.py"])
    entry_path = os.path.join(tree_root, "train.py")

    with patch("shutil.rmtree"):
        uploaded = fw_utils.tar_and_upload_dir(sagemaker_session, "bucket", "prefix",
                                               entry_path, None)

    assert uploaded == fw_utils.UploadedCode(
        s3_prefix="s3://bucket/prefix/sourcedir.tar.gz", script_name="train.py")
    assert {"/train.py"} == list_source_dir_files(sagemaker_session, tmpdir)
def prepare_framework(estimator, s3_operations):
    """Prepare S3 operations (specify where to upload `source_dir`) and
    environment variables related to framework.

    Args:
        estimator (sagemaker.estimator.Estimator): The framework estimator to
            get information from and update.
        s3_operations (dict): The dict to specify s3 operations (upload
            `source_dir`).
    """
    if estimator.code_location:
        # code_location is a full S3 URI (s3://bucket/prefix). The previous
        # code used that URI verbatim as the bucket name, which produced an
        # invalid bucket; extract the bucket component instead.
        bucket, _ = fw_utils.parse_s3_url(estimator.code_location)
    else:
        bucket = estimator.sagemaker_session._default_bucket
    key = '{}/source/sourcedir.tar.gz'.format(estimator._current_job_name)
    script = os.path.basename(estimator.entry_point)
    if estimator.source_dir and estimator.source_dir.lower().startswith('s3://'):
        # Source already in S3: reference it directly, no upload operation.
        code_dir = estimator.source_dir
        estimator.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir,
                                                        script_name=script)
    else:
        # Local source: Airflow will tar and upload it to the computed key.
        code_dir = 's3://{}/{}'.format(bucket, key)
        estimator.uploaded_code = fw_utils.UploadedCode(s3_prefix=code_dir,
                                                        script_name=script)
        s3_operations['S3Upload'] = [{
            'Path': estimator.source_dir or script,
            'Bucket': bucket,
            'Key': key,
            'Tar': True
        }]
    # Mirror the script-mode hyperparameters the SageMaker containers expect.
    estimator._hyperparameters[sagemaker.model.DIR_PARAM_NAME] = code_dir
    estimator._hyperparameters[sagemaker.model.SCRIPT_PARAM_NAME] = script
    estimator._hyperparameters[sagemaker.model.CLOUDWATCH_METRICS_PARAM_NAME] = \
        estimator.enable_cloudwatch_metrics
    estimator._hyperparameters[
        sagemaker.model.CONTAINER_LOG_LEVEL_PARAM_NAME] = estimator.container_log_level
    estimator._hyperparameters[
        sagemaker.model.JOB_NAME_PARAM_NAME] = estimator._current_job_name
    estimator._hyperparameters[sagemaker.model.SAGEMAKER_REGION_PARAM_NAME] = \
        estimator.sagemaker_session.boto_region_name
def test_tar_and_upload_dir_s3_without_kms_with_overridden_settings(utils, sagemaker_session):
    # Encryption can be opted out via SessionSettings; ExtraArgs must be None.
    dest_bucket = "mybucket"
    prefix = "something/source"
    entry = "inference.py"
    session_settings = SessionSettings(encrypt_repacked_artifacts=False)

    uploaded = fw_utils.tar_and_upload_dir(
        sagemaker_session, dest_bucket, prefix, entry, settings=session_settings
    )

    assert uploaded == fw_utils.UploadedCode(
        "s3://{}/{}/sourcedir.tar.gz".format(dest_bucket, prefix), entry
    )

    s3_object = sagemaker_session.resource("s3").Object("", "")
    s3_object.upload_file.assert_called_with(utils.create_tar_file(), ExtraArgs=None)
def test_tar_and_upload_dir_with_many_folders(sagemaker_session, tmpdir):
    # Directory dependencies keep their directory name; a file dependency is
    # copied into the archive root by basename.
    file_tree(tmpdir, ["src-dir/a/b", "src-dir/a/b2", "common/x/y", "common/x/y2", "t/y/z"])
    src = os.path.join(str(tmpdir), "src-dir")
    deps = [os.path.join(str(tmpdir), "common"), os.path.join(str(tmpdir), "t", "y", "z")]

    with patch("shutil.rmtree"):
        uploaded = fw_utils.tar_and_upload_dir(
            sagemaker_session, "bucket", "prefix", "pipeline.py", src, deps
        )

    assert uploaded == fw_utils.UploadedCode(
        s3_prefix="s3://bucket/prefix/sourcedir.tar.gz", script_name="pipeline.py"
    )
    assert {"/a/b", "/a/b2", "/common/x/y", "/common/x/y2", "/z"} == list_source_dir_files(
        sagemaker_session, tmpdir
    )
def test_test_tar_and_upload_dir_with_subfolders(sagemaker_session, tmpdir):
    # NOTE(review): the name keeps its doubled "test_test_" prefix so test
    # collection and reporting are unchanged.
    # Dependencies listed alongside source_dir are merged into the archive.
    file_tree(tmpdir, ["a/b/c", "a/b/c2"])
    tree_root = file_tree(tmpdir, ["x/y/z", "x/y/z2"])

    with patch("shutil.rmtree"):
        uploaded = fw_utils.tar_and_upload_dir(
            sagemaker_session,
            "bucket",
            "prefix",
            "b/c",
            os.path.join(tree_root, "a"),
            [os.path.join(tree_root, "x")],
        )

    assert uploaded == fw_utils.UploadedCode(
        s3_prefix="s3://bucket/prefix/sourcedir.tar.gz", script_name="b/c")
    assert {"/b/c", "/b/c2", "/x/y/z", "/x/y/z2"} == list_source_dir_files(sagemaker_session, tmpdir)
def update_submit_s3_uri(estimator, job_name):
    """Update the S3 URI of the framework source directory in given estimator.

    Rewrites ``estimator.uploaded_code`` in place so that
    s3://path/old_job/source/sourcedir.tar.gz becomes
    s3://path/new_job/source/sourcedir.tar.gz. No-op when no code has been
    uploaded yet.

    Args:
        estimator (sagemaker.estimator.Framework): The Framework estimator to
            update.
        job_name (str): The new job name included in the submit S3 URI
    """
    if estimator.uploaded_code is None:
        return

    # Dots are escaped: previously they were regex wildcards, so an unrelated
    # key such as ".../source/sourcedirXtarYgz" would also have matched.
    pattern = r'(?<=/)[^/]+?(?=/source/sourcedir\.tar\.gz)'

    # Replace the job-name path segment with the latest training job name.
    submit_uri = estimator.uploaded_code.s3_prefix
    submit_uri = re.sub(pattern, job_name, submit_uri)
    script_name = estimator.uploaded_code.script_name
    estimator.uploaded_code = fw_utils.UploadedCode(submit_uri, script_name)
def _upload_code(self, key_prefix, repack=False):
    """Upload this model's inference code to S3 and record it on ``self``.

    Sets ``self.uploaded_code`` (and, when repacking, also
    ``self.repacked_model_data``).

    Args:
        key_prefix (str): S3 key prefix under which the code (or repacked
            model archive) is placed.
        repack (bool): When True, repack ``self.model_data`` together with the
            inference code into ``model.tar.gz`` instead of uploading the
            code separately.
    """
    local_code = utils.get_config_value("local.local_code", self.sagemaker_session.config)
    if self.sagemaker_session.local_mode and local_code:
        # Local mode with local code enabled: nothing to upload.
        self.uploaded_code = None
    elif not repack:
        # Plain upload: tar entry_point + source_dir + dependencies to S3.
        bucket = self.bucket or self.sagemaker_session.default_bucket()
        self.uploaded_code = fw_utils.tar_and_upload_dir(
            session=self.sagemaker_session.boto_session,
            bucket=bucket,
            s3_key_prefix=key_prefix,
            script=self.entry_point,
            directory=self.source_dir,
            dependencies=self.dependencies,
        )

    if repack:
        # NOTE(review): this branch is outside the elif chain above, so it
        # also runs in local mode -- confirm that is intended.
        bucket = self.bucket or self.sagemaker_session.default_bucket()
        repacked_model_data = "s3://" + "/".join(
            [bucket, key_prefix, "model.tar.gz"])

        # Bundle the existing model artifact with the inference code into a
        # single repacked archive at the computed S3 location.
        utils.repack_model(
            inference_script=self.entry_point,
            source_directory=self.source_dir,
            dependencies=self.dependencies,
            model_uri=self.model_data,
            repacked_model_uri=repacked_model_data,
            sagemaker_session=self.sagemaker_session,
            kms_key=self.model_kms_key,
        )

        self.repacked_model_data = repacked_model_data
        # The repacked archive itself now carries the code.
        self.uploaded_code = fw_utils.UploadedCode(
            s3_prefix=self.repacked_model_data,
            script_name=os.path.basename(self.entry_point))