def test_attach_wrong_framework(sagemaker_session):
    """Attaching a TensorFlow estimator to a job with an MXNet image must fail.

    ``describe_training_job`` is mocked to return a job whose training image
    is ``sagemaker-mxnet-py2-cpu``; ``TensorFlow.attach`` is expected to raise
    ``ValueError`` with a message about the wrong framework image.
    """
    returned_job_description = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:1.0',
        },
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
            'sagemaker_program': '"iris-dnn-classifier.py"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"logging.INFO"',
            'training_steps': '100',
        },
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge',
        },
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name='describe_training_job', return_value=returned_job_description
    )

    with pytest.raises(ValueError) as error:
        TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session)

    # Check the message of the raised exception itself (error.value) rather
    # than the string form of the ExceptionInfo wrapper returned by pytest.
    assert "didn't use image for requested framework" in str(error.value)
def test_attach_wrong_framework(sagemaker_session):
    """Attaching a TensorFlow estimator to a job with an MXNet image must fail.

    The mocked ``describe_training_job`` response names an MXNet training
    image, so ``TensorFlow.attach`` is expected to raise ``ValueError`` whose
    message mentions the wrong framework image.
    """
    returned_job_description = {
        "AlgorithmSpecification": {
            "TrainingInputMode": "File",
            "TrainingImage": "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:1.0",
        },
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_enable_cloudwatch_metrics": "false",
            "sagemaker_container_log_level": '"logging.INFO"',
            "training_steps": "100",
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=returned_job_description
    )

    with pytest.raises(ValueError) as error:
        TensorFlow.attach(training_job_name="neo", sagemaker_session=sagemaker_session)

    # Check the message of the raised exception itself (error.value) rather
    # than the string form of the ExceptionInfo wrapper returned by pytest.
    assert "didn't use image for requested framework" in str(error.value)
def test_mnist_async(sagemaker_session):
    """Start MNIST training without waiting, then attach, deploy and predict.

    TAGS are passed to the ``_assert_*_tags_match`` helpers for the training
    job, the endpoint and the model.
    """
    sm_client = sagemaker_session.sagemaker_client

    estimator = TensorFlow(
        entry_point=SCRIPT,
        role=ROLE,
        train_instance_count=1,
        train_instance_type='ml.c5.4xlarge',
        sagemaker_session=sagemaker_session,
        py_version='py3',
        framework_version=TensorFlow.LATEST_VERSION,
        base_job_name=unique_name_from_base('test-tf-sm-mnist'),
        tags=TAGS,
    )

    data_dir = os.path.join(RESOURCE_PATH, 'data')
    inputs = estimator.sagemaker_session.upload_data(
        path=data_dir, key_prefix='scriptmode/mnist'
    )

    # wait=False: fit() returns without blocking on the training job.
    estimator.fit(inputs, wait=False)
    training_job_name = estimator.latest_training_job.name
    # NOTE(review): presumably gives the job time to register before it is
    # described and attached to -- confirm the fixed 20 s pause is needed.
    time.sleep(20)

    # The endpoint reuses the training job name.
    endpoint_name = training_job_name
    _assert_training_job_tags_match(sm_client, estimator.latest_training_job.name, TAGS)

    with timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = TensorFlow.attach(
            training_job_name=training_job_name, sagemaker_session=sagemaker_session
        )
        predictor = attached.deploy(
            initial_instance_count=1,
            instance_type='ml.c4.xlarge',
            endpoint_name=endpoint_name,
        )

        result = predictor.predict(np.zeros(784))
        print('predict result: {}'.format(result))

        _assert_endpoint_tags_match(sm_client, predictor.endpoint, TAGS)
        _assert_model_tags_match(sm_client, attached.latest_training_job.name, TAGS)
def test_attach_new_repo_name(sagemaker_session, tf_version):
    """Attach to a job whose image lives in the ``sagemaker-tensorflow`` repo.

    ``describe_training_job`` is mocked; the test then checks that the
    attached estimator exposes the values from the mocked job description,
    including the training image itself.
    """
    training_image = (
        '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow:{}-cpu-py2'.format(
            tf_version
        )
    )
    rjd = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': training_image,
        },
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
            'checkpoint_path': '"s3://other/1508872349"',
            'sagemaker_program': '"iris-dnn-classifier.py"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"logging.INFO"',
            'sagemaker_job_name': '"neo"',
            'training_steps': '100',
            'evaluation_steps': '10',
        },
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge',
        },
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name='describe_training_job', return_value=rjd
    )

    estimator = TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session)

    # Job identity and framework details.
    assert estimator.latest_training_job.job_name == 'neo'
    assert estimator.py_version == 'py2'
    assert estimator.framework_version == tf_version
    assert estimator.role == 'arn:aws:iam::366:role/SageMakerRole'

    # Resource and run configuration.
    assert estimator.train_instance_count == 1
    assert estimator.train_max_run == 24 * 60 * 60
    assert estimator.input_mode == 'File'

    # Hyperparameter strings from the description are exposed as typed attributes.
    assert estimator.training_steps == 100
    assert estimator.evaluation_steps == 10
    assert estimator.base_job_name == 'neo'
    assert estimator.output_path == 's3://place/output/neo'
    assert estimator.output_kms_key == ''
    assert estimator.hyperparameters()['training_steps'] == '100'
    assert estimator.source_dir == 's3://some/sourcedir.tar.gz'
    assert estimator.entry_point == 'iris-dnn-classifier.py'
    assert estimator.checkpoint_path == 's3://other/1508872349'
    assert estimator.train_image() == training_image
def test_attach(sagemaker_session, tf_version):
    """Attach to a mocked completed job and check the rebuilt estimator.

    ``describe_training_job`` is mocked to return a job that used a
    ``sagemaker-tensorflow-py2-cpu`` image tagged with ``tf_version``.
    """
    training_image = (
        "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:{}-cpu-py2".format(
            tf_version
        )
    )
    rjd = {
        "AlgorithmSpecification": {
            "TrainingInputMode": "File",
            "TrainingImage": training_image,
        },
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "checkpoint_path": '"s3://other/1508872349"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_enable_cloudwatch_metrics": "false",
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
            "training_steps": "100",
            "evaluation_steps": "10",
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=rjd
    )

    estimator = TensorFlow.attach(training_job_name="neo", sagemaker_session=sagemaker_session)

    # Job identity and framework details.
    assert estimator.latest_training_job.job_name == "neo"
    assert estimator.py_version == "py2"
    assert estimator.framework_version == tf_version
    assert estimator.role == "arn:aws:iam::366:role/SageMakerRole"

    # Resource and run configuration.
    assert estimator.train_instance_count == 1
    assert estimator.train_max_run == 24 * 60 * 60
    assert estimator.input_mode == "File"

    # Hyperparameter strings from the description are exposed as typed attributes.
    assert estimator.training_steps == 100
    assert estimator.evaluation_steps == 10
    assert estimator.base_job_name == "neo"
    assert estimator.output_path == "s3://place/output/neo"
    assert estimator.output_kms_key == ""
    assert estimator.hyperparameters()["training_steps"] == "100"
    assert estimator.source_dir == "s3://some/sourcedir.tar.gz"
    assert estimator.entry_point == "iris-dnn-classifier.py"
    assert estimator.checkpoint_path == "s3://other/1508872349"
def test_attach_custom_image(sagemaker_session):
    """Attach to a mocked job that used a custom training image.

    Checks that both ``image_name`` and ``train_image()`` on the attached
    estimator equal the image from the job description.
    """
    training_image = '1.dkr.ecr.us-west-2.amazonaws.com/tensorflow_with_custom_binary:1.0'

    hyperparameters = {
        'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
        'checkpoint_path': '"s3://other/1508872349"',
        'sagemaker_program': '"iris-dnn-classifier.py"',
        'sagemaker_enable_cloudwatch_metrics': 'false',
        'sagemaker_container_log_level': '"logging.INFO"',
        'sagemaker_job_name': '"neo"',
        'training_steps': '100',
        'evaluation_steps': '10',
    }
    resource_config = {
        'VolumeSizeInGB': 30,
        'InstanceCount': 1,
        'InstanceType': 'ml.c4.xlarge',
    }
    job_description = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': training_image,
        },
        'HyperParameters': hyperparameters,
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': resource_config,
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'},
    }

    # Replace the client call with a mock that returns the description above.
    describe_mock = Mock(name='describe_training_job', return_value=job_description)
    sagemaker_session.sagemaker_client.describe_training_job = describe_mock

    estimator = TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session)

    assert estimator.image_name == training_image
    assert estimator.train_image() == training_image
def test_tf_async(sagemaker_session):
    """Start iris training without waiting, then attach, deploy and predict."""
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')

        estimator = TensorFlow(
            entry_point=script_path,
            role='SageMakerRole',
            training_steps=1,
            evaluation_steps=1,
            hyperparameters={'input_tensor_name': 'inputs'},
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='test-tf',
        )

        inputs = estimator.sagemaker_session.upload_data(
            path=DATA_PATH, key_prefix='integ-test-data/tf_iris'
        )
        # wait=False: fit() returns without blocking on the training job.
        estimator.fit(inputs, wait=False)
        training_job_name = estimator.latest_training_job.name
        # NOTE(review): presumably lets the job register before attach -- confirm.
        time.sleep(20)

    # The endpoint reuses the training job name.
    endpoint_name = training_job_name
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = TensorFlow.attach(
            training_job_name=training_job_name, sagemaker_session=sagemaker_session
        )
        json_predictor = attached.deploy(
            initial_instance_count=1,
            instance_type='ml.c4.xlarge',
            endpoint_name=endpoint_name,
        )

        result = json_predictor.predict([6.4, 3.2, 4.5, 1.5])
        print('predict result: {}'.format(result))
def test_tf_async(sagemaker_session):
    """Train the iris classifier asynchronously, then attach and serve it."""
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')

        estimator = TensorFlow(
            entry_point=script_path,
            role='SageMakerRole',
            training_steps=1,
            evaluation_steps=1,
            hyperparameters={'input_tensor_name': 'inputs'},
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='test-tf',
        )

        upload = estimator.sagemaker_session.upload_data
        inputs = upload(path=DATA_PATH, key_prefix='integ-test-data/tf_iris')

        # fit() is called with wait=False, so it does not block on the job.
        estimator.fit(inputs, wait=False)
        training_job_name = estimator.latest_training_job.name
        # NOTE(review): fixed pause before attaching; presumably lets the job
        # become describable -- confirm.
        time.sleep(20)

    # Endpoint name is the training job name.
    endpoint_name = training_job_name
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached_estimator = TensorFlow.attach(
            training_job_name=training_job_name, sagemaker_session=sagemaker_session
        )
        json_predictor = attached_estimator.deploy(
            initial_instance_count=1,
            instance_type='ml.c4.xlarge',
            endpoint_name=endpoint_name,
        )

        sample = [6.4, 3.2, 4.5, 1.5]
        result = json_predictor.predict(sample)
        print('predict result: {}'.format(result))
def test_attach_custom_image(sagemaker_session):
    """Attach to a mocked job that used a custom training image.

    Checks that both ``image_name`` and ``train_image()`` on the attached
    estimator equal the image from the job description.
    """
    training_image = "1.dkr.ecr.us-west-2.amazonaws.com/tensorflow_with_custom_binary:1.0"

    hyperparameters = {
        "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
        "checkpoint_path": '"s3://other/1508872349"',
        "sagemaker_program": '"iris-dnn-classifier.py"',
        "sagemaker_enable_cloudwatch_metrics": "false",
        "sagemaker_container_log_level": '"logging.INFO"',
        "sagemaker_job_name": '"neo"',
        "training_steps": "100",
        "evaluation_steps": "10",
    }
    resource_config = {
        "VolumeSizeInGB": 30,
        "InstanceCount": 1,
        "InstanceType": "ml.c4.xlarge",
    }
    job_description = {
        "AlgorithmSpecification": {
            "TrainingInputMode": "File",
            "TrainingImage": training_image,
        },
        "HyperParameters": hyperparameters,
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": resource_config,
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }

    # Replace the client call with a mock that returns the description above.
    describe_mock = Mock(name="describe_training_job", return_value=job_description)
    sagemaker_session.sagemaker_client.describe_training_job = describe_mock

    estimator = TensorFlow.attach(training_job_name="neo", sagemaker_session=sagemaker_session)

    assert estimator.image_name == training_image
    assert estimator.train_image() == training_image
def test_attach_custom_image(sagemaker_session):
    """Check that attach keeps a custom training image.

    The mocked job description names a custom image; the attached estimator
    must report it through ``image_name`` and ``train_image()``.
    """
    training_image = '1.dkr.ecr.us-west-2.amazonaws.com/tensorflow_with_custom_binary:1.0'

    algorithm_spec = {'TrainingInputMode': 'File', 'TrainingImage': training_image}
    hyperparameters = {
        'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
        'checkpoint_path': '"s3://other/1508872349"',
        'sagemaker_program': '"iris-dnn-classifier.py"',
        'sagemaker_enable_cloudwatch_metrics': 'false',
        'sagemaker_container_log_level': '"logging.INFO"',
        'sagemaker_job_name': '"neo"',
        'training_steps': '100',
        'evaluation_steps': '10',
    }
    job_description = {
        'AlgorithmSpecification': algorithm_spec,
        'HyperParameters': hyperparameters,
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge',
        },
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'},
    }

    # describe_training_job is replaced by a mock returning the dict above.
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name='describe_training_job',
        return_value=job_description,
    )

    estimator = TensorFlow.attach(
        training_job_name='neo',
        sagemaker_session=sagemaker_session,
    )

    assert estimator.image_name == training_image
    assert estimator.train_image() == training_image
def test_attach(sagemaker_session, tensorflow_training_version, tensorflow_training_py_version):
    """Attach to a mocked completed job and check the rebuilt estimator.

    Skipped for TensorFlow versions above 1.12; the skip message says
    ``framework_name_from_image`` does not infer info from DLC image URIs.
    """
    if Version(tensorflow_training_version) > Version("1.12"):
        pytest.skip("framework_name_from_image doesn't infer info from DLC image URIs.")

    training_image = image_uris.retrieve(
        "tensorflow",
        region=REGION,
        version=tensorflow_training_version,
        py_version=tensorflow_training_py_version,
        instance_type="ml.c4.xlarge",
        image_scope="training",
    )
    rjd = {
        "AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image},
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=rjd
    )

    estimator = TensorFlow.attach(training_job_name="neo", sagemaker_session=sagemaker_session)

    # Job identity and framework details.
    assert estimator.latest_training_job.job_name == "neo"
    assert estimator.py_version == tensorflow_training_py_version
    assert estimator.framework_version == tensorflow_training_version
    assert estimator.role == "arn:aws:iam::366:role/SageMakerRole"

    # Resource and run configuration.
    assert estimator.instance_count == 1
    assert estimator.max_run == 24 * 60 * 60
    assert estimator.input_mode == "File"

    # Naming, output location and source configuration.
    assert estimator.base_job_name == "neo"
    assert estimator.output_path == "s3://place/output/neo"
    assert estimator.output_kms_key == ""
    assert estimator.hyperparameters() is not None
    assert estimator.source_dir == "s3://some/sourcedir.tar.gz"
    assert estimator.entry_point == "iris-dnn-classifier.py"
    assert estimator.training_image_uri() == training_image
def test_attach_wrong_framework(sagemaker_session):
    """Attaching a TensorFlow estimator to a job with an MXNet image must fail.

    The mocked job description uses the ``sagemaker-mxnet-py2-cpu`` image, so
    ``TensorFlow.attach`` is expected to raise ``ValueError`` mentioning that
    the job did not use an image for the requested framework.
    """
    returned_job_description = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:1.0',
        },
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
            'sagemaker_program': '"iris-dnn-classifier.py"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"logging.INFO"',
            'training_steps': '100',
        },
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge',
        },
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name='describe_training_job', return_value=returned_job_description
    )

    with pytest.raises(ValueError) as error:
        TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session)

    # Check the message of the raised exception itself (error.value) rather
    # than the string form of the ExceptionInfo wrapper returned by pytest.
    assert "didn't use image for requested framework" in str(error.value)
def test_attach(sagemaker_session, tf_version):
    """Attach to a mocked completed job and check the rebuilt estimator.

    ``describe_training_job`` is mocked to return a job that used a
    ``sagemaker-tensorflow-py2-cpu`` image tagged with ``tf_version``.
    """
    training_image = (
        '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:{}-cpu-py2'.format(
            tf_version
        )
    )
    rjd = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': training_image,
        },
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
            'checkpoint_path': '"s3://other/1508872349"',
            'sagemaker_program': '"iris-dnn-classifier.py"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"logging.INFO"',
            'sagemaker_job_name': '"neo"',
            'training_steps': '100',
            'evaluation_steps': '10',
        },
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge',
        },
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name='describe_training_job', return_value=rjd
    )

    estimator = TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session)

    # Job identity and framework details.
    assert estimator.latest_training_job.job_name == 'neo'
    assert estimator.py_version == 'py2'
    assert estimator.framework_version == tf_version
    assert estimator.role == 'arn:aws:iam::366:role/SageMakerRole'

    # Resource and run configuration.
    assert estimator.train_instance_count == 1
    assert estimator.train_max_run == 24 * 60 * 60
    assert estimator.input_mode == 'File'

    # Hyperparameter strings from the description are exposed as typed attributes.
    assert estimator.training_steps == 100
    assert estimator.evaluation_steps == 10
    assert estimator.base_job_name == 'neo'
    assert estimator.output_path == 's3://place/output/neo'
    assert estimator.output_kms_key == ''
    assert estimator.hyperparameters()['training_steps'] == '100'
    assert estimator.source_dir == 's3://some/sourcedir.tar.gz'
    assert estimator.entry_point == 'iris-dnn-classifier.py'
    assert estimator.checkpoint_path == 's3://other/1508872349'
def test_mnist_async(sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version):
    """Start MNIST training without waiting, then attach, deploy and predict.

    TAGS are passed to the ``_assert_*_tags_match`` helpers for the training
    job, endpoint and model, and the deployed model name is checked with
    ``_assert_model_name_match``.
    """
    # "2.7.0" is handed to the estimator as "2.7"; other versions pass through.
    framework_version = "2.7" if tf_full_version == "2.7.0" else tf_full_version
    sm_client = sagemaker_session.sagemaker_client

    estimator = TensorFlow(
        entry_point=SCRIPT,
        source_dir=MNIST_RESOURCE_PATH,
        role=ROLE,
        instance_count=1,
        instance_type="ml.c5.4xlarge",
        sagemaker_session=sagemaker_session,
        framework_version=framework_version,
        py_version=tf_full_py_version,
        tags=TAGS,
    )

    data_dir = os.path.join(MNIST_RESOURCE_PATH, "data")
    inputs = estimator.sagemaker_session.upload_data(
        path=data_dir, key_prefix="scriptmode/mnist"
    )

    # wait=False: fit() returns without blocking on the training job.
    job_name = unique_name_from_base("test-tf-sm-async")
    estimator.fit(inputs=inputs, wait=False, job_name=job_name)
    training_job_name = estimator.latest_training_job.name
    # NOTE(review): presumably lets the job register before attach -- confirm.
    time.sleep(20)

    # The endpoint reuses the training job name.
    endpoint_name = training_job_name
    _assert_training_job_tags_match(sm_client, estimator.latest_training_job.name, TAGS)

    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = TensorFlow.attach(
            training_job_name=training_job_name, sagemaker_session=sagemaker_session
        )
        model_name = "model-mnist-async"
        predictor = attached.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            endpoint_name=endpoint_name,
            model_name=model_name,
        )

        result = predictor.predict(np.zeros(784))
        print("predict result: {}".format(result))

        _assert_endpoint_tags_match(sm_client, predictor.endpoint_name, TAGS)
        _assert_model_tags_match(sm_client, model_name, TAGS)
        _assert_model_name_match(sm_client, endpoint_name, model_name)
def test_mnist_async(sagemaker_session, cpu_instance_type):
    """Start script-mode MNIST training without waiting, then attach and serve.

    TAGS are passed to the ``_assert_*_tags_match`` helpers for the training
    job, endpoint and model, and the deployed model name is checked with
    ``_assert_model_name_match``.
    """
    sm_client = sagemaker_session.sagemaker_client

    estimator = TensorFlow(
        entry_point=SCRIPT,
        role=ROLE,
        train_instance_count=1,
        train_instance_type="ml.c5.4xlarge",
        py_version=tests.integ.PYTHON_VERSION,
        sagemaker_session=sagemaker_session,
        script_mode=True,
        # testing py-sdk functionality, no need to run against all TF versions
        framework_version=TensorFlow.LATEST_VERSION,
        tags=TAGS,
    )

    data_dir = os.path.join(MNIST_RESOURCE_PATH, "data")
    inputs = estimator.sagemaker_session.upload_data(
        path=data_dir, key_prefix="scriptmode/mnist"
    )

    # wait=False: fit() returns without blocking on the training job.
    job_name = unique_name_from_base("test-tf-sm-async")
    estimator.fit(inputs=inputs, wait=False, job_name=job_name)
    training_job_name = estimator.latest_training_job.name
    # NOTE(review): presumably lets the job register before attach -- confirm.
    time.sleep(20)

    # The endpoint reuses the training job name.
    endpoint_name = training_job_name
    _assert_training_job_tags_match(sm_client, estimator.latest_training_job.name, TAGS)

    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        attached = TensorFlow.attach(
            training_job_name=training_job_name, sagemaker_session=sagemaker_session
        )
        model_name = "model-mnist-async"
        predictor = attached.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            endpoint_name=endpoint_name,
            model_name=model_name,
        )

        result = predictor.predict(np.zeros(784))
        print("predict result: {}".format(result))

        _assert_endpoint_tags_match(sm_client, predictor.endpoint, TAGS)
        _assert_model_tags_match(sm_client, model_name, TAGS)
        _assert_model_name_match(sm_client, endpoint_name, model_name)