def test_attach_wrong_framework(sagemaker_session):
    returned_job_description = {'AlgorithmSpecification':
                                {'TrainingInputMode': 'File',
                                 'TrainingImage': '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:1.0'},
                                'HyperParameters':
                                    {'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
                                     'sagemaker_program': '"iris-dnn-classifier.py"',
                                     'sagemaker_enable_cloudwatch_metrics': 'false',
                                     'sagemaker_container_log_level': '"logging.INFO"',
                                     'training_steps': '100'},
                                'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
                                'ResourceConfig':
                                    {'VolumeSizeInGB': 30,
                                     'InstanceCount': 1,
                                     'InstanceType': 'ml.c4.xlarge'},
                                'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
                                'TrainingJobName': 'neo',
                                'TrainingJobStatus': 'Completed',
                                'OutputDataConfig': {'KmsKeyId': '',
                                                     'S3OutputPath': 's3://place/output/neo'},
                                'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'}}
    sagemaker_session.sagemaker_client.describe_training_job = Mock(name='describe_training_job',
                                                                    return_value=returned_job_description)

    with pytest.raises(ValueError) as error:
        TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session)
    assert "didn't use image for requested framework" in str(error)
Beispiel #2
0
def test_attach_wrong_framework(sagemaker_session):
    returned_job_description = {
        "AlgorithmSpecification": {
            "TrainingInputMode": "File",
            "TrainingImage": "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:1.0",
        },
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_enable_cloudwatch_metrics": "false",
            "sagemaker_container_log_level": '"logging.INFO"',
            "training_steps": "100",
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=returned_job_description
    )

    with pytest.raises(ValueError) as error:
        TensorFlow.attach(training_job_name="neo", sagemaker_session=sagemaker_session)
    assert "didn't use image for requested framework" in str(error)
def test_mnist_async(sagemaker_session):
    estimator = TensorFlow(entry_point=SCRIPT,
                           role=ROLE,
                           train_instance_count=1,
                           train_instance_type='ml.c5.4xlarge',
                           sagemaker_session=sagemaker_session,
                           py_version='py3',
                           framework_version=TensorFlow.LATEST_VERSION,
                           base_job_name=unique_name_from_base('test-tf-sm-mnist'),
                           tags=TAGS)
    inputs = estimator.sagemaker_session.upload_data(
        path=os.path.join(RESOURCE_PATH, 'data'),
        key_prefix='scriptmode/mnist')
    estimator.fit(inputs, wait=False)
    training_job_name = estimator.latest_training_job.name
    time.sleep(20)
    endpoint_name = training_job_name
    _assert_training_job_tags_match(sagemaker_session.sagemaker_client, estimator.latest_training_job.name, TAGS)
    with timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = TensorFlow.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
        predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge',
                                     endpoint_name=endpoint_name)

        result = predictor.predict(np.zeros(784))
        print('predict result: {}'.format(result))
        _assert_endpoint_tags_match(sagemaker_session.sagemaker_client, predictor.endpoint, TAGS)
        _assert_model_tags_match(sagemaker_session.sagemaker_client, estimator.latest_training_job.name, TAGS)
Beispiel #4
0
def test_attach_new_repo_name(sagemaker_session, tf_version):
    training_image = '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow:{}-cpu-py2'.format(
        tf_version)
    rjd = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': training_image
        },
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
            'checkpoint_path': '"s3://other/1508872349"',
            'sagemaker_program': '"iris-dnn-classifier.py"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"logging.INFO"',
            'sagemaker_job_name': '"neo"',
            'training_steps': '100',
            'evaluation_steps': '10'
        },
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge'
        },
        'StoppingCondition': {
            'MaxRuntimeInSeconds': 24 * 60 * 60
        },
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {
            'KmsKeyId': '',
            'S3OutputPath': 's3://place/output/neo'
        },
        'TrainingJobOutput': {
            'S3TrainingJobOutput': 's3://here/output.tar.gz'
        }
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name='describe_training_job', return_value=rjd)

    estimator = TensorFlow.attach(training_job_name='neo',
                                  sagemaker_session=sagemaker_session)
    assert estimator.latest_training_job.job_name == 'neo'
    assert estimator.py_version == 'py2'
    assert estimator.framework_version == tf_version
    assert estimator.role == 'arn:aws:iam::366:role/SageMakerRole'
    assert estimator.train_instance_count == 1
    assert estimator.train_max_run == 24 * 60 * 60
    assert estimator.input_mode == 'File'
    assert estimator.training_steps == 100
    assert estimator.evaluation_steps == 10
    assert estimator.input_mode == 'File'
    assert estimator.base_job_name == 'neo'
    assert estimator.output_path == 's3://place/output/neo'
    assert estimator.output_kms_key == ''
    assert estimator.hyperparameters()['training_steps'] == '100'
    assert estimator.source_dir == 's3://some/sourcedir.tar.gz'
    assert estimator.entry_point == 'iris-dnn-classifier.py'
    assert estimator.checkpoint_path == 's3://other/1508872349'
    assert estimator.train_image() == training_image
def test_attach(sagemaker_session, tf_version):
    training_image = "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:{}-cpu-py2".format(
        tf_version)
    rjd = {
        "AlgorithmSpecification": {
            "TrainingInputMode": "File",
            "TrainingImage": training_image
        },
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "checkpoint_path": '"s3://other/1508872349"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_enable_cloudwatch_metrics": "false",
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
            "training_steps": "100",
            "evaluation_steps": "10",
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {
            "MaxRuntimeInSeconds": 24 * 60 * 60
        },
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {
            "KmsKeyId": "",
            "S3OutputPath": "s3://place/output/neo"
        },
        "TrainingJobOutput": {
            "S3TrainingJobOutput": "s3://here/output.tar.gz"
        },
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=rjd)

    estimator = TensorFlow.attach(training_job_name="neo",
                                  sagemaker_session=sagemaker_session)
    assert estimator.latest_training_job.job_name == "neo"
    assert estimator.py_version == "py2"
    assert estimator.framework_version == tf_version
    assert estimator.role == "arn:aws:iam::366:role/SageMakerRole"
    assert estimator.train_instance_count == 1
    assert estimator.train_max_run == 24 * 60 * 60
    assert estimator.input_mode == "File"
    assert estimator.training_steps == 100
    assert estimator.evaluation_steps == 10
    assert estimator.input_mode == "File"
    assert estimator.base_job_name == "neo"
    assert estimator.output_path == "s3://place/output/neo"
    assert estimator.output_kms_key == ""
    assert estimator.hyperparameters()["training_steps"] == "100"
    assert estimator.source_dir == "s3://some/sourcedir.tar.gz"
    assert estimator.entry_point == "iris-dnn-classifier.py"
    assert estimator.checkpoint_path == "s3://other/1508872349"
def test_attach_custom_image(sagemaker_session):
    training_image = '1.dkr.ecr.us-west-2.amazonaws.com/tensorflow_with_custom_binary:1.0'
    rjd = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': training_image},
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
            'checkpoint_path': '"s3://other/1508872349"',
            'sagemaker_program': '"iris-dnn-classifier.py"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"logging.INFO"',
            'sagemaker_job_name': '"neo"',
            'training_steps': '100',
            'evaluation_steps': '10'},
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge'},
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'}}
    sagemaker_session.sagemaker_client.describe_training_job = Mock(name='describe_training_job', return_value=rjd)

    estimator = TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session)
    assert estimator.image_name == training_image
    assert estimator.train_image() == training_image
Beispiel #7
0
def test_tf_async(sagemaker_session):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')

        estimator = TensorFlow(entry_point=script_path,
                               role='SageMakerRole',
                               training_steps=1,
                               evaluation_steps=1,
                               hyperparameters={'input_tensor_name': 'inputs'},
                               train_instance_count=1,
                               train_instance_type='ml.c4.xlarge',
                               sagemaker_session=sagemaker_session,
                               base_job_name='test-tf')

        inputs = estimator.sagemaker_session.upload_data(
            path=DATA_PATH, key_prefix='integ-test-data/tf_iris')
        estimator.fit(inputs, wait=False)
        training_job_name = estimator.latest_training_job.name
        time.sleep(20)

    endpoint_name = training_job_name
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = TensorFlow.attach(training_job_name=training_job_name,
                                      sagemaker_session=sagemaker_session)
        json_predictor = estimator.deploy(initial_instance_count=1,
                                          instance_type='ml.c4.xlarge',
                                          endpoint_name=endpoint_name)

        result = json_predictor.predict([6.4, 3.2, 4.5, 1.5])
        print('predict result: {}'.format(result))
def test_tf_async(sagemaker_session):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')

        estimator = TensorFlow(entry_point=script_path,
                               role='SageMakerRole',
                               training_steps=1,
                               evaluation_steps=1,
                               hyperparameters={'input_tensor_name': 'inputs'},
                               train_instance_count=1,
                               train_instance_type='ml.c4.xlarge',
                               sagemaker_session=sagemaker_session,
                               base_job_name='test-tf')

        inputs = estimator.sagemaker_session.upload_data(path=DATA_PATH, key_prefix='integ-test-data/tf_iris')
        estimator.fit(inputs, wait=False)
        training_job_name = estimator.latest_training_job.name
        time.sleep(20)

    endpoint_name = training_job_name
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = TensorFlow.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
        json_predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.c4.xlarge',
                                          endpoint_name=endpoint_name)

        result = json_predictor.predict([6.4, 3.2, 4.5, 1.5])
        print('predict result: {}'.format(result))
Beispiel #9
0
def test_attach_custom_image(sagemaker_session):
    training_image = "1.dkr.ecr.us-west-2.amazonaws.com/tensorflow_with_custom_binary:1.0"
    rjd = {
        "AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image},
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "checkpoint_path": '"s3://other/1508872349"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_enable_cloudwatch_metrics": "false",
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
            "training_steps": "100",
            "evaluation_steps": "10",
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=rjd
    )

    estimator = TensorFlow.attach(training_job_name="neo", sagemaker_session=sagemaker_session)
    assert estimator.image_name == training_image
    assert estimator.train_image() == training_image
def test_attach_custom_image(sagemaker_session):
    training_image = '1.dkr.ecr.us-west-2.amazonaws.com/tensorflow_with_custom_binary:1.0'
    rjd = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': training_image},
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
            'checkpoint_path': '"s3://other/1508872349"',
            'sagemaker_program': '"iris-dnn-classifier.py"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"logging.INFO"',
            'sagemaker_job_name': '"neo"',
            'training_steps': '100',
            'evaluation_steps': '10'},
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge'},
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'}}
    sagemaker_session.sagemaker_client.describe_training_job = Mock(name='describe_training_job', return_value=rjd)

    estimator = TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session)
    assert estimator.image_name == training_image
    assert estimator.train_image() == training_image
Beispiel #11
0
def test_attach(sagemaker_session, tensorflow_training_version, tensorflow_training_py_version):
    if Version(tensorflow_training_version) > Version("1.12"):
        pytest.skip("framework_name_from_image doesn't infer info from DLC image URIs.")

    training_image = image_uris.retrieve(
        "tensorflow",
        region=REGION,
        version=tensorflow_training_version,
        py_version=tensorflow_training_py_version,
        instance_type="ml.c4.xlarge",
        image_scope="training",
    )
    rjd = {
        "AlgorithmSpecification": {"TrainingInputMode": "File", "TrainingImage": training_image},
        "HyperParameters": {
            "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
            "sagemaker_program": '"iris-dnn-classifier.py"',
            "sagemaker_container_log_level": '"logging.INFO"',
            "sagemaker_job_name": '"neo"',
        },
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": {
            "VolumeSizeInGB": 30,
            "InstanceCount": 1,
            "InstanceType": "ml.c4.xlarge",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job", return_value=rjd
    )

    estimator = TensorFlow.attach(training_job_name="neo", sagemaker_session=sagemaker_session)
    assert estimator.latest_training_job.job_name == "neo"
    assert estimator.py_version == tensorflow_training_py_version
    assert estimator.framework_version == tensorflow_training_version
    assert estimator.role == "arn:aws:iam::366:role/SageMakerRole"
    assert estimator.instance_count == 1
    assert estimator.max_run == 24 * 60 * 60
    assert estimator.input_mode == "File"
    assert estimator.input_mode == "File"
    assert estimator.base_job_name == "neo"
    assert estimator.output_path == "s3://place/output/neo"
    assert estimator.output_kms_key == ""
    assert estimator.hyperparameters() is not None
    assert estimator.source_dir == "s3://some/sourcedir.tar.gz"
    assert estimator.entry_point == "iris-dnn-classifier.py"
    assert estimator.training_image_uri() == training_image
def test_attach_wrong_framework(sagemaker_session):
    returned_job_description = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:1.0'
        },
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
            'sagemaker_program': '"iris-dnn-classifier.py"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"logging.INFO"',
            'training_steps': '100'

        },
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig':
            {'VolumeSizeInGB': 30,
             'InstanceCount': 1,
             'InstanceType': 'ml.c4.xlarge'
             },
        'StoppingCondition': {
            'MaxRuntimeInSeconds': 24 * 60 * 60
        },
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {
            'KmsKeyId': '',
            'S3OutputPath': 's3://place/output/neo'
        },
        'TrainingJobOutput': {
            'S3TrainingJobOutput': 's3://here/output.tar.gz'
        }
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(name='describe_training_job',
                                                                    return_value=returned_job_description)

    with pytest.raises(ValueError) as error:
        TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session)
    assert "didn't use image for requested framework" in str(error)
def test_attach(sagemaker_session, tf_version):
    training_image = '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:{}-cpu-py2'.format(tf_version)
    rjd = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': training_image
        },
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
            'checkpoint_path': '"s3://other/1508872349"',
            'sagemaker_program': '"iris-dnn-classifier.py"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"logging.INFO"',
            'sagemaker_job_name': '"neo"',
            'training_steps': '100',
            'evaluation_steps': '10'
        },
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge'
        },
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'}}
    sagemaker_session.sagemaker_client.describe_training_job = Mock(name='describe_training_job', return_value=rjd)

    estimator = TensorFlow.attach(training_job_name='neo', sagemaker_session=sagemaker_session)
    assert estimator.latest_training_job.job_name == 'neo'
    assert estimator.py_version == 'py2'
    assert estimator.framework_version == tf_version
    assert estimator.role == 'arn:aws:iam::366:role/SageMakerRole'
    assert estimator.train_instance_count == 1
    assert estimator.train_max_run == 24 * 60 * 60
    assert estimator.input_mode == 'File'
    assert estimator.training_steps == 100
    assert estimator.evaluation_steps == 10
    assert estimator.input_mode == 'File'
    assert estimator.base_job_name == 'neo'
    assert estimator.output_path == 's3://place/output/neo'
    assert estimator.output_kms_key == ''
    assert estimator.hyperparameters()['training_steps'] == '100'
    assert estimator.source_dir == 's3://some/sourcedir.tar.gz'
    assert estimator.entry_point == 'iris-dnn-classifier.py'
    assert estimator.checkpoint_path == 's3://other/1508872349'
Beispiel #14
0
def test_mnist_async(sagemaker_session, cpu_instance_type, tf_full_version,
                     tf_full_py_version):
    if tf_full_version == "2.7.0":
        tf_full_version = "2.7"

    estimator = TensorFlow(
        entry_point=SCRIPT,
        source_dir=MNIST_RESOURCE_PATH,
        role=ROLE,
        instance_count=1,
        instance_type="ml.c5.4xlarge",
        sagemaker_session=sagemaker_session,
        framework_version=tf_full_version,
        py_version=tf_full_py_version,
        tags=TAGS,
    )
    inputs = estimator.sagemaker_session.upload_data(
        path=os.path.join(MNIST_RESOURCE_PATH, "data"),
        key_prefix="scriptmode/mnist")
    estimator.fit(inputs=inputs,
                  wait=False,
                  job_name=unique_name_from_base("test-tf-sm-async"))
    training_job_name = estimator.latest_training_job.name
    time.sleep(20)
    endpoint_name = training_job_name
    _assert_training_job_tags_match(sagemaker_session.sagemaker_client,
                                    estimator.latest_training_job.name, TAGS)
    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
            endpoint_name, sagemaker_session):
        estimator = TensorFlow.attach(training_job_name=training_job_name,
                                      sagemaker_session=sagemaker_session)
        model_name = "model-mnist-async"
        predictor = estimator.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            endpoint_name=endpoint_name,
            model_name=model_name,
        )

        result = predictor.predict(np.zeros(784))
        print("predict result: {}".format(result))
        _assert_endpoint_tags_match(sagemaker_session.sagemaker_client,
                                    predictor.endpoint_name, TAGS)
        _assert_model_tags_match(sagemaker_session.sagemaker_client,
                                 model_name, TAGS)
        _assert_model_name_match(sagemaker_session.sagemaker_client,
                                 endpoint_name, model_name)
def test_mnist_async(sagemaker_session, cpu_instance_type):
    estimator = TensorFlow(
        entry_point=SCRIPT,
        role=ROLE,
        train_instance_count=1,
        train_instance_type="ml.c5.4xlarge",
        py_version=tests.integ.PYTHON_VERSION,
        sagemaker_session=sagemaker_session,
        script_mode=True,
        # testing py-sdk functionality, no need to run against all TF versions
        framework_version=TensorFlow.LATEST_VERSION,
        tags=TAGS,
    )
    inputs = estimator.sagemaker_session.upload_data(
        path=os.path.join(MNIST_RESOURCE_PATH, "data"),
        key_prefix="scriptmode/mnist")
    estimator.fit(inputs=inputs,
                  wait=False,
                  job_name=unique_name_from_base("test-tf-sm-async"))
    training_job_name = estimator.latest_training_job.name
    time.sleep(20)
    endpoint_name = training_job_name
    _assert_training_job_tags_match(sagemaker_session.sagemaker_client,
                                    estimator.latest_training_job.name, TAGS)
    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
            endpoint_name, sagemaker_session):
        estimator = TensorFlow.attach(training_job_name=training_job_name,
                                      sagemaker_session=sagemaker_session)
        model_name = "model-mnist-async"
        predictor = estimator.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            endpoint_name=endpoint_name,
            model_name=model_name,
        )

        result = predictor.predict(np.zeros(784))
        print("predict result: {}".format(result))
        _assert_endpoint_tags_match(sagemaker_session.sagemaker_client,
                                    predictor.endpoint, TAGS)
        _assert_model_tags_match(sagemaker_session.sagemaker_client,
                                 model_name, TAGS)
        _assert_model_name_match(sagemaker_session.sagemaker_client,
                                 endpoint_name, model_name)