Ejemplo n.º 1
0
def test_attach_wrong_framework(sagemaker_session):
    """Attaching MXNet to a job trained with a non-MXNet image must fail.

    The mocked ``describe_training_job`` response reports a
    ``sagemaker-tensorflow`` training image, so ``MXNet.attach`` is expected
    to raise ``ValueError`` with a message about the wrong framework image.
    """
    rjd = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-py2-cpu:1.0.4'},
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
            'checkpoint_path': '"s3://other/1508872349"',
            'sagemaker_program': '"iris-dnn-classifier.py"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"logging.INFO"',
            'training_steps': '100',
            'sagemaker_region': '"us-west-2"'},
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge'},
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'}}
    sagemaker_session.sagemaker_client.describe_training_job = Mock(name='describe_training_job', return_value=rjd)

    with pytest.raises(ValueError) as error:
        MXNet.attach(training_job_name='neo', sagemaker_session=sagemaker_session)
    # ``error`` is pytest's ExceptionInfo wrapper; the raised exception and
    # its message live on ``error.value``, so check the message there.
    assert "didn't use image for requested framework" in str(error.value)
def test_custom_image_estimator_deploy(sagemaker_session):
    """A model created with an explicit ``image`` must report that image."""
    estimator_kwargs = {
        "entry_point": SCRIPT_PATH,
        "role": ROLE,
        "sagemaker_session": sagemaker_session,
        "train_instance_count": INSTANCE_COUNT,
        "train_instance_type": INSTANCE_TYPE,
    }
    estimator = MXNet(**estimator_kwargs)
    estimator.fit(inputs="s3://mybucket/train", job_name="new_name")

    expected_image = "mycustomimage:latest"
    created_model = estimator.create_model(image=expected_image)
    assert created_model.image == expected_image
def test_estimator_script_mode_launch_parameter_server(warning, sagemaker_session):
    """Check the launch-parameter-server hyperparameter and the warning call.

    The estimator is built with ``LAUNCH_PS_DISTRIBUTIONS_DICT``; the
    hyperparameter named by ``MXNet.LAUNCH_PS_ENV_NAME`` must be ``"true"``
    and the ``warning`` fixture must have been called with
    ``("distributions", "distribution")``.
    """
    estimator_kwargs = {
        "entry_point": SCRIPT_PATH,
        "role": ROLE,
        "sagemaker_session": sagemaker_session,
        "train_instance_count": INSTANCE_COUNT,
        "train_instance_type": INSTANCE_TYPE,
        "distributions": LAUNCH_PS_DISTRIBUTIONS_DICT,
        "framework_version": "1.3.0",
    }
    estimator = MXNet(**estimator_kwargs)

    launch_flag = estimator.hyperparameters().get(MXNet.LAUNCH_PS_ENV_NAME)
    assert launch_flag == "true"
    warning.assert_called_with("distributions", "distribution")
Ejemplo n.º 4
0
def test_transform_mxnet_vpc(sagemaker_session, mxnet_full_version,
                             cpu_instance_type):
    """Train and batch-transform MNIST with a VPC-configured MXNet estimator.

    Asserts that the subnets and security group handed to the estimator show
    up in both the training-job description and the model description.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    entry_script = os.path.join(mnist_dir, "mnist.py")

    ec2 = sagemaker_session.boto_session.client("ec2")
    subnet_ids, security_group_id = get_or_create_vpc_resources(ec2)

    estimator_kwargs = dict(
        entry_point=entry_script,
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
        subnets=subnet_ids,
        security_group_ids=[security_group_id],
    )
    estimator = MXNet(**estimator_kwargs)

    # Upload the training and test channels before starting the job.
    train_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "train"),
        key_prefix="integ-test-data/mxnet_mnist/train")
    test_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "test"),
        key_prefix="integ-test-data/mxnet_mnist/test")
    training_job_name = unique_name_from_base("test-mxnet-vpc")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit({"train": train_s3, "test": test_s3},
                      job_name=training_job_name)

    # The training job must carry the VPC settings given to the estimator.
    training_desc = sagemaker_session.sagemaker_client.describe_training_job(
        TrainingJobName=estimator.latest_training_job.name)
    assert set(subnet_ids) == set(training_desc["VpcConfig"]["Subnets"])
    assert [security_group_id] == training_desc["VpcConfig"]["SecurityGroupIds"]

    csv_path = os.path.join(mnist_dir, "transform", "data.csv")
    csv_key_prefix = "integ-test-data/mxnet_mnist/transform"
    transform_s3 = estimator.sagemaker_session.upload_data(
        path=csv_path, key_prefix=csv_key_prefix)

    transformer = _create_transformer_and_transform_job(
        estimator, transform_s3, cpu_instance_type)
    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        # The model behind the transformer must carry the same VPC settings.
        model_info = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)
        assert set(subnet_ids) == set(model_info["VpcConfig"]["Subnets"])
        assert [security_group_id] == model_info["VpcConfig"]["SecurityGroupIds"]
Ejemplo n.º 5
0
def test_local_transform_mxnet(
    sagemaker_local_session,
    tmpdir,
    mxnet_inference_latest_version,
    mxnet_inference_latest_py_version,
    cpu_instance_type,
):
    """Fit MNIST in local mode, run a local transform, and check the output file.

    The transform writes to ``tmpdir`` and the test passes when
    ``data.csv.out`` exists there afterwards.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    entry_script = os.path.join(mnist_dir, "mnist.py")

    estimator_kwargs = dict(
        entry_point=entry_script,
        role="SageMakerRole",
        instance_count=1,
        instance_type="local",
        framework_version=mxnet_inference_latest_version,
        py_version=mxnet_inference_latest_py_version,
        sagemaker_session=sagemaker_local_session,
    )
    estimator = MXNet(**estimator_kwargs)

    train_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "train"),
        key_prefix="integ-test-data/mxnet_mnist/train")
    test_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "test"),
        key_prefix="integ-test-data/mxnet_mnist/test")

    # Training is bounded to five minutes; a timeout is raised, not swallowed.
    with stopit.ThreadingTimeout(5 * 60, swallow_exc=False):
        estimator.fit({"train": train_s3, "test": test_s3})

    transform_dir = os.path.join(mnist_dir, "transform")
    transform_key_prefix = "integ-test-data/mxnet_mnist/transform"
    transform_s3 = estimator.sagemaker_session.upload_data(
        path=transform_dir, key_prefix=transform_key_prefix)

    local_output = "file://{}".format(str(tmpdir))
    transformer = estimator.transformer(
        1,
        "local",
        assemble_with="Line",
        max_payload=1,
        strategy="SingleRecord",
        output_path=local_output,
    )

    # The transform runs while holding the lock at LOCK_PATH.
    with lock.lock(LOCK_PATH):
        transformer.transform(transform_s3,
                              content_type="text/csv",
                              split_type="Line")
        transformer.wait()

    expected_output_file = os.path.join(str(tmpdir), "data.csv.out")
    assert os.path.exists(expected_output_file)
Ejemplo n.º 6
0
def test_estimator_script_mode_dont_launch_parameter_server(sagemaker_session):
    """A disabled parameter server yields a ``"false"`` launch hyperparameter."""
    disabled_ps = {"parameter_server": {"enabled": False}}
    estimator_kwargs = {
        "entry_point": SCRIPT_PATH,
        "role": ROLE,
        "sagemaker_session": sagemaker_session,
        "train_instance_count": INSTANCE_COUNT,
        "train_instance_type": INSTANCE_TYPE,
        "distributions": disabled_ps,
        "framework_version": "1.3.0",
    }
    estimator = MXNet(**estimator_kwargs)

    launch_flag = estimator.hyperparameters().get(MXNet.LAUNCH_PS_ENV_NAME)
    assert launch_flag == "false"
def test_transform_mxnet(sagemaker_session, mxnet_full_version):
    """Train MNIST, run a transform job, and verify its KMS key and output filter.

    The transform job description must report the KMS key passed in as the
    volume key and ``"$"`` as the output filter.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    entry_script = os.path.join(mnist_dir, "mnist.py")

    estimator_kwargs = dict(
        entry_point=entry_script,
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type="ml.c4.xlarge",
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )
    estimator = MXNet(**estimator_kwargs)

    train_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "train"),
        key_prefix="integ-test-data/mxnet_mnist/train")
    test_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "test"),
        key_prefix="integ-test-data/mxnet_mnist/test")
    training_job_name = unique_name_from_base("test-mxnet-transform")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit({"train": train_s3, "test": test_s3},
                      job_name=training_job_name)

    csv_path = os.path.join(mnist_dir, "transform", "data.csv")
    csv_key_prefix = "integ-test-data/mxnet_mnist/transform"
    transform_s3 = estimator.sagemaker_session.upload_data(
        path=csv_path, key_prefix=csv_key_prefix)

    volume_kms_key = get_or_create_kms_key(sagemaker_session)
    expected_output_filter = "$"

    transformer = _create_transformer_and_transform_job(
        estimator,
        transform_s3,
        volume_kms_key,
        input_filter=None,
        output_filter=expected_output_filter,
        join_source=None,
    )
    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()

    # Inspect the finished transform job through the transformer's own session.
    transform_desc = transformer.sagemaker_session.sagemaker_client.describe_transform_job(
        TransformJobName=transformer.latest_transform_job.name)
    assert volume_kms_key == transform_desc["TransformResources"]["VolumeKmsKeyId"]
    assert expected_output_filter == transform_desc["DataProcessing"]["OutputFilter"]
Ejemplo n.º 8
0
def test_mxnet_neo(strftime, sagemaker_session, neo_mxnet_version):
    """Compile a fitted MXNet estimator and deploy with and without the compiled model.

    Verifies the sequence of session calls, the arguments passed to
    ``compile_model``, the compiled model's image URI, and that deploying the
    compiled model with ``CPU_C5`` raises an error starting with
    ``"No compiled model for"``.
    """
    estimator_kwargs = dict(
        entry_point=SCRIPT_PATH,
        framework_version="1.6",
        py_version="py3",
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        base_job_name="sagemaker-mxnet",
    )
    estimator = MXNet(**estimator_kwargs)
    estimator.fit()

    data_shape = {"data": [100, 1, 28, 28]}
    s3_output = "s3://neo-sdk-test"

    compiled = estimator.compile_model(
        target_instance_family="ml_c4",
        input_shape=data_shape,
        output_path=s3_output,
        framework="mxnet",
        framework_version=neo_mxnet_version,
    )

    # Collect the name (first element) of every call recorded on the session.
    recorded_names = []
    for recorded_call in sagemaker_session.method_calls:
        recorded_names.append(recorded_call[0])
    assert recorded_names == [
        "train",
        "logs_for_job",
        "sagemaker_client.describe_training_job",
        "compile_model",
        "wait_for_compilation_job",
    ]

    # Index 3 is the ``compile_model`` call; element 2 holds its keyword args.
    expected_args = _create_compilation_job(json.dumps(data_shape), s3_output)
    actual_args = sagemaker_session.method_calls[3][2]
    assert expected_args == actual_args

    assert compiled.image_uri == _neo_inference_image(neo_mxnet_version)

    predictor = estimator.deploy(1, CPU, use_compiled_model=True)
    assert isinstance(predictor, MXNetPredictor)

    with pytest.raises(Exception) as wrong_target:
        estimator.deploy(1, CPU_C5, use_compiled_model=True)
    failure_message = str(wrong_target.value)
    assert failure_message.startswith("No compiled model for")

    # deploy without sagemaker Neo should continue to work
    estimator.deploy(1, CPU)
Ejemplo n.º 9
0
def test_transform_mxnet_vpc(sagemaker_session, mxnet_full_version):
    """Train and batch-transform MNIST in a VPC and verify the VPC settings.

    Both the training-job description and the model description must report
    the subnets and security group given to the estimator.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')
    entry_script = os.path.join(mnist_dir, 'mnist.py')

    ec2 = sagemaker_session.boto_session.client('ec2')
    region = sagemaker_session.boto_session.region_name
    subnet_ids, security_group_id = get_or_create_vpc_resources(ec2, region)

    estimator_kwargs = dict(
        entry_point=entry_script,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
        subnets=subnet_ids,
        security_group_ids=[security_group_id],
    )
    estimator = MXNet(**estimator_kwargs)

    train_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'train'),
        key_prefix='integ-test-data/mxnet_mnist/train')
    test_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'test'),
        key_prefix='integ-test-data/mxnet_mnist/test')
    training_job_name = unique_name_from_base('test-mxnet-vpc')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit({'train': train_s3, 'test': test_s3},
                      job_name=training_job_name)

    # The training job must carry the VPC settings given to the estimator.
    training_desc = sagemaker_session.sagemaker_client.describe_training_job(
        TrainingJobName=estimator.latest_training_job.name)
    assert set(subnet_ids) == set(training_desc['VpcConfig']['Subnets'])
    assert [security_group_id] == training_desc['VpcConfig']['SecurityGroupIds']

    csv_path = os.path.join(mnist_dir, 'transform', 'data.csv')
    csv_key_prefix = 'integ-test-data/mxnet_mnist/transform'
    transform_s3 = estimator.sagemaker_session.upload_data(
        path=csv_path, key_prefix=csv_key_prefix)

    transformer = _create_transformer_and_transform_job(estimator, transform_s3)
    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        # The model behind the transformer must carry the same VPC settings.
        model_info = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)
        assert set(subnet_ids) == set(model_info['VpcConfig']['Subnets'])
        assert [security_group_id] == model_info['VpcConfig']['SecurityGroupIds']
Ejemplo n.º 10
0
def test_single_transformer_multiple_jobs(sagemaker_session,
                                          mxnet_full_version,
                                          cpu_instance_type):
    """Run two transform jobs on one transformer and check its output path.

    After each ``transform`` call the transformer's ``output_path`` must be
    ``s3://<default bucket>/<job name>`` for the job that was just started.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    entry_script = os.path.join(mnist_dir, "mnist.py")

    estimator_kwargs = dict(
        entry_point=entry_script,
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )
    estimator = MXNet(**estimator_kwargs)

    train_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "train"),
        key_prefix="integ-test-data/mxnet_mnist/train")
    test_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "test"),
        key_prefix="integ-test-data/mxnet_mnist/test")
    training_job_name = unique_name_from_base("test-mxnet-transform")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit({"train": train_s3, "test": test_s3},
                      job_name=training_job_name)

    csv_path = os.path.join(mnist_dir, "transform", "data.csv")
    csv_key_prefix = "integ-test-data/mxnet_mnist/transform"
    transform_s3 = estimator.sagemaker_session.upload_data(
        path=csv_path, key_prefix=csv_key_prefix)

    transformer = estimator.transformer(1, cpu_instance_type)

    # First job: started before entering the cleanup/timeout context.
    first_job_name = unique_name_from_base("test-mxnet-transform")
    transformer.transform(transform_s3,
                          content_type="text/csv",
                          job_name=first_job_name)
    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        assert transformer.output_path == "s3://{}/{}".format(
            sagemaker_session.default_bucket(), first_job_name)

        # Second job on the same transformer: the output path must follow it.
        second_job_name = unique_name_from_base("test-mxnet-transform")
        transformer.transform(transform_s3,
                              content_type="text/csv",
                              job_name=second_job_name)
        assert transformer.output_path == "s3://{}/{}".format(
            sagemaker_session.default_bucket(), second_job_name)
Ejemplo n.º 11
0
def test_keras_training(docker_image, sagemaker_local_session, local_instance_type,
                        framework_version, tmpdir):
    """Train the Keras MNIST script and check the expected output files exist.

    Output goes to ``tmpdir``; every directory/files pair in
    ``MODEL_SUCCESS_FILES`` is checked there.
    """
    keras_dir = os.path.join(RESOURCE_PATH, 'keras')
    entry_script = os.path.join(keras_dir, 'keras_mnist.py')

    estimator_kwargs = dict(
        entry_point=entry_script,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type=local_instance_type,
        sagemaker_session=sagemaker_local_session,
        image_name=docker_image,
        framework_version=framework_version,
        output_path='file://{}'.format(tmpdir),
    )
    estimator = MXNet(**estimator_kwargs)

    training_data = 'file://{}'.format(os.path.join(keras_dir, 'data'))
    estimator.fit({'train': training_data})

    output_root = str(tmpdir)
    for subdir, expected_files in MODEL_SUCCESS_FILES.items():
        local_mode_utils.assert_output_files_exist(output_root, subdir, expected_files)
Ejemplo n.º 12
0
def test_onnx_export(docker_image, sagemaker_local_session,
                     local_instance_type, framework_version, tmpdir):
    """Fit the estimator and check that ``output/success`` exists under ``tmpdir``."""
    estimator_kwargs = dict(
        entry_point=SCRIPT_PATH,
        role='SageMakerRole',
        instance_count=1,
        instance_type=local_instance_type,
        sagemaker_session=sagemaker_local_session,
        image_uri=docker_image,
        framework_version=framework_version,
        output_path='file://{}'.format(tmpdir),
    )
    estimator = MXNet(**estimator_kwargs)

    estimator.fit()

    expected_files = ['success']
    local_mode_utils.assert_output_files_exist(str(tmpdir), 'output',
                                               expected_files)
def test_local_transform_mxnet(sagemaker_local_session, tmpdir,
                               mxnet_full_version):
    """Fit MNIST, run a local transform under a file lock, and check the output.

    The transform writes to ``tmpdir``; the test passes when ``data.csv.out``
    exists there afterwards. The lock file is closed and the lock released
    even if the transform fails.
    """
    data_path = os.path.join(DATA_DIR, 'mxnet_mnist')
    script_path = os.path.join(data_path, 'mnist.py')

    mx = MXNet(entry_point=script_path,
               role='SageMakerRole',
               train_instance_count=1,
               train_instance_type='ml.c4.xlarge',
               framework_version=mxnet_full_version,
               sagemaker_session=sagemaker_local_session)

    train_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, 'train'),
        key_prefix='integ-test-data/mxnet_mnist/train')
    test_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, 'test'),
        key_prefix='integ-test-data/mxnet_mnist/test')

    with timeout(minutes=15):
        mx.fit({'train': train_input, 'test': test_input})

    transform_input_path = os.path.join(data_path, 'transform')
    transform_input_key_prefix = 'integ-test-data/mxnet_mnist/transform'
    transform_input = mx.sagemaker_session.upload_data(
        path=transform_input_path, key_prefix=transform_input_key_prefix)

    output_path = 'file://%s' % (str(tmpdir))
    transformer = mx.transformer(1,
                                 'local',
                                 assemble_with='Line',
                                 max_payload=1,
                                 strategy='SingleRecord',
                                 output_path=output_path)

    # Since Local Mode uses the same port for serving, we need a lock in order
    # to allow concurrent test execution.
    # The ``with`` block closes the lock file, and ``try/finally`` releases
    # the lock even when the transform raises, so a failing test cannot leave
    # the lock held or the descriptor open.
    with open(LOCK_PATH, 'w') as local_mode_lock_fd:
        local_mode_lock = local_mode_lock_fd.fileno()
        fcntl.lockf(local_mode_lock, fcntl.LOCK_EX)
        try:
            transformer.transform(transform_input,
                                  content_type='text/csv',
                                  split_type='Line')
            transformer.wait()
            time.sleep(5)
        finally:
            fcntl.lockf(local_mode_lock, fcntl.LOCK_UN)

    assert os.path.exists(os.path.join(str(tmpdir), 'data.csv.out'))
def test_attach_custom_image(sagemaker_session):
    """Attaching to a job with a custom training image keeps that image.

    ``describe_training_job`` is mocked to return a job trained with
    ``ubuntu:latest``; the attached estimator must expose it through both
    ``image_name`` and ``train_image()``.
    """
    training_image = 'ubuntu:latest'

    algorithm_spec = {
        'TrainingInputMode': 'File',
        'TrainingImage': training_image,
    }
    hyperparameters = {
        'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
        'sagemaker_program': '"iris-dnn-classifier.py"',
        'sagemaker_s3_uri_training': '"sagemaker-3/integ-test-data/tf_iris"',
        'sagemaker_enable_cloudwatch_metrics': 'false',
        'sagemaker_container_log_level': '"logging.INFO"',
        'sagemaker_job_name': '"neo"',
        'training_steps': '100',
        'sagemaker_region': '"us-west-2"',
    }
    resource_config = {
        'VolumeSizeInGB': 30,
        'InstanceCount': 1,
        'InstanceType': 'ml.c4.xlarge',
    }
    job_description = {
        'AlgorithmSpecification': algorithm_spec,
        'HyperParameters': hyperparameters,
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': resource_config,
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'TrainingJobArn': 'arn:aws:sagemaker:us-west-2:336:training-job/neo',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'},
    }
    describe_mock = Mock(name='describe_training_job', return_value=job_description)
    sagemaker_session.sagemaker_client.describe_training_job = describe_mock

    attached = MXNet.attach(training_job_name='neo', sagemaker_session=sagemaker_session)
    assert attached.image_name == training_image
    assert attached.train_image() == training_image
Ejemplo n.º 15
0
def test_s3_input_mode(sagemaker_session, tuner):
    """The input mode of an ``s3_input`` passed to ``tuner.fit`` reaches the session.

    The second call recorded on the session must carry ``input_mode='Pipe'``
    in its keyword arguments.
    """
    expected_input_mode = 'Pipe'

    failure_script = os.path.join(DATA_DIR, 'mxnet_mnist', 'failure_script.py')
    estimator_kwargs = dict(
        entry_point=failure_script,
        role=ROLE,
        framework_version=FRAMEWORK_VERSION,
        train_instance_count=TRAIN_INSTANCE_COUNT,
        train_instance_type=TRAIN_INSTANCE_TYPE,
        sagemaker_session=sagemaker_session,
    )
    tuner.estimator = MXNet(**estimator_kwargs)

    tuner.tags = [{'Name': 'some-tag-without-a-value'}]

    tuner._hyperparameter_ranges = {
        'num_components': IntegerParameter(2, 4),
        'algorithm_mode': CategoricalParameter(['regular', 'randomized'])
    }

    pipe_input = s3_input('s3://mybucket/train_manifest',
                          input_mode=expected_input_mode)
    tuner.fit(inputs=pipe_input)

    # method_calls[1] is the second recorded call; element 2 holds its kwargs.
    recorded_kwargs = sagemaker_session.method_calls[1][2]
    actual_input_mode = recorded_kwargs['input_mode']
    assert actual_input_mode == expected_input_mode
Ejemplo n.º 16
0
def test_attach_custom_image(sagemaker_session):
    """An estimator attached to a custom-image job reports that image.

    The mocked job description uses ``ubuntu:latest`` as its training image;
    ``image_name`` and ``train_image()`` must both return it.
    """
    training_image = 'ubuntu:latest'

    job_hyperparameters = {
        'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
        'sagemaker_program': '"iris-dnn-classifier.py"',
        'sagemaker_s3_uri_training': '"sagemaker-3/integ-test-data/tf_iris"',
        'sagemaker_enable_cloudwatch_metrics': 'false',
        'sagemaker_container_log_level': '"logging.INFO"',
        'sagemaker_job_name': '"neo"',
        'training_steps': '100',
        'sagemaker_region': '"us-west-2"',
    }
    mocked_description = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': training_image,
        },
        'HyperParameters': job_hyperparameters,
        'RoleArn': 'arn:aws:iam::366:role/SageMakerRole',
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge',
        },
        'StoppingCondition': {'MaxRuntimeInSeconds': 24 * 60 * 60},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': 's3://place/output/neo'},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name='describe_training_job', return_value=mocked_description)

    attached = MXNet.attach(training_job_name='neo', sagemaker_session=sagemaker_session)
    assert attached.image_name == training_image
    assert attached.train_image() == training_image
Ejemplo n.º 17
0
def test_onnx_export(docker_image, sagemaker_local_session,
                     local_instance_type, framework_version, tmpdir):
    """Fit on the ``mxnet_module`` data and check ``model/model.onnx`` is produced."""
    estimator_kwargs = dict(
        entry_point=SCRIPT_PATH,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type=local_instance_type,
        sagemaker_session=sagemaker_local_session,
        image_name=docker_image,
        framework_version=framework_version,
        output_path='file://{}'.format(tmpdir),
    )
    estimator = MXNet(**estimator_kwargs)

    module_dir = os.path.join(ONNX_PATH, 'mxnet_module')
    training_channel = 'file://{}'.format(module_dir)
    estimator.fit({'train': training_channel})

    expected_files = ['model.onnx']
    local_mode_utils.assert_output_files_exist(str(tmpdir), 'model',
                                               expected_files)
Ejemplo n.º 18
0
def test_attach_custom_image(sagemaker_session):
    """Attaching to a custom-image job exposes the image through the URI accessors.

    With ``describe_training_job`` mocked to return an ``ubuntu:latest`` job,
    both ``image_uri`` and ``training_image_uri()`` must equal that image.
    """
    training_image = "ubuntu:latest"

    algorithm_spec = {"TrainingInputMode": "File", "TrainingImage": training_image}
    hyperparameters = {
        "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
        "sagemaker_program": '"iris-dnn-classifier.py"',
        "sagemaker_s3_uri_training": '"sagemaker-3/integ-test-data/tf_iris"',
        "sagemaker_container_log_level": '"logging.INFO"',
        "sagemaker_job_name": '"neo"',
        "training_steps": "100",
        "sagemaker_region": '"us-west-2"',
    }
    resource_config = {
        "VolumeSizeInGB": 30,
        "InstanceCount": 1,
        "InstanceType": "ml.c4.xlarge",
    }
    job_description = {
        "AlgorithmSpecification": algorithm_spec,
        "HyperParameters": hyperparameters,
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": resource_config,
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"},
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    describe_mock = Mock(name="describe_training_job", return_value=job_description)
    sagemaker_session.sagemaker_client.describe_training_job = describe_mock

    attached = MXNet.attach(training_job_name="neo", sagemaker_session=sagemaker_session)
    assert attached.image_uri == training_image
    assert attached.training_image_uri() == training_image
def mxnet_model(sagemaker_local_session):
    """Fit an MXNet MNIST estimator on a ``local`` instance and return its model.

    Uploads the train and test folders, fits the estimator on both channels,
    and returns the result of ``create_model(1)``.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')
    entry_script = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py')

    estimator_kwargs = dict(
        entry_point=entry_script,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='local',
        sagemaker_session=sagemaker_local_session,
    )
    estimator = MXNet(**estimator_kwargs)

    train_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'train'),
        key_prefix='integ-test-data/mxnet_mnist/train')
    test_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'test'),
        key_prefix='integ-test-data/mxnet_mnist/test')

    estimator.fit({'train': train_s3, 'test': test_s3})
    return estimator.create_model(1)
def test_empty_framework_version(warning, sagemaker_session):
    """``framework_version=None`` falls back to ``defaults.MXNET_VERSION``.

    The ``warning`` fixture must have been called with the default version
    and the estimator's ``LATEST_VERSION``.
    """
    estimator_kwargs = dict(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        framework_version=None,
    )
    estimator = MXNet(**estimator_kwargs)

    assert estimator.framework_version == defaults.MXNET_VERSION
    warning.assert_called_with(defaults.MXNET_VERSION, estimator.LATEST_VERSION)
Ejemplo n.º 21
0
def test_s3_input_mode(sagemaker_session, tuner):
    """``tuner.fit`` forwards the ``s3_input`` input mode to the session.

    The keyword arguments of the second call recorded on the session must
    contain ``input_mode == "Pipe"``.
    """
    expected_input_mode = "Pipe"

    failure_script = os.path.join(DATA_DIR, "mxnet_mnist", "failure_script.py")
    estimator_kwargs = dict(
        entry_point=failure_script,
        role=ROLE,
        framework_version=FRAMEWORK_VERSION,
        train_instance_count=TRAIN_INSTANCE_COUNT,
        train_instance_type=TRAIN_INSTANCE_TYPE,
        sagemaker_session=sagemaker_session,
    )
    tuner.estimator = MXNet(**estimator_kwargs)

    tuner.tags = [{"Name": "some-tag-without-a-value"}]

    tuner._hyperparameter_ranges = {
        "num_components": IntegerParameter(2, 4),
        "algorithm_mode": CategoricalParameter(["regular", "randomized"]),
    }

    pipe_input = s3_input("s3://mybucket/train_manifest", input_mode=expected_input_mode)
    tuner.fit(inputs=pipe_input)

    # method_calls[1] is the second recorded call; element 2 holds its kwargs.
    recorded_kwargs = sagemaker_session.method_calls[1][2]
    actual_input_mode = recorded_kwargs["input_mode"]
    assert actual_input_mode == expected_input_mode
def test_mxnet_airflow_config_uploads_data_source_to_s3(
    sagemaker_session, cpu_instance_type, mxnet_full_version
):
    """Build an Airflow training config and check the submit directory has data.

    The whole test runs under ``AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS``; the
    ``sagemaker_submit_directory`` hyperparameter (with its surrounding double
    quotes stripped) is handed to ``_assert_that_s3_url_contains_data``.
    """
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        dataset_dir = os.path.join(DATA_DIR, "chainer_mnist")
        entry_script = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py")

        estimator_kwargs = dict(
            entry_point=entry_script,
            role=ROLE,
            framework_version=mxnet_full_version,
            py_version=PYTHON_VERSION,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )
        estimator = MXNet(**estimator_kwargs)

        channels = {
            "train": "file://" + os.path.join(dataset_dir, "train"),
            "test": "file://" + os.path.join(dataset_dir, "test"),
        }

        training_config = _build_airflow_workflow(
            estimator=estimator,
            instance_type=cpu_instance_type,
            inputs=channels,
        )

        submit_directory = training_config["HyperParameters"]["sagemaker_submit_directory"]
        _assert_that_s3_url_contains_data(
            sagemaker_session,
            submit_directory.strip('"'),
        )
Ejemplo n.º 23
0
def test_transform_mxnet(sagemaker_session, mxnet_full_version):
    """Train MNIST, run a transform job, and verify its KMS key and output filter.

    The transform job description must report the supplied KMS key as the
    volume key and ``"$"`` as the output filter.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')
    entry_script = os.path.join(mnist_dir, 'mnist.py')

    estimator_kwargs = dict(
        entry_point=entry_script,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )
    estimator = MXNet(**estimator_kwargs)

    train_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'train'),
        key_prefix='integ-test-data/mxnet_mnist/train')
    test_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'test'),
        key_prefix='integ-test-data/mxnet_mnist/test')
    training_job_name = unique_name_from_base('test-mxnet-transform')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit({'train': train_s3, 'test': test_s3},
                      job_name=training_job_name)

    csv_path = os.path.join(mnist_dir, 'transform', 'data.csv')
    csv_key_prefix = 'integ-test-data/mxnet_mnist/transform'
    transform_s3 = estimator.sagemaker_session.upload_data(
        path=csv_path, key_prefix=csv_key_prefix)

    volume_kms_key = get_or_create_kms_key(sagemaker_session)
    expected_output_filter = "$"

    transformer = _create_transformer_and_transform_job(
        estimator,
        transform_s3,
        volume_kms_key,
        input_filter=None,
        output_filter=expected_output_filter,
        join_source=None)
    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()

    # Inspect the finished transform job through the transformer's own session.
    transform_desc = transformer.sagemaker_session.sagemaker_client.describe_transform_job(
        TransformJobName=transformer.latest_transform_job.name)
    assert volume_kms_key == transform_desc['TransformResources']['VolumeKmsKeyId']
    assert expected_output_filter == transform_desc['DataProcessing']['OutputFilter']
Ejemplo n.º 24
0
def test_requirements_file(image_uri, sagemaker_local_session,
                           local_instance_type, framework_version, tmpdir):
    """Fit ``entry.py`` from ``SOURCE_PATH`` and check the expected output files.

    Output goes to ``tmpdir``; the files listed under
    ``MODEL_SUCCESS_FILES['output']`` must exist in its ``output`` directory.
    """
    estimator_kwargs = dict(
        entry_point='entry.py',
        source_dir=SOURCE_PATH,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type=local_instance_type,
        image_name=image_uri,
        framework_version=framework_version,
        output_path='file://{}'.format(tmpdir),
        sagemaker_session=sagemaker_local_session,
    )
    estimator = MXNet(**estimator_kwargs)

    estimator.fit()

    expected_files = MODEL_SUCCESS_FILES['output']
    local_mode_utils.assert_output_files_exist(str(tmpdir), 'output',
                                               expected_files)
Ejemplo n.º 25
0
def test_create_model_with_optional_params(sagemaker_session):
    """Check that ``create_model`` honours explicitly passed optional arguments.

    After fitting, a model is created with an overriding ``role`` and
    ``model_server_workers``; both values must be readable back from the
    returned model.
    """
    mx = MXNet(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        container_log_level='"logging.INFO"',
        base_job_name='job',
        source_dir='s3://mybucket/source',
        enable_cloudwatch_metrics='true',
    )
    mx.fit(inputs='s3://mybucket/train', job_name='new_name')

    # Keyword arguments handed to create_model double as the expected
    # attribute values on the resulting model.
    overrides = {'role': 'role', 'model_server_workers': 2}
    model = mx.create_model(**overrides)

    for attribute, expected in overrides.items():
        assert getattr(model, attribute) == expected
def test_stop_transform_job(sagemaker_session, mxnet_full_version, cpu_instance_type):
    """Start a transform job, stop it, and assert its status is ``Stopped``.

    The estimator is trained on the uploaded ``train``/``test`` channels, a
    transformer is created from it and started on the uploaded CSV, and after
    a fixed pause the job is stopped and described through the SageMaker
    client.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")

    mx = MXNet(
        entry_point=os.path.join(mnist_dir, "mnist.py"),
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    # Upload the two training channels: "train" first, then "test".
    channels = {}
    for channel in ("train", "test"):
        channels[channel] = mx.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, channel),
            key_prefix="integ-test-data/mxnet_mnist/{}".format(channel),
        )

    training_job_name = unique_name_from_base("test-mxnet-transform")
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mx.fit(channels, job_name=training_job_name)

    transform_input = mx.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "transform", "data.csv"),
        key_prefix="integ-test-data/mxnet_mnist/transform",
    )

    transformer = mx.transformer(
        1, cpu_instance_type, tags=[{"Key": "some-tag", "Value": "value-for-tag"}]
    )
    transformer.transform(transform_input, content_type="text/csv")

    # Fixed pause before the stop request (presumably so the job has started;
    # the required delay is not established in this file).
    time.sleep(15)

    stopped_job_name = transformer.latest_transform_job.name
    print("Attempting to stop {}".format(stopped_job_name))

    transformer.stop_transform_job()

    client = transformer.latest_transform_job.sagemaker_session.sagemaker_client
    desc = client.describe_transform_job(TransformJobName=stopped_job_name)
    assert desc["TransformJobStatus"] == "Stopped"
Ejemplo n.º 27
0
def test_single_machine(docker_image, sagemaker_local_session, local_instance_type,
                        framework_version, tmpdir):
    """Build a one-instance MXNet estimator and run the shared train-and-assert helper.

    The estimator writes to ``file://<tmpdir>``; the same directory (as a
    string) is passed to ``_train_and_assert_success``.
    """
    output_dir = str(tmpdir)
    mx = MXNet(
        entry_point=SCRIPT_PATH,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type=local_instance_type,
        sagemaker_session=sagemaker_local_session,
        image_name=docker_image,
        framework_version=framework_version,
        output_path='file://{}'.format(tmpdir),
    )

    _train_and_assert_success(mx, output_dir)
Ejemplo n.º 28
0
def create_mxnet_estimator(session: Session, descriptor: BenchmarkDescriptor,
                           source_dir: str,
                           config: SageMakerExecutorConfig) -> Framework:
    """Create an ``MXNet`` estimator for a benchmark descriptor.

    Args:
        session: Forwarded to ``_create_common_estimator_args``.
        descriptor: Forwarded to ``_create_common_estimator_args`` and used
            to derive the hyperparameters via ``get_hyper_params``.
        source_dir: Forwarded to ``_create_common_estimator_args``.
        config: Forwarded to ``_create_common_estimator_args``.

    Returns:
        An ``MXNet`` estimator built from the common estimator arguments plus
        the descriptor's hyperparameters.
    """
    common_args = _create_common_estimator_args(session, descriptor, source_dir, config)
    logger.info(f"Creating MXNet Estimator with parameters {common_args}")
    return MXNet(**common_args, hyperparameters=get_hyper_params(descriptor))
Ejemplo n.º 29
0
def test_attach(sagemaker_session, mxnet_version):
    """Attach to a mocked training job and verify the reconstructed estimator.

    ``describe_training_job`` is replaced with a ``Mock`` returning a canned
    job description; the estimator returned by ``MXNet.attach`` must expose
    the values from that description.
    """
    role_arn = 'arn:aws:iam::366:role/SageMakerRole'
    max_runtime_seconds = 24 * 60 * 60
    output_path = 's3://place/output/neo'
    training_image = (
        '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:{}-cpu-py2'
    ).format(mxnet_version)

    hyperparameters = {
        'sagemaker_submit_directory': '"s3://some/sourcedir.tar.gz"',
        'sagemaker_program': '"iris-dnn-classifier.py"',
        'sagemaker_s3_uri_training': '"sagemaker-3/integ-test-data/tf_iris"',
        'sagemaker_enable_cloudwatch_metrics': 'false',
        'sagemaker_container_log_level': '"logging.INFO"',
        'sagemaker_job_name': '"neo"',
        'training_steps': '100',
        'sagemaker_region': '"us-west-2"',
    }
    returned_job_description = {
        'AlgorithmSpecification': {
            'TrainingInputMode': 'File',
            'TrainingImage': training_image,
        },
        'HyperParameters': hyperparameters,
        'RoleArn': role_arn,
        'ResourceConfig': {
            'VolumeSizeInGB': 30,
            'InstanceCount': 1,
            'InstanceType': 'ml.c4.xlarge',
        },
        'StoppingCondition': {'MaxRuntimeInSeconds': max_runtime_seconds},
        'TrainingJobName': 'neo',
        'TrainingJobStatus': 'Completed',
        'TrainingJobArn': 'arn:aws:sagemaker:us-west-2:336:training-job/neo',
        'OutputDataConfig': {'KmsKeyId': '', 'S3OutputPath': output_path},
        'TrainingJobOutput': {'S3TrainingJobOutput': 's3://here/output.tar.gz'},
    }
    describe_mock = Mock(name='describe_training_job',
                         return_value=returned_job_description)
    sagemaker_session.sagemaker_client.describe_training_job = describe_mock

    estimator = MXNet.attach(training_job_name='neo',
                             sagemaker_session=sagemaker_session)

    assert estimator.latest_training_job.job_name == 'neo'

    # Plain attributes, checked in the same order as listed here.
    expected_attributes = [
        ('py_version', 'py2'),
        ('framework_version', mxnet_version),
        ('role', role_arn),
        ('train_instance_count', 1),
        ('train_max_run', max_runtime_seconds),
        ('input_mode', 'File'),
        ('base_job_name', 'neo'),
        ('output_path', output_path),
        ('output_kms_key', ''),
    ]
    for attribute, expected in expected_attributes:
        assert getattr(estimator, attribute) == expected

    assert estimator.hyperparameters()['training_steps'] == '100'
    assert estimator.source_dir == 's3://some/sourcedir.tar.gz'
    assert estimator.entry_point == 'iris-dnn-classifier.py'
    assert estimator.tags == LIST_TAGS_RESULT['Tags']
Ejemplo n.º 30
0
def test_attach(sagemaker_session, mxnet_version):
    """Verify ``MXNet.attach`` rebuilds an estimator from a described job.

    The SageMaker client's ``describe_training_job`` is mocked to return a
    fixed description of job ``neo``; each assertion compares one estimator
    property with the corresponding value in that description.
    """
    image_template = "1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:{}-cpu-py2"
    training_image = image_template.format(mxnet_version)

    algorithm_spec = {"TrainingInputMode": "File", "TrainingImage": training_image}
    hyperparameters = {
        "sagemaker_submit_directory": '"s3://some/sourcedir.tar.gz"',
        "sagemaker_program": '"iris-dnn-classifier.py"',
        "sagemaker_s3_uri_training": '"sagemaker-3/integ-test-data/tf_iris"',
        "sagemaker_enable_cloudwatch_metrics": "false",
        "sagemaker_container_log_level": '"logging.INFO"',
        "sagemaker_job_name": '"neo"',
        "training_steps": "100",
        "sagemaker_region": '"us-west-2"',
    }
    resource_config = {
        "VolumeSizeInGB": 30,
        "InstanceCount": 1,
        "InstanceType": "ml.c4.xlarge",
    }
    output_config = {"KmsKeyId": "", "S3OutputPath": "s3://place/output/neo"}

    returned_job_description = {
        "AlgorithmSpecification": algorithm_spec,
        "HyperParameters": hyperparameters,
        "RoleArn": "arn:aws:iam::366:role/SageMakerRole",
        "ResourceConfig": resource_config,
        "StoppingCondition": {"MaxRuntimeInSeconds": 24 * 60 * 60},
        "TrainingJobName": "neo",
        "TrainingJobStatus": "Completed",
        "TrainingJobArn": "arn:aws:sagemaker:us-west-2:336:training-job/neo",
        "OutputDataConfig": output_config,
        "TrainingJobOutput": {"S3TrainingJobOutput": "s3://here/output.tar.gz"},
    }
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name="describe_training_job",
        return_value=returned_job_description,
    )

    estimator = MXNet.attach(
        training_job_name="neo",
        sagemaker_session=sagemaker_session,
    )

    # Job identity and framework details.
    assert estimator.latest_training_job.job_name == "neo"
    assert estimator.py_version == "py2"
    assert estimator.framework_version == mxnet_version

    # Role, resources and limits.
    assert estimator.role == "arn:aws:iam::366:role/SageMakerRole"
    assert estimator.train_instance_count == 1
    assert estimator.train_max_run == 24 * 60 * 60
    assert estimator.input_mode == "File"

    # Naming and output configuration.
    assert estimator.base_job_name == "neo"
    assert estimator.output_path == "s3://place/output/neo"
    assert estimator.output_kms_key == ""

    # Hyperparameters, source location and tags.
    assert estimator.hyperparameters()["training_steps"] == "100"
    assert estimator.source_dir == "s3://some/sourcedir.tar.gz"
    assert estimator.entry_point == "iris-dnn-classifier.py"
    assert estimator.tags == LIST_TAGS_RESULT["Tags"]
Ejemplo n.º 31
0
def test_mxnet_local_mode(sagemaker_local_session, mxnet_full_version):
    """Train in local mode, deploy a local endpoint and send one prediction.

    The endpoint is named after the latest training job and is deleted in a
    ``finally`` clause; deployment and prediction run while holding
    ``local_mode_utils.lock()``.
    """
    data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

    estimator_settings = dict(
        entry_point=os.path.join(data_path, 'mnist.py'),
        role='SageMakerRole',
        py_version=PYTHON_VERSION,
        train_instance_count=1,
        train_instance_type='local',
        sagemaker_session=sagemaker_local_session,
        framework_version=mxnet_full_version,
    )
    mx = MXNet(**estimator_settings)

    # Upload "train" first, then "test".
    channels = {}
    for channel in ('train', 'test'):
        channels[channel] = mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, channel),
            key_prefix='integ-test-data/mxnet_mnist/{}'.format(channel))

    mx.fit(channels)
    endpoint_name = mx.latest_training_job.name

    with local_mode_utils.lock():
        try:
            predictor = mx.deploy(1, 'local', endpoint_name=endpoint_name)
            predictor.predict(numpy.zeros(shape=(1, 1, 28, 28)))
        finally:
            # Clean up the endpoint whether or not deploy/predict succeeded.
            mx.delete_endpoint()
def test_mxnet_local_data_local_script(mxnet_full_version):
    """Train from ``file://`` inputs with a ``LocalNoS3Session`` and predict locally.

    Deployment and prediction run while holding ``lock.lock(LOCK_PATH)``;
    the endpoint is deleted in a ``finally`` clause.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")

    mx = MXNet(
        entry_point=os.path.join(mnist_dir, "mnist.py"),
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type="local",
        framework_version=mxnet_full_version,
        sagemaker_session=LocalNoS3Session(),
    )

    # Training data is read straight from the local filesystem.
    local_inputs = {
        "train": "file://" + os.path.join(mnist_dir, "train"),
        "test": "file://" + os.path.join(mnist_dir, "test"),
    }
    mx.fit(local_inputs)
    endpoint_name = mx.latest_training_job.name

    with lock.lock(LOCK_PATH):
        try:
            predictor = mx.deploy(1, "local", endpoint_name=endpoint_name)
            sample = numpy.zeros(shape=(1, 1, 28, 28))
            predictor.predict(sample)
        finally:
            # Clean up the endpoint whether or not deploy/predict succeeded.
            mx.delete_endpoint()
def test_mxnet_local_mode(sagemaker_local_session, mxnet_full_version):
    """Train in local mode on uploaded data, then deploy and predict once.

    Deployment and prediction run while holding ``lock.lock(LOCK_PATH)``;
    the endpoint, named after the latest training job, is deleted in a
    ``finally`` clause.
    """
    data_path = os.path.join(DATA_DIR, "mxnet_mnist")
    script_path = os.path.join(data_path, "mnist.py")

    mx = MXNet(
        entry_point=script_path,
        role="SageMakerRole",
        py_version=PYTHON_VERSION,
        train_instance_count=1,
        train_instance_type="local",
        sagemaker_session=sagemaker_local_session,
        framework_version=mxnet_full_version,
    )

    def _upload(channel):
        # Upload one data channel and return whatever upload_data returns.
        return mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, channel),
            key_prefix="integ-test-data/mxnet_mnist/{}".format(channel),
        )

    train_input = _upload("train")
    test_input = _upload("test")

    mx.fit({"train": train_input, "test": test_input})
    endpoint_name = mx.latest_training_job.name

    with lock.lock(LOCK_PATH):
        try:
            predictor = mx.deploy(1, "local", endpoint_name=endpoint_name)
            predictor.predict(numpy.zeros(shape=(1, 1, 28, 28)))
        finally:
            # Clean up the endpoint whether or not deploy/predict succeeded.
            mx.delete_endpoint()
Ejemplo n.º 34
0
def test_mxnet_local_data_local_script():
    """Train from ``file://`` inputs with a ``LocalNoS3Session`` and predict locally.

    The serving part (deploy, predict, delete) runs under an exclusive
    ``fcntl`` lock on ``LOCK_PATH``. The lock file is opened with a context
    manager so its descriptor is always closed, the lock is taken *before*
    the ``try`` so cleanup only runs once the lock is actually held, and the
    unlock sits in its own ``finally`` so it happens even if endpoint
    deletion fails.
    """
    with open(LOCK_PATH, 'w') as local_mode_lock_fd:
        local_mode_lock = local_mode_lock_fd.fileno()

        script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

        mx = MXNet(entry_point=script_path,
                   role='SageMakerRole',
                   train_instance_count=1,
                   train_instance_type='local',
                   sagemaker_session=LocalNoS3Session())

        train_input = 'file://' + os.path.join(data_path, 'train')
        test_input = 'file://' + os.path.join(data_path, 'test')

        mx.fit({'train': train_input, 'test': test_input})
        endpoint_name = mx.latest_training_job.name

        # Since Local Mode uses the same port for serving, we need a lock in order
        # to allow concurrent test execution. The serving test is really fast so it still
        # makes sense to allow this behavior.
        fcntl.lockf(local_mode_lock, fcntl.LOCK_EX)
        try:
            predictor = mx.deploy(1, 'local', endpoint_name=endpoint_name)
            data = numpy.zeros(shape=(1, 1, 28, 28))
            predictor.predict(data)
        finally:
            try:
                mx.delete_endpoint()
                time.sleep(5)
            finally:
                # Release the lock even when endpoint deletion raised.
                fcntl.lockf(local_mode_lock, fcntl.LOCK_UN)
def test_mxnet_local_data_local_script():
    """Train from ``file://`` inputs with a ``LocalNoS3Session`` and predict locally.

    The serving part (deploy, predict, delete) runs under an exclusive
    ``fcntl`` lock on ``LOCK_PATH``. The lock file is opened with a context
    manager so its descriptor is always closed, the lock is taken *before*
    the ``try`` so cleanup only runs once the lock is actually held, and the
    unlock sits in its own ``finally`` so it happens even if endpoint
    deletion fails.
    """
    with open(LOCK_PATH, 'w') as local_mode_lock_fd:
        local_mode_lock = local_mode_lock_fd.fileno()

        script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

        mx = MXNet(entry_point=script_path, role='SageMakerRole',
                   train_instance_count=1, train_instance_type='local',
                   sagemaker_session=LocalNoS3Session())

        train_input = 'file://' + os.path.join(data_path, 'train')
        test_input = 'file://' + os.path.join(data_path, 'test')

        mx.fit({'train': train_input, 'test': test_input})
        endpoint_name = mx.latest_training_job.name

        # Since Local Mode uses the same port for serving, we need a lock in order
        # to allow concurrent test execution. The serving test is really fast so it still
        # makes sense to allow this behavior.
        fcntl.lockf(local_mode_lock, fcntl.LOCK_EX)
        try:
            predictor = mx.deploy(1, 'local', endpoint_name=endpoint_name)
            data = numpy.zeros(shape=(1, 1, 28, 28))
            predictor.predict(data)
        finally:
            try:
                mx.delete_endpoint()
                time.sleep(5)
            finally:
                # Release the lock even when endpoint deletion raised.
                fcntl.lockf(local_mode_lock, fcntl.LOCK_UN)
Ejemplo n.º 36
0
def test_create_model_with_custom_image(sagemaker_session):
    """Check that a model created from a custom-image estimator inherits its settings.

    The estimator is built with ``image_name='mxnet:2.0'`` and fitted under
    the job name ``new_name``; the model from ``create_model()`` must carry
    the session, image, entry point, role, job name and the pass-through
    settings of the estimator.
    """
    job_name = 'new_name'
    custom_image = 'mxnet:2.0'
    # Settings given to the estimator that must be readable back from the model.
    passthrough = {
        'container_log_level': '"logging.INFO"',
        'source_dir': 's3://mybucket/source',
        'enable_cloudwatch_metrics': 'true',
    }

    mx = MXNet(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        image_name=custom_image,
        base_job_name='job',
        **passthrough
    )

    mx.fit(inputs='s3://mybucket/train', job_name=job_name)
    model = mx.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.image == custom_image
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == job_name
    for attribute, expected in passthrough.items():
        assert getattr(model, attribute) == expected
def test_transform_mxnet(sagemaker_session):
    """Train an MXNet estimator, start a transform job on a CSV, and wait for it.

    Training runs under the ``TRAINING_DEFAULT_TIMEOUT_MINUTES`` timeout; the
    transformer comes from ``_create_transformer_and_transform_job``.
    """
    data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

    mx = MXNet(
        entry_point=os.path.join(data_path, 'mnist.py'),
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        sagemaker_session=sagemaker_session,
    )

    # Upload "train" first, then "test".
    channels = {}
    for channel in ('train', 'test'):
        channels[channel] = mx.sagemaker_session.upload_data(
            path=os.path.join(data_path, channel),
            key_prefix='integ-test-data/mxnet_mnist/{}'.format(channel),
        )

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mx.fit(channels)

    transform_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, 'transform', 'data.csv'),
        key_prefix='integ-test-data/mxnet_mnist/transform',
    )

    transformer = _create_transformer_and_transform_job(mx, transform_input)
    transformer.wait()
Ejemplo n.º 38
0
def test_attach(sagemaker_session, mxnet_version):
    """Attach to a mocked training job and check the estimator's properties.

    ``describe_training_job`` on the session's SageMaker client is replaced
    with a ``Mock`` returning a fixed description of job ``neo``.
    """
    role_arn = 'arn:aws:iam::366:role/SageMakerRole'
    one_day_seconds = 24 * 60 * 60
    s3_output = 's3://place/output/neo'
    image_template = '1.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet-py2-cpu:{}-cpu-py2'

    returned_job_description = dict(
        AlgorithmSpecification=dict(
            TrainingInputMode='File',
            TrainingImage=image_template.format(mxnet_version),
        ),
        HyperParameters=dict(
            sagemaker_submit_directory='"s3://some/sourcedir.tar.gz"',
            sagemaker_program='"iris-dnn-classifier.py"',
            sagemaker_s3_uri_training='"sagemaker-3/integ-test-data/tf_iris"',
            sagemaker_enable_cloudwatch_metrics='false',
            sagemaker_container_log_level='"logging.INFO"',
            sagemaker_job_name='"neo"',
            training_steps='100',
            sagemaker_region='"us-west-2"',
        ),
        RoleArn=role_arn,
        ResourceConfig=dict(
            VolumeSizeInGB=30,
            InstanceCount=1,
            InstanceType='ml.c4.xlarge',
        ),
        StoppingCondition=dict(MaxRuntimeInSeconds=one_day_seconds),
        TrainingJobName='neo',
        TrainingJobStatus='Completed',
        OutputDataConfig=dict(KmsKeyId='', S3OutputPath=s3_output),
        TrainingJobOutput=dict(S3TrainingJobOutput='s3://here/output.tar.gz'),
    )
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name='describe_training_job',
        return_value=returned_job_description,
    )

    estimator = MXNet.attach(training_job_name='neo', sagemaker_session=sagemaker_session)

    assert estimator.latest_training_job.job_name == 'neo'

    # Plain attributes, checked in the same order as listed here.
    expected_attributes = (
        ('py_version', 'py2'),
        ('framework_version', mxnet_version),
        ('role', role_arn),
        ('train_instance_count', 1),
        ('train_max_run', one_day_seconds),
        ('input_mode', 'File'),
        ('base_job_name', 'neo'),
        ('output_path', s3_output),
        ('output_kms_key', ''),
    )
    for attribute, expected in expected_attributes:
        assert getattr(estimator, attribute) == expected

    assert estimator.hyperparameters()['training_steps'] == '100'
    assert estimator.source_dir == 's3://some/sourcedir.tar.gz'
    assert estimator.entry_point == 'iris-dnn-classifier.py'
Ejemplo n.º 39
0
def test_mxnet(strftime, sagemaker_session, mxnet_version):
    """Exercise fit, create_model and deploy on an MXNet estimator.

    Checks, in order: the session methods called by ``fit``, the arguments
    of the ``train`` call, the GPU container definition of the created
    model, that the CPU container image contains ``'cpu'``, and that
    ``deploy`` returns an ``MXNetPredictor``.
    """
    train_uri = 's3://mybucket/train'

    mx = MXNet(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        framework_version=mxnet_version,
    )
    mx.fit(inputs=train_uri)

    # Each recorded call is indexable; item 0 is the method name.
    session_call_names = [call[0] for call in sagemaker_session.method_calls]
    assert session_call_names == ['train', 'logs_for_job']
    boto_names = [call[0] for call in sagemaker_session.boto_session.method_calls]
    assert boto_names == ['resource']

    expected_train_args = _create_train_job(mxnet_version)
    s3_source = expected_train_args['input_config'][0]['DataSource']['S3DataSource']
    s3_source['S3Uri'] = train_uri

    # Item 2 of the first recorded call holds its keyword arguments.
    assert sagemaker_session.method_calls[0][2] == expected_train_args

    model = mx.create_model()

    submit_directory = 's3://mybucket/sagemaker-mxnet-{}/source/sourcedir.tar.gz'.format(TIMESTAMP)
    gpu_image = '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:{}-gpu-py2'.format(
        mxnet_version)
    expected_container_def = {
        'Environment': {
            'SAGEMAKER_SUBMIT_DIRECTORY': submit_directory,
            'SAGEMAKER_PROGRAM': 'dummy_script.py',
            'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
            'SAGEMAKER_REGION': 'us-west-2',
            'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
        },
        'Image': gpu_image,
        'ModelDataUrl': 's3://m/m.tar.gz',
    }
    assert expected_container_def == model.prepare_container_def(GPU)

    cpu_container_def = model.prepare_container_def(CPU)
    assert 'cpu' in cpu_container_def['Image']

    predictor = mx.deploy(1, GPU)
    assert isinstance(predictor, MXNetPredictor)
Ejemplo n.º 40
0
def test_train_image_default(sagemaker_session):
    """Check the training image when no ``framework_version`` is passed.

    The image URI built by ``_get_full_image_uri`` for
    ``defaults.MXNET_VERSION`` must be contained in ``mx.train_image()``.
    """
    mx = MXNet(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
    )

    expected_uri = _get_full_image_uri(defaults.MXNET_VERSION)
    assert expected_uri in mx.train_image()