Beispiel #1
0
def test_estimator_wrong_version_launch_parameter_server(sagemaker_session):
    """Constructing an MXNet 1.2.1 estimator with ``distributions`` must fail.

    The constructor is expected to raise ``ValueError`` whose message states
    that the option is only valid for versions 1.3 and higher.
    """
    with pytest.raises(ValueError) as e:
        MXNet(
            entry_point=SCRIPT_PATH,
            role=ROLE,
            sagemaker_session=sagemaker_session,
            train_instance_count=INSTANCE_COUNT,
            train_instance_type=INSTANCE_TYPE,
            distributions=LAUNCH_PS_DISTRIBUTIONS_DICT,
            framework_version="1.2.1",
        )
    # Check the raised exception itself: ``str()`` of the ExceptionInfo
    # wrapper is not guaranteed to be the exception message.
    assert "The distributions option is valid for only versions 1.3 and higher" in str(
        e.value)
Beispiel #2
0
def test_mx_enable_sm_metrics(sagemaker_session, mxnet_training_version,
                              mxnet_training_py_version):
    """Check that ``enable_sagemaker_metrics=True`` is truthy on the estimator."""
    estimator_kwargs = dict(
        entry_point=SCRIPT_PATH,
        framework_version=mxnet_training_version,
        py_version=mxnet_training_py_version,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        enable_sagemaker_metrics=True,
    )
    estimator = MXNet(**estimator_kwargs)

    assert estimator.enable_sagemaker_metrics
Beispiel #3
0
def test_estimator_py2_warning(warning, sagemaker_session):
    """Check that a ``py2`` estimator keeps its version and triggers the warning mock.

    The ``warning`` fixture must have been called with the estimator's
    framework name and ``defaults.LATEST_PY2_VERSION``.
    """
    py2_estimator = MXNet(entry_point=SCRIPT_PATH,
                          role=ROLE,
                          sagemaker_session=sagemaker_session,
                          train_instance_count=INSTANCE_COUNT,
                          train_instance_type=INSTANCE_TYPE,
                          py_version="py2")

    assert py2_estimator.py_version == "py2"

    expected_warning_args = (
        py2_estimator.__framework_name__,
        defaults.LATEST_PY2_VERSION,
    )
    warning.assert_called_with(*expected_warning_args)
Beispiel #4
0
def test_distributed_mnist_training(docker_image, sagemaker_local_session,
                                    framework_version, tmpdir):
    """Run a two-instance local training job with the parameter server enabled.

    Output goes to ``tmpdir``; success is checked by
    ``_train_and_assert_success``.
    """
    output_uri = 'file://{}'.format(tmpdir)
    ps_hyperparameters = {'sagemaker_parameter_server_enabled': True}

    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        role='SageMakerRole',
        train_instance_count=2,
        train_instance_type='local',
        sagemaker_session=sagemaker_local_session,
        image_name=docker_image,
        framework_version=framework_version,
        output_path=output_uri,
        hyperparameters=ps_hyperparameters,
    )

    _train_and_assert_success(estimator, str(tmpdir))
Beispiel #5
0
def test_estimator_script_mode_dont_launch_parameter_server(sagemaker_session):
    """Check that disabling the parameter server yields the ``"false"`` hyperparameter.

    The value is read from the estimator's hyperparameters under
    ``MXNet.LAUNCH_PS_ENV_NAME``.
    """
    ps_disabled = {"parameter_server": {"enabled": False}}

    estimator = MXNet(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_count=INSTANCE_COUNT,
                      train_instance_type=INSTANCE_TYPE,
                      distributions=ps_disabled,
                      framework_version="1.3.0")

    launch_ps_flag = estimator.hyperparameters().get(MXNet.LAUNCH_PS_ENV_NAME)
    assert launch_ps_flag == "false"
def test_transform_mxnet_vpc(sagemaker_session, mxnet_full_version, cpu_instance_type):
    """Train and batch-transform MNIST with VPC settings on the estimator.

    Asserts that the training job description and the model description both
    report the subnets and security group the estimator was created with.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    entry_script = os.path.join(mnist_dir, "mnist.py")

    ec2 = sagemaker_session.boto_session.client("ec2")
    subnet_ids, security_group_id = get_or_create_vpc_resources(ec2)

    estimator = MXNet(entry_point=entry_script,
                      role="SageMakerRole",
                      train_instance_count=1,
                      train_instance_type=cpu_instance_type,
                      sagemaker_session=sagemaker_session,
                      framework_version=mxnet_full_version,
                      subnets=subnet_ids,
                      security_group_ids=[security_group_id])

    train_dir = os.path.join(mnist_dir, "train")
    train_uri = estimator.sagemaker_session.upload_data(
        path=train_dir, key_prefix="integ-test-data/mxnet_mnist/train")
    test_dir = os.path.join(mnist_dir, "test")
    test_uri = estimator.sagemaker_session.upload_data(
        path=test_dir, key_prefix="integ-test-data/mxnet_mnist/test")
    training_job_name = unique_name_from_base("test-mxnet-vpc")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        channels = {"train": train_uri, "test": test_uri}
        estimator.fit(channels, job_name=training_job_name)

    # The training job must carry the VPC configuration given above.
    training_desc = sagemaker_session.sagemaker_client.describe_training_job(
        TrainingJobName=estimator.latest_training_job.name)
    assert set(subnet_ids) == set(training_desc["VpcConfig"]["Subnets"])
    assert [security_group_id] == training_desc["VpcConfig"]["SecurityGroupIds"]

    csv_path = os.path.join(mnist_dir, "transform", "data.csv")
    csv_key_prefix = "integ-test-data/mxnet_mnist/transform"
    transform_uri = estimator.sagemaker_session.upload_data(
        path=csv_path, key_prefix=csv_key_prefix)

    transformer = _create_transformer_and_transform_job(
        estimator, transform_uri, cpu_instance_type)
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        # The model behind the transform job must carry the same VPC config.
        model_desc = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)
        assert set(subnet_ids) == set(model_desc["VpcConfig"]["Subnets"])
        assert [security_group_id] == model_desc["VpcConfig"]["SecurityGroupIds"]
Beispiel #7
0
def test_local_transform_mxnet(
    sagemaker_local_session,
    tmpdir,
    mxnet_inference_latest_version,
    mxnet_inference_latest_py_version,
    cpu_instance_type,
):
    """Train MNIST on a ``local`` instance and run a local batch transform.

    The transform writes to ``tmpdir``; the test passes when
    ``data.csv.out`` exists there.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    entry_script = os.path.join(mnist_dir, "mnist.py")

    estimator = MXNet(entry_point=entry_script,
                      role="SageMakerRole",
                      instance_count=1,
                      instance_type="local",
                      framework_version=mxnet_inference_latest_version,
                      py_version=mxnet_inference_latest_py_version,
                      sagemaker_session=sagemaker_local_session)

    train_dir = os.path.join(mnist_dir, "train")
    train_uri = estimator.sagemaker_session.upload_data(
        path=train_dir, key_prefix="integ-test-data/mxnet_mnist/train")
    test_dir = os.path.join(mnist_dir, "test")
    test_uri = estimator.sagemaker_session.upload_data(
        path=test_dir, key_prefix="integ-test-data/mxnet_mnist/test")

    # Training is bounded by a five-minute timeout; exceptions are not swallowed.
    with stopit.ThreadingTimeout(5 * 60, swallow_exc=False):
        estimator.fit({"train": train_uri, "test": test_uri})

    transform_dir = os.path.join(mnist_dir, "transform")
    transform_key_prefix = "integ-test-data/mxnet_mnist/transform"
    transform_uri = estimator.sagemaker_session.upload_data(
        path=transform_dir, key_prefix=transform_key_prefix)

    local_output = "file://%s" % (str(tmpdir))
    transformer = estimator.transformer(1,
                                        "local",
                                        assemble_with="Line",
                                        max_payload=1,
                                        strategy="SingleRecord",
                                        output_path=local_output)

    # The transform runs while holding the lock at LOCK_PATH.
    with lock.lock(LOCK_PATH):
        transformer.transform(transform_uri,
                              content_type="text/csv",
                              split_type="Line")
        transformer.wait()

    expected_output = os.path.join(str(tmpdir), "data.csv.out")
    assert os.path.exists(expected_output)
def test_mxnet(strftime, sagemaker_session, mxnet_version):
    """Fit, create a model and deploy with a mocked session.

    Checks the recorded session calls, the training arguments, the GPU
    container definition, that the CPU image name contains ``cpu``, and that
    ``deploy`` returns an ``MXNetPredictor``.
    """
    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        framework_version=mxnet_version,
    )

    train_uri = 's3://mybucket/train'
    estimator.fit(inputs=train_uri)

    # Only these session and boto calls are expected after ``fit``.
    recorded_session_calls = [
        call[0] for call in sagemaker_session.method_calls
    ]
    assert recorded_session_calls == ['train', 'logs_for_job']
    recorded_boto_calls = [
        call[0] for call in sagemaker_session.boto_session.method_calls
    ]
    assert recorded_boto_calls == ['resource']

    expected_train_args = _create_train_job(mxnet_version)
    s3_source = expected_train_args['input_config'][0]['DataSource']['S3DataSource']
    s3_source['S3Uri'] = train_uri

    first_call = sagemaker_session.method_calls[0]
    actual_train_args = first_call[2]
    assert actual_train_args == expected_train_args

    model = estimator.create_model()

    image_template = '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:{}-gpu-py2'
    submit_directory = 's3://mybucket/sagemaker-mxnet-{}/source/sourcedir.tar.gz'.format(
        TIMESTAMP)
    expected_env = {
        'SAGEMAKER_SUBMIT_DIRECTORY': submit_directory,
        'SAGEMAKER_PROGRAM': 'dummy_script.py',
        'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
        'SAGEMAKER_REGION': 'us-west-2',
        'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
    }
    expected_container_def = {
        'Environment': expected_env,
        'Image': image_template.format(mxnet_version),
        'ModelDataUrl': 's3://m/m.tar.gz',
    }
    assert expected_container_def == model.prepare_container_def(GPU)

    cpu_container_def = model.prepare_container_def(CPU)
    assert 'cpu' in cpu_container_def['Image']

    predictor = estimator.deploy(1, GPU)
    assert isinstance(predictor, MXNetPredictor)
def test_transform_mxnet(sagemaker_session, mxnet_full_version):
    """Train MNIST, run a transform job and check its KMS key and output filter.

    The transform job description must report the KMS key used for the
    volume and the ``"$"`` output filter passed to the job.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    entry_script = os.path.join(mnist_dir, "mnist.py")

    estimator = MXNet(entry_point=entry_script,
                      role="SageMakerRole",
                      train_instance_count=1,
                      train_instance_type="ml.c4.xlarge",
                      sagemaker_session=sagemaker_session,
                      framework_version=mxnet_full_version)

    train_dir = os.path.join(mnist_dir, "train")
    train_uri = estimator.sagemaker_session.upload_data(
        path=train_dir, key_prefix="integ-test-data/mxnet_mnist/train")
    test_dir = os.path.join(mnist_dir, "test")
    test_uri = estimator.sagemaker_session.upload_data(
        path=test_dir, key_prefix="integ-test-data/mxnet_mnist/test")
    training_job_name = unique_name_from_base("test-mxnet-transform")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        channels = {"train": train_uri, "test": test_uri}
        estimator.fit(channels, job_name=training_job_name)

    csv_path = os.path.join(mnist_dir, "transform", "data.csv")
    csv_key_prefix = "integ-test-data/mxnet_mnist/transform"
    transform_uri = estimator.sagemaker_session.upload_data(
        path=csv_path, key_prefix=csv_key_prefix)

    kms_key_arn = get_or_create_kms_key(sagemaker_session)
    output_filter = "$"

    transformer = _create_transformer_and_transform_job(
        estimator,
        transform_uri,
        kms_key_arn,
        input_filter=None,
        output_filter=output_filter,
        join_source=None,
    )
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()

    sm_client = transformer.sagemaker_session.sagemaker_client
    transform_desc = sm_client.describe_transform_job(
        TransformJobName=transformer.latest_transform_job.name)
    assert kms_key_arn == transform_desc["TransformResources"]["VolumeKmsKeyId"]
    assert output_filter == transform_desc["DataProcessing"]["OutputFilter"]
Beispiel #10
0
def test_mx_missing_environment_variables(
    sagemaker_session, mxnet_training_version, mxnet_training_py_version
):
    """Check that ``environment=None`` leaves the estimator's environment falsy."""
    estimator_kwargs = dict(
        entry_point=SCRIPT_PATH,
        framework_version=mxnet_training_version,
        py_version=mxnet_training_py_version,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        environment=None,
    )
    estimator = MXNet(**estimator_kwargs)

    assert not estimator.environment
def test_fit_no_inputs(tuner, sagemaker_session):
    """Check that ``tuner.fit()`` without inputs passes ``input_config=None``.

    The value is read from the keyword arguments of the first recorded
    ``sagemaker_session.tune`` call.
    """
    failure_script = os.path.join(DATA_DIR, 'mxnet_mnist', 'failure_script.py')
    estimator = MXNet(
        entry_point=failure_script,
        role=ROLE,
        framework_version=FRAMEWORK_VERSION,
        train_instance_count=TRAIN_INSTANCE_COUNT,
        train_instance_type=TRAIN_INSTANCE_TYPE,
        sagemaker_session=sagemaker_session,
    )
    tuner.estimator = estimator

    tuner.fit()

    # Each recorded call is a (name, args, kwargs) triple; take the kwargs.
    first_tune_call = sagemaker_session.tune.mock_calls[0]
    tune_kwargs = first_tune_call[2]

    assert tune_kwargs['input_config'] is None
def test_mxnet_mms_version(strftime, repack_model, sagemaker_session,
                           mxnet_version, skip_if_not_mms_version):
    """Fit, create a model and deploy with a mocked session.

    Checks the recorded session calls, the training arguments, the GPU
    container definition (image built via ``_get_full_image_uri``), that the
    CPU image name contains ``cpu``, and that ``deploy`` returns an
    ``MXNetPredictor``.
    """
    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        framework_version=mxnet_version,
    )

    train_uri = 's3://mybucket/train'
    estimator.fit(inputs=train_uri)

    # Only these session and boto calls are expected after ``fit``.
    recorded_session_calls = [
        call[0] for call in sagemaker_session.method_calls
    ]
    assert recorded_session_calls == ['train', 'logs_for_job']
    recorded_boto_calls = [
        call[0] for call in sagemaker_session.boto_session.method_calls
    ]
    assert recorded_boto_calls == ['resource']

    expected_train_args = _create_train_job(mxnet_version)
    s3_source = expected_train_args['input_config'][0]['DataSource']['S3DataSource']
    s3_source['S3Uri'] = train_uri

    first_call = sagemaker_session.method_calls[0]
    actual_train_args = first_call[2]
    assert actual_train_args == expected_train_args

    model = estimator.create_model()

    image_template = _get_full_image_uri(mxnet_version,
                                         IMAGE_REPO_SERVING_NAME, 'gpu')
    expected_env = {
        'SAGEMAKER_SUBMIT_DIRECTORY':
        's3://mybucket/sagemaker-mxnet-2017-11-06-14:14:15.672/model.tar.gz',
        'SAGEMAKER_PROGRAM': 'dummy_script.py',
        'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
        'SAGEMAKER_REGION': 'us-west-2',
        'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
    }
    expected_container_def = {
        'Environment': expected_env,
        'Image': image_template.format(mxnet_version),
        'ModelDataUrl':
        's3://mybucket/sagemaker-mxnet-2017-11-06-14:14:15.672/model.tar.gz',
    }
    assert expected_container_def == model.prepare_container_def(GPU)

    cpu_container_def = model.prepare_container_def(CPU)
    assert 'cpu' in cpu_container_def['Image']

    predictor = estimator.deploy(1, GPU)
    assert isinstance(predictor, MXNetPredictor)
Beispiel #13
0
def test_serialize_categorical_ranges_for_frameworks(sagemaker_session, tuner):
    """Check the first categorical range serialized for an MXNet estimator.

    Its name must be ``blank`` and its values the JSON-quoted strings
    ``"0"`` and ``"5"``.
    """
    estimator = MXNet(entry_point=SCRIPT_NAME,
                      role=ROLE,
                      framework_version=FRAMEWORK_VERSION,
                      train_instance_count=TRAIN_INSTANCE_COUNT,
                      train_instance_type=TRAIN_INSTANCE_TYPE,
                      sagemaker_session=sagemaker_session)
    tuner.estimator = estimator

    serialized_ranges = tuner.hyperparameter_ranges()
    first_categorical = serialized_ranges["CategoricalParameterRanges"][0]

    assert first_categorical["Name"] == "blank"
    assert first_categorical["Values"] == ['"0"', '"5"']
def test_mxnet_neo(strftime, sagemaker_session, neo_mxnet_version):
    """Fit, compile for ``ml_c4`` and deploy the compiled model with a mocked session.

    Checks the recorded session calls, the compilation arguments, the
    compiled model's image URI, that deploying to a different target fails
    with a "No compiled model for" error, and that a plain deploy still works.
    """
    estimator = MXNet(entry_point=SCRIPT_PATH,
                      framework_version="1.6",
                      py_version="py3",
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      instance_count=INSTANCE_COUNT,
                      instance_type=INSTANCE_TYPE,
                      base_job_name="sagemaker-mxnet")
    estimator.fit()

    input_shape = {"data": [100, 1, 28, 28]}
    output_location = "s3://neo-sdk-test"

    compiled_model = estimator.compile_model(target_instance_family="ml_c4",
                                             input_shape=input_shape,
                                             output_path=output_location,
                                             framework="mxnet",
                                             framework_version=neo_mxnet_version)

    recorded_session_calls = [
        call[0] for call in sagemaker_session.method_calls
    ]
    expected_session_calls = [
        "train",
        "logs_for_job",
        "sagemaker_client.describe_training_job",
        "compile_model",
        "wait_for_compilation_job",
    ]
    assert recorded_session_calls == expected_session_calls

    expected_compile_model_args = _create_compilation_job(
        json.dumps(input_shape), output_location)
    # Index 3 is the ``compile_model`` entry in the call list checked above.
    compile_call = sagemaker_session.method_calls[3]
    actual_compile_model_args = compile_call[2]
    assert expected_compile_model_args == actual_compile_model_args

    assert compiled_model.image_uri == _neo_inference_image(neo_mxnet_version)

    predictor = estimator.deploy(1, CPU, use_compiled_model=True)
    assert isinstance(predictor, MXNetPredictor)

    # Deploying the compiled model to a different instance type must fail.
    with pytest.raises(Exception) as wrong_target:
        estimator.deploy(1, CPU_C5, use_compiled_model=True)
    error_message = str(wrong_target.value)
    assert error_message.startswith("No compiled model for")

    # deploy without sagemaker Neo should continue to work
    estimator.deploy(1, CPU)
Beispiel #15
0
def test_transform_mxnet_vpc(sagemaker_session, mxnet_full_version):
    """Train and batch-transform MNIST with VPC settings on the estimator.

    Asserts that the training job description and the model description both
    report the subnets and security group the estimator was created with.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')
    entry_script = os.path.join(mnist_dir, 'mnist.py')

    ec2 = sagemaker_session.boto_session.client('ec2')
    subnet_ids, security_group_id = get_or_create_vpc_resources(
        ec2, sagemaker_session.boto_session.region_name)

    estimator = MXNet(
        entry_point=entry_script,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
        subnets=subnet_ids,
        security_group_ids=[security_group_id],
    )

    train_dir = os.path.join(mnist_dir, 'train')
    train_uri = estimator.sagemaker_session.upload_data(
        path=train_dir, key_prefix='integ-test-data/mxnet_mnist/train')
    test_dir = os.path.join(mnist_dir, 'test')
    test_uri = estimator.sagemaker_session.upload_data(
        path=test_dir, key_prefix='integ-test-data/mxnet_mnist/test')
    training_job_name = unique_name_from_base('test-mxnet-vpc')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        channels = {'train': train_uri, 'test': test_uri}
        estimator.fit(channels, job_name=training_job_name)

    # The training job must carry the VPC configuration given above.
    training_desc = sagemaker_session.sagemaker_client.describe_training_job(
        TrainingJobName=estimator.latest_training_job.name)
    assert set(subnet_ids) == set(training_desc['VpcConfig']['Subnets'])
    assert [security_group_id] == training_desc['VpcConfig']['SecurityGroupIds']

    csv_path = os.path.join(mnist_dir, 'transform', 'data.csv')
    csv_key_prefix = 'integ-test-data/mxnet_mnist/transform'
    transform_uri = estimator.sagemaker_session.upload_data(
        path=csv_path, key_prefix=csv_key_prefix)

    transformer = _create_transformer_and_transform_job(estimator, transform_uri)
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        # The model behind the transform job must carry the same VPC config.
        model_desc = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)
        assert set(subnet_ids) == set(model_desc['VpcConfig']['Subnets'])
        assert [security_group_id] == model_desc['VpcConfig']['SecurityGroupIds']
def test_single_transformer_multiple_jobs(sagemaker_session,
                                          mxnet_full_version,
                                          cpu_instance_type):
    """Run two transform jobs on one transformer and check ``output_path`` each time.

    After each ``transform`` call the transformer's output path must be
    ``s3://<default bucket>/<job name>`` for the job just started.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    entry_script = os.path.join(mnist_dir, "mnist.py")

    estimator = MXNet(entry_point=entry_script,
                      role="SageMakerRole",
                      train_instance_count=1,
                      train_instance_type=cpu_instance_type,
                      sagemaker_session=sagemaker_session,
                      framework_version=mxnet_full_version)

    train_dir = os.path.join(mnist_dir, "train")
    train_uri = estimator.sagemaker_session.upload_data(
        path=train_dir, key_prefix="integ-test-data/mxnet_mnist/train")
    test_dir = os.path.join(mnist_dir, "test")
    test_uri = estimator.sagemaker_session.upload_data(
        path=test_dir, key_prefix="integ-test-data/mxnet_mnist/test")
    training_job_name = unique_name_from_base("test-mxnet-transform")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        channels = {"train": train_uri, "test": test_uri}
        estimator.fit(channels, job_name=training_job_name)

    csv_path = os.path.join(mnist_dir, "transform", "data.csv")
    csv_key_prefix = "integ-test-data/mxnet_mnist/transform"
    transform_uri = estimator.sagemaker_session.upload_data(
        path=csv_path, key_prefix=csv_key_prefix)

    transformer = estimator.transformer(1, cpu_instance_type)

    # First job is started before entering the cleanup context.
    first_job_name = unique_name_from_base("test-mxnet-transform")
    transformer.transform(transform_uri,
                          content_type="text/csv",
                          job_name=first_job_name)
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        assert transformer.output_path == "s3://{}/{}".format(
            sagemaker_session.default_bucket(), first_job_name)

        # Second job on the same transformer must update the output path.
        second_job_name = unique_name_from_base("test-mxnet-transform")
        transformer.transform(transform_uri,
                              content_type="text/csv",
                              job_name=second_job_name)
        assert transformer.output_path == "s3://{}/{}".format(
            sagemaker_session.default_bucket(), second_job_name)
Beispiel #17
0
def test_onnx_export(docker_image, sagemaker_local_session,
                     local_instance_type, framework_version, tmpdir):
    """Run a local training job and check that ``output/success`` was written.

    Output goes to ``tmpdir``; the check is done by
    ``local_mode_utils.assert_output_files_exist``.
    """
    output_uri = 'file://{}'.format(tmpdir)
    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        role='SageMakerRole',
        instance_count=1,
        instance_type=local_instance_type,
        sagemaker_session=sagemaker_local_session,
        image_uri=docker_image,
        framework_version=framework_version,
        output_path=output_uri,
    )

    estimator.fit()

    expected_files = ['success']
    local_mode_utils.assert_output_files_exist(
        str(tmpdir), 'output', expected_files)
Beispiel #18
0
def test_distributed(docker_image, sagemaker_local_session, framework_version,
                     processor, tmpdir):
    """Run a two-instance local training job with the parameter server enabled.

    Skipped when ``processor`` is ``gpu``. Success is checked by
    ``_train_and_assert_success``.
    """
    if processor == 'gpu':
        pytest.skip('Local Mode does not support distributed training on GPU.')

    output_uri = 'file://{}'.format(tmpdir)
    ps_hyperparameters = {'sagemaker_parameter_server_enabled': True}

    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        role='SageMakerRole',
        train_instance_count=2,
        train_instance_type='local',
        sagemaker_session=sagemaker_local_session,
        image_name=docker_image,
        framework_version=framework_version,
        output_path=output_uri,
        hyperparameters=ps_hyperparameters,
    )
    _train_and_assert_success(estimator, str(tmpdir))
Beispiel #19
0
def test_keras_training(docker_image, sagemaker_local_session, local_instance_type,
                        framework_version, tmpdir):
    """Train the Keras MNIST script locally and check the expected output files.

    Every directory/file list in ``MODEL_SUCCESS_FILES`` must exist under
    ``tmpdir`` after training.
    """
    keras_dir = os.path.join(RESOURCE_PATH, 'keras')
    entry_script = os.path.join(keras_dir, 'keras_mnist.py')

    estimator = MXNet(
        entry_point=entry_script,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type=local_instance_type,
        sagemaker_session=sagemaker_local_session,
        image_name=docker_image,
        framework_version=framework_version,
        output_path='file://{}'.format(tmpdir),
    )

    train_uri = 'file://{}'.format(os.path.join(keras_dir, 'data'))
    estimator.fit({'train': train_uri})

    output_root = str(tmpdir)
    for subdir, expected_files in MODEL_SUCCESS_FILES.items():
        local_mode_utils.assert_output_files_exist(
            output_root, subdir, expected_files)
def test_local_transform_mxnet(sagemaker_local_session, tmpdir,
                               mxnet_full_version):
    """Train MNIST with a local session and run a local batch transform.

    The transform writes to ``tmpdir``; the test passes when
    ``data.csv.out`` exists there. The transform runs under an exclusive
    file lock on ``LOCK_PATH``.
    """
    data_path = os.path.join(DATA_DIR, 'mxnet_mnist')
    script_path = os.path.join(data_path, 'mnist.py')

    mx = MXNet(entry_point=script_path,
               role='SageMakerRole',
               train_instance_count=1,
               train_instance_type='ml.c4.xlarge',
               framework_version=mxnet_full_version,
               sagemaker_session=sagemaker_local_session)

    train_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, 'train'),
        key_prefix='integ-test-data/mxnet_mnist/train')
    test_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, 'test'),
        key_prefix='integ-test-data/mxnet_mnist/test')

    with timeout(minutes=15):
        mx.fit({'train': train_input, 'test': test_input})

    transform_input_path = os.path.join(data_path, 'transform')
    transform_input_key_prefix = 'integ-test-data/mxnet_mnist/transform'
    transform_input = mx.sagemaker_session.upload_data(
        path=transform_input_path, key_prefix=transform_input_key_prefix)

    output_path = 'file://%s' % (str(tmpdir))
    transformer = mx.transformer(1,
                                 'local',
                                 assemble_with='Line',
                                 max_payload=1,
                                 strategy='SingleRecord',
                                 output_path=output_path)

    # Since Local Mode uses the same port for serving, we need a lock in order
    # to allow concurrent test execution.
    # The ``with`` block closes the lock file, and ``finally`` releases the
    # lock even when the transform fails, so other tests are not blocked.
    with open(LOCK_PATH, 'w') as local_mode_lock_fd:
        local_mode_lock = local_mode_lock_fd.fileno()
        fcntl.lockf(local_mode_lock, fcntl.LOCK_EX)
        try:
            transformer.transform(transform_input,
                                  content_type='text/csv',
                                  split_type='Line')
            transformer.wait()
            # NOTE(review): presumably gives the local container time to shut
            # down before the lock is released - confirm.
            time.sleep(5)
        finally:
            fcntl.lockf(local_mode_lock, fcntl.LOCK_UN)

    assert os.path.exists(os.path.join(str(tmpdir), 'data.csv.out'))
Beispiel #21
0
def test_custom_image_estimator_deploy(
    sagemaker_session, mxnet_training_version, mxnet_training_py_version
):
    """Check that ``create_model(image_uri=...)`` uses the custom image on the model."""
    estimator = MXNet(entry_point=SCRIPT_PATH,
                      framework_version=mxnet_training_version,
                      py_version=mxnet_training_py_version,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      instance_count=INSTANCE_COUNT,
                      instance_type=INSTANCE_TYPE)
    estimator.fit(inputs="s3://mybucket/train", job_name="new_name")

    custom_image = "mycustomimage:latest"
    model = estimator.create_model(image_uri=custom_image)

    assert model.image_uri == custom_image
def test_onnx_export(docker_image, sagemaker_local_session,
                     local_instance_type, framework_version, tmpdir):
    """Train locally on the ``mxnet_module`` data and check ``model/model.onnx`` exists.

    Output goes to ``tmpdir``; the check is done by
    ``local_mode_utils.assert_output_files_exist``.
    """
    output_uri = 'file://{}'.format(tmpdir)
    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type=local_instance_type,
        sagemaker_session=sagemaker_local_session,
        image_name=docker_image,
        framework_version=framework_version,
        output_path=output_uri,
    )

    module_dir = os.path.join(ONNX_PATH, 'mxnet_module')
    train_uri = 'file://{}'.format(module_dir)
    estimator.fit({'train': train_uri})

    expected_files = ['model.onnx']
    local_mode_utils.assert_output_files_exist(
        str(tmpdir), 'model', expected_files)
Beispiel #23
0
def test_mxnet(strftime, sagemaker_session, mxnet_version, skip_if_mms_version):
    """Fit with an experiment config, create a model and deploy with a mocked session.

    Checks the recorded session calls, the training arguments (including
    ``experiment_config``), the GPU container definition, that the CPU image
    name contains ``cpu``, and that ``deploy`` returns an ``MXNetPredictor``.
    """
    estimator = MXNet(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_count=INSTANCE_COUNT,
                      train_instance_type=INSTANCE_TYPE,
                      framework_version=mxnet_version)

    train_uri = "s3://mybucket/train"
    estimator.fit(inputs=train_uri, experiment_config=EXPERIMENT_CONFIG)

    # Only these session and boto calls are expected after ``fit``.
    recorded_session_calls = [
        call[0] for call in sagemaker_session.method_calls
    ]
    assert recorded_session_calls == ["train", "logs_for_job"]
    recorded_boto_calls = [
        call[0] for call in sagemaker_session.boto_session.method_calls
    ]
    assert recorded_boto_calls == ["resource"]

    expected_train_args = _create_train_job(mxnet_version)
    s3_source = expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]
    s3_source["S3Uri"] = train_uri
    expected_train_args["experiment_config"] = EXPERIMENT_CONFIG

    first_call = sagemaker_session.method_calls[0]
    actual_train_args = first_call[2]
    assert actual_train_args == expected_train_args

    model = estimator.create_model()

    image_template = "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:{}-gpu-py2"
    submit_directory = "s3://mybucket/sagemaker-mxnet-{}/source/sourcedir.tar.gz".format(
        TIMESTAMP)
    expected_env = {
        "SAGEMAKER_SUBMIT_DIRECTORY": submit_directory,
        "SAGEMAKER_PROGRAM": "dummy_script.py",
        "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
        "SAGEMAKER_REGION": "us-west-2",
        "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
    }
    expected_container_def = {
        "Environment": expected_env,
        "Image": image_template.format(mxnet_version),
        "ModelDataUrl": "s3://m/m.tar.gz",
    }
    assert expected_container_def == model.prepare_container_def(GPU)

    cpu_container_def = model.prepare_container_def(CPU)
    assert "cpu" in cpu_container_def["Image"]

    predictor = estimator.deploy(1, GPU)
    assert isinstance(predictor, MXNetPredictor)
Beispiel #24
0
def test_transform_mxnet(sagemaker_session, mxnet_full_version):
    """Train MNIST, run a transform job and check its KMS key and output filter.

    The transform job description must report the KMS key used for the
    volume and the ``"$"`` output filter passed to the job.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')
    entry_script = os.path.join(mnist_dir, 'mnist.py')

    estimator = MXNet(
        entry_point=entry_script,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type='ml.c4.xlarge',
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    train_dir = os.path.join(mnist_dir, 'train')
    train_uri = estimator.sagemaker_session.upload_data(
        path=train_dir, key_prefix='integ-test-data/mxnet_mnist/train')
    test_dir = os.path.join(mnist_dir, 'test')
    test_uri = estimator.sagemaker_session.upload_data(
        path=test_dir, key_prefix='integ-test-data/mxnet_mnist/test')
    training_job_name = unique_name_from_base('test-mxnet-transform')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        channels = {'train': train_uri, 'test': test_uri}
        estimator.fit(channels, job_name=training_job_name)

    csv_path = os.path.join(mnist_dir, 'transform', 'data.csv')
    csv_key_prefix = 'integ-test-data/mxnet_mnist/transform'
    transform_uri = estimator.sagemaker_session.upload_data(
        path=csv_path, key_prefix=csv_key_prefix)

    kms_key_arn = get_or_create_kms_key(sagemaker_session)
    output_filter = "$"

    transformer = _create_transformer_and_transform_job(
        estimator,
        transform_uri,
        kms_key_arn,
        input_filter=None,
        output_filter=output_filter,
        join_source=None,
    )
    with timeout_and_delete_model_with_transformer(
            transformer, sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()

    sm_client = transformer.sagemaker_session.sagemaker_client
    transform_desc = sm_client.describe_transform_job(
        TransformJobName=transformer.latest_transform_job.name)
    assert kms_key_arn == transform_desc['TransformResources']['VolumeKmsKeyId']
    assert output_filter == transform_desc['DataProcessing']['OutputFilter']
Beispiel #25
0
def test_mxnet_mms_version(
    strftime, repack_model, sagemaker_session, mxnet_version, skip_if_not_mms_version
):
    """Fit, create a model and deploy with a mocked session.

    Checks the recorded session calls, the training arguments, the GPU
    container definition (image built via ``_get_full_image_uri``), that the
    CPU image name contains ``cpu``, and that ``deploy`` returns an
    ``MXNetPredictor``.
    """
    estimator = MXNet(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_count=INSTANCE_COUNT,
                      train_instance_type=INSTANCE_TYPE,
                      framework_version=mxnet_version)

    train_uri = "s3://mybucket/train"
    estimator.fit(inputs=train_uri)

    # Only these session and boto calls are expected after ``fit``.
    recorded_session_calls = [
        call[0] for call in sagemaker_session.method_calls
    ]
    assert recorded_session_calls == ["train", "logs_for_job"]
    recorded_boto_calls = [
        call[0] for call in sagemaker_session.boto_session.method_calls
    ]
    assert recorded_boto_calls == ["resource"]

    expected_train_args = _create_train_job(mxnet_version)
    s3_source = expected_train_args["input_config"][0]["DataSource"]["S3DataSource"]
    s3_source["S3Uri"] = train_uri

    first_call = sagemaker_session.method_calls[0]
    actual_train_args = first_call[2]
    assert actual_train_args == expected_train_args

    model = estimator.create_model()

    image_template = _get_full_image_uri(
        mxnet_version, IMAGE_REPO_SERVING_NAME, "gpu")

    expected_env = {
        "SAGEMAKER_SUBMIT_DIRECTORY": "s3://mybucket/sagemaker-mxnet-2017-11-06-14:14:15.672/model.tar.gz",
        "SAGEMAKER_PROGRAM": "dummy_script.py",
        "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
        "SAGEMAKER_REGION": "us-west-2",
        "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
    }
    expected_container_def = {
        "Environment": expected_env,
        "Image": image_template.format(mxnet_version),
        "ModelDataUrl": "s3://mybucket/sagemaker-mxnet-2017-11-06-14:14:15.672/model.tar.gz",
    }
    assert expected_container_def == model.prepare_container_def(GPU)

    cpu_container_def = model.prepare_container_def(CPU)
    assert "cpu" in cpu_container_def["Image"]

    predictor = estimator.deploy(1, GPU)
    assert isinstance(predictor, MXNetPredictor)
Beispiel #26
0
def test_requirements_file(image_uri, sagemaker_local_session,
                           local_instance_type, framework_version, tmpdir):
    """Fit an MXNet estimator on ``SOURCE_PATH`` and verify its output files.

    The estimator is configured to write to ``file://<tmpdir>``. After ``fit``
    the entries of ``MODEL_SUCCESS_FILES['output']`` are checked through
    ``local_mode_utils.assert_output_files_exist``.
    """
    output_location = 'file://{}'.format(tmpdir)

    estimator = MXNet(
        entry_point='entry.py',
        source_dir=SOURCE_PATH,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type=local_instance_type,
        image_name=image_uri,
        framework_version=framework_version,
        output_path=output_location,
        sagemaker_session=sagemaker_local_session,
    )
    estimator.fit()

    expected_files = MODEL_SUCCESS_FILES['output']
    local_mode_utils.assert_output_files_exist(str(tmpdir), 'output', expected_files)
def test_stop_transform_job(sagemaker_session, mxnet_full_version, cpu_instance_type):
    """Train on the ``mxnet_mnist`` data, start a transform job and stop it.

    After ``stop_transform_job`` the description of the job fetched from the
    SageMaker client must report ``TransformJobStatus == "Stopped"``.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    entry_script = os.path.join(mnist_dir, "mnist.py")
    resource_tags = [{"Key": "some-tag", "Value": "value-for-tag"}]

    estimator = MXNet(
        entry_point=entry_script,
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    # Upload both training channels before starting the training job.
    train_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "train"),
        key_prefix="integ-test-data/mxnet_mnist/train",
    )
    test_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "test"),
        key_prefix="integ-test-data/mxnet_mnist/test",
    )
    training_job_name = unique_name_from_base("test-mxnet-transform")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit({"train": train_s3, "test": test_s3}, job_name=training_job_name)

    transform_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "transform", "data.csv"),
        key_prefix="integ-test-data/mxnet_mnist/transform",
    )

    transformer = estimator.transformer(1, cpu_instance_type, tags=resource_tags)
    transformer.transform(transform_s3, content_type="text/csv")

    # Fixed pause between starting the transform and stopping it
    # (presumably to let the job get under way -- confirm).
    time.sleep(15)

    running_job_name = transformer.latest_transform_job.name

    print("Attempting to stop {}".format(running_job_name))

    transformer.stop_transform_job()

    client = transformer.latest_transform_job.sagemaker_session.sagemaker_client
    description = client.describe_transform_job(TransformJobName=running_job_name)
    assert description["TransformJobStatus"] == "Stopped"
def test_transform_mxnet_tags(sagemaker_session, mxnet_full_version, cpu_instance_type):
    """Train on the ``mxnet_mnist`` data and run a tagged transform job.

    Once the transform has been waited on, the tags listed for the
    transformer's model must equal the tags passed to ``transformer``.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
    entry_script = os.path.join(mnist_dir, "mnist.py")
    resource_tags = [{"Key": "some-tag", "Value": "value-for-tag"}]

    estimator = MXNet(
        entry_point=entry_script,
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    # Upload both training channels before starting the training job.
    train_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "train"),
        key_prefix="integ-test-data/mxnet_mnist/train",
    )
    test_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "test"),
        key_prefix="integ-test-data/mxnet_mnist/test",
    )
    training_job_name = unique_name_from_base("test-mxnet-transform")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit({"train": train_s3, "test": test_s3}, job_name=training_job_name)

    transform_s3 = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "transform", "data.csv"),
        key_prefix="integ-test-data/mxnet_mnist/transform",
    )

    transformer = estimator.transformer(1, cpu_instance_type, tags=resource_tags)
    transformer.transform(transform_s3, content_type="text/csv")

    with timeout_and_delete_model_with_transformer(
        transformer, sagemaker_session, minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES
    ):
        transformer.wait()

        # Look the model up by name to get its ARN, then list the tags on it.
        client = sagemaker_session.sagemaker_client
        model_arn = client.describe_model(ModelName=transformer.model_name)["ModelArn"]
        listed_tags = sagemaker_session.sagemaker_client.list_tags(ResourceArn=model_arn)["Tags"]
        assert resource_tags == listed_tags
Beispiel #29
0
def test_mx_enable_sm_metrics_for_version(
    sagemaker_session, mxnet_training_version, mxnet_training_py_version
):
    """Check the default of ``enable_sagemaker_metrics`` per framework version.

    The dotted ``mxnet_training_version`` is compared against ``(1, 6, 0)``
    truncated to the same number of components. At or above that threshold
    the attribute must be truthy; below it the attribute must be ``None``.
    """
    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        framework_version=mxnet_training_version,
        py_version=mxnet_training_py_version,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
    )

    parsed_version = tuple(map(int, mxnet_training_version.split(".")))
    # Truncate the threshold so that e.g. "1.6" is compared against (1, 6).
    threshold = (1, 6, 0)[: len(parsed_version)]

    if parsed_version < threshold:
        assert estimator.enable_sagemaker_metrics is None
        return

    assert estimator.enable_sagemaker_metrics
Beispiel #30
0
def test_mxnet_training_failure(sagemaker_local_session, mxnet_full_version,
                                tmpdir):
    """Run ``failure_script.py`` and expect ``fit`` to raise ``RuntimeError``.

    Afterwards ``output.tar.gz`` inside ``tmpdir`` is opened and a member
    named ``failure`` is looked up in it.
    """
    failing_script = os.path.join(DATA_DIR, 'mxnet_mnist', 'failure_script.py')
    output_location = 'file://{}'.format(tmpdir)

    estimator = MXNet(
        entry_point=failing_script,
        role='SageMakerRole',
        framework_version=mxnet_full_version,
        py_version=PYTHON_VERSION,
        train_instance_count=1,
        train_instance_type='local',
        sagemaker_session=sagemaker_local_session,
        output_path=output_location,
    )

    with pytest.raises(RuntimeError):
        estimator.fit()

    archive_path = os.path.join(str(tmpdir), 'output.tar.gz')
    with tarfile.open(archive_path) as archive:
        # The lookup itself is the check: the result is discarded.
        archive.getmember('failure')