def test_mxnet_local_data_local_script():
    """Train MXNet in local mode on ``file://`` data, then deploy and predict.

    Training uses a ``LocalNoS3Session``. The serving part runs while holding
    an exclusive ``fcntl`` lock on the file at ``LOCK_PATH``; the endpoint is
    deleted and the lock released whether or not serving succeeds.
    """
    # Open the lock file in a ``with`` block so its descriptor is closed on
    # every exit path instead of leaking for the rest of the test session.
    with open(LOCK_PATH, 'w') as local_mode_lock_fd:
        local_mode_lock = local_mode_lock_fd.fileno()

        script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

        mx = MXNet(entry_point=script_path, role='SageMakerRole',
                   train_instance_count=1, train_instance_type='local',
                   sagemaker_session=LocalNoS3Session())

        train_input = 'file://' + os.path.join(data_path, 'train')
        test_input = 'file://' + os.path.join(data_path, 'test')

        mx.fit({'train': train_input, 'test': test_input})
        endpoint_name = mx.latest_training_job.name
        try:
            # Since Local Mode uses the same port for serving, we need a lock in order
            # to allow concurrent test execution. The serving test is really fast so it still
            # makes sense to allow this behavior.
            fcntl.lockf(local_mode_lock, fcntl.LOCK_EX)
            predictor = mx.deploy(1, 'local', endpoint_name=endpoint_name)
            data = numpy.zeros(shape=(1, 1, 28, 28))
            predictor.predict(data)
        finally:
            mx.delete_endpoint()
            time.sleep(5)
            fcntl.lockf(local_mode_lock, fcntl.LOCK_UN)
def mxnet_model(sagemaker_local_session):
    """Train a local-mode MXNet MNIST estimator and return the model it creates.

    The ``train`` and ``test`` directories under ``DATA_DIR/mxnet_mnist`` are
    uploaded through the estimator's session before fitting.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')

    estimator = MXNet(entry_point=os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py'),
                      role='SageMakerRole',
                      train_instance_count=1,
                      train_instance_type='local',
                      sagemaker_session=sagemaker_local_session)

    # Dict literals evaluate in order, so 'train' is uploaded before 'test'.
    channels = {
        'train': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'train'),
            key_prefix='integ-test-data/mxnet_mnist/train'),
        'test': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'test'),
            key_prefix='integ-test-data/mxnet_mnist/test'),
    }

    estimator.fit(channels)
    return estimator.create_model(1)
예제 #3
0
def test_create_model_with_optional_params(sagemaker_session):
    """Verify that ``create_model`` applies the role and worker-count overrides."""
    estimator = MXNet(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_count=INSTANCE_COUNT,
                      train_instance_type=INSTANCE_TYPE,
                      container_log_level='"logging.INFO"',
                      base_job_name='job',
                      source_dir='s3://mybucket/source',
                      enable_cloudwatch_metrics='true')

    estimator.fit(inputs='s3://mybucket/train', job_name='new_name')

    override_role = 'role'
    worker_count = 2
    model = estimator.create_model(role=override_role,
                                   model_server_workers=worker_count)

    assert model.role == override_role
    assert model.model_server_workers == worker_count
def test_local_transform_mxnet(sagemaker_local_session, tmpdir,
                               mxnet_full_version):
    """Train MXNet on MNIST, then run a local batch transform into ``tmpdir``.

    The test passes when ``data.csv.out`` exists in ``tmpdir`` after the
    transform has been waited on.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')

    estimator = MXNet(entry_point=os.path.join(mnist_dir, 'mnist.py'),
                      role='SageMakerRole',
                      train_instance_count=1,
                      train_instance_type='ml.c4.xlarge',
                      framework_version=mxnet_full_version,
                      sagemaker_session=sagemaker_local_session)

    channels = {
        'train': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'train'),
            key_prefix='integ-test-data/mxnet_mnist/train'),
        'test': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'test'),
            key_prefix='integ-test-data/mxnet_mnist/test'),
    }

    with timeout(minutes=15):
        estimator.fit(channels)

    transform_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'transform'),
        key_prefix='integ-test-data/mxnet_mnist/transform')

    output_dir = str(tmpdir)
    transformer = estimator.transformer(1,
                                        'local',
                                        assemble_with='Line',
                                        max_payload=1,
                                        strategy='SingleRecord',
                                        output_path='file://%s' % output_dir)

    # The transform runs while the local-mode lock is held.
    with local_mode_utils.lock():
        transformer.transform(transform_input,
                              content_type='text/csv',
                              split_type='Line')
        transformer.wait()

    assert os.path.exists(os.path.join(output_dir, 'data.csv.out'))
def test_mxnet_neo(strftime, sagemaker_session, mxnet_version,
                   skip_if_mms_version):
    """Exercise compilation and compiled-model deployment on an MXNet estimator.

    Checks the sequence of session calls made by ``fit`` + ``compile_model``,
    the arguments passed to ``compile_model``, the compiled model's image, and
    that deploying to a target without a compiled model raises.
    """
    estimator = MXNet(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_count=INSTANCE_COUNT,
                      train_instance_type=INSTANCE_TYPE,
                      framework_version=mxnet_version)

    estimator.fit(inputs='s3://mybucket/train')

    shape = {'data': [100, 1, 28, 28]}
    neo_output = 's3://neo-sdk-test'

    compiled_model = estimator.compile_model(target_instance_family='ml_c4',
                                             input_shape=shape,
                                             output_path=neo_output)

    # Each recorded call is indexed as (name, args, kwargs).
    recorded_names = [call[0] for call in sagemaker_session.method_calls]
    assert recorded_names == [
        'train', 'logs_for_job', 'sagemaker_client.describe_training_job',
        'compile_model', 'wait_for_compilation_job'
    ]

    expected_kwargs = _create_compilation_job(json.dumps(shape), neo_output)
    # Index 3 is the 'compile_model' call; element 2 holds its keyword args.
    actual_kwargs = sagemaker_session.method_calls[3][2]
    assert expected_kwargs == actual_kwargs

    assert compiled_model.image == _neo_inference_image(mxnet_version)

    predictor = estimator.deploy(1, CPU, use_compiled_model=True)
    assert isinstance(predictor, MXNetPredictor)

    with pytest.raises(Exception) as wrong_target:
        estimator.deploy(1, CPU_C5, use_compiled_model=True)
    assert str(wrong_target.value).startswith('No compiled model for')

    # deploy without sagemaker Neo should continue to work
    estimator.deploy(1, CPU)
def test_transform_mxnet(sagemaker_session, mxnet_full_version):
    """Train MXNet, run a transform job with a KMS key, and verify the key is used.

    The final assertion compares the KMS key ARN with the
    ``VolumeKmsKeyId`` reported by ``describe_transform_job``.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')

    estimator = MXNet(entry_point=os.path.join(mnist_dir, 'mnist.py'),
                      role='SageMakerRole',
                      train_instance_count=1,
                      train_instance_type='ml.c4.xlarge',
                      sagemaker_session=sagemaker_session,
                      framework_version=mxnet_full_version)

    channels = {
        'train': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'train'),
            key_prefix='integ-test-data/mxnet_mnist/train'),
        'test': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'test'),
            key_prefix='integ-test-data/mxnet_mnist/test'),
    }

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(channels)

    transform_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'transform', 'data.csv'),
        key_prefix='integ-test-data/mxnet_mnist/transform')

    # The account id from STS is passed to the KMS-key helper.
    account_id = sagemaker_session.boto_session.client(
        'sts').get_caller_identity()['Account']
    kms_key_arn = get_or_create_kms_key(
        sagemaker_session.boto_session.client('kms'), account_id)

    transformer = _create_transformer_and_transform_job(
        estimator, transform_input, kms_key_arn)
    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()

    sagemaker_client = transformer.sagemaker_session.sagemaker_client
    job_desc = sagemaker_client.describe_transform_job(
        TransformJobName=transformer.latest_transform_job.name)
    assert kms_key_arn == job_desc['TransformResources']['VolumeKmsKeyId']
예제 #7
0
def mxnet_model(sagemaker_local_session):
    """Fit a local-mode MXNet estimator on MNIST and return ``create_model(1)``."""
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')
    entry_script = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py')

    estimator = MXNet(entry_point=entry_script,
                      role='SageMakerRole',
                      train_instance_count=1,
                      train_instance_type='local',
                      sagemaker_session=sagemaker_local_session)

    # Upload the training channel first, then the test channel.
    train_uri = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'train'),
        key_prefix='integ-test-data/mxnet_mnist/train')
    test_uri = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'test'),
        key_prefix='integ-test-data/mxnet_mnist/test')

    estimator.fit({'train': train_uri, 'test': test_uri})
    return estimator.create_model(1)
def test_keras_training(docker_image, sagemaker_local_session,
                        local_instance_type, framework_version, tmpdir):
    """Train the Keras MNIST script locally and check the expected output files.

    Output is written under ``tmpdir``; every ``(directory, files)`` pair in
    ``MODEL_SUCCESS_FILES`` is asserted to exist there.
    """
    keras_dir = os.path.join(RESOURCE_PATH, 'keras')

    estimator = MXNet(entry_point=os.path.join(keras_dir, 'keras_mnist.py'),
                      role='SageMakerRole',
                      train_instance_count=1,
                      train_instance_type=local_instance_type,
                      sagemaker_session=sagemaker_local_session,
                      image_name=docker_image,
                      framework_version=framework_version,
                      output_path='file://{}'.format(tmpdir))

    training_data = 'file://{}'.format(os.path.join(keras_dir, 'data'))
    estimator.fit({'train': training_data})

    for subdir in MODEL_SUCCESS_FILES:
        local_mode_utils.assert_output_files_exist(
            str(tmpdir), subdir, MODEL_SUCCESS_FILES[subdir])
def test_create_model_with_custom_image(sagemaker_session):
    """Verify ``create_model`` carries over estimator settings with a custom image.

    The estimator is built with ``image_name='mxnet:2.0'``; after fitting, the
    created model is checked for session, image, entry point, role, name,
    log level and source directory.
    """
    container_log_level = '"logging.INFO"'
    source_dir = 's3://mybucket/source'
    custom_image = 'mxnet:2.0'
    mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
               train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
               image_name=custom_image, container_log_level=container_log_level,
               base_job_name='job', source_dir=source_dir)

    job_name = 'new_name'
    # Pass the variable rather than repeating the literal so the name used for
    # fitting and the name asserted below cannot drift apart.
    mx.fit(inputs='s3://mybucket/train', job_name=job_name)
    model = mx.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.image == custom_image
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == job_name
    assert model.container_log_level == container_log_level
    assert model.source_dir == source_dir
def test_create_model_with_optional_params(sagemaker_session):
    """Verify ``create_model`` applies role, worker-count and VPC overrides."""
    estimator = MXNet(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_count=INSTANCE_COUNT,
                      train_instance_type=INSTANCE_TYPE,
                      container_log_level='"logging.INFO"',
                      base_job_name='job',
                      source_dir='s3://mybucket/source',
                      enable_cloudwatch_metrics='true')

    estimator.fit(inputs='s3://mybucket/train', job_name='new_name')

    overrides = {
        'role': 'role',
        'model_server_workers': 2,
        'vpc_config_override': {'Subnets': ['foo'], 'SecurityGroupIds': ['bar']},
    }
    model = estimator.create_model(**overrides)

    assert model.role == overrides['role']
    assert model.model_server_workers == overrides['model_server_workers']
    assert model.vpc_config == overrides['vpc_config_override']
예제 #11
0
def test_mxnet_local_training_env(mxnet_training_latest_version,
                                  mxnet_training_latest_py_version):
    """Fit a local-mode estimator running ``check_env.py`` with ``MYVAR`` set.

    The test contains no assertions of its own; it passes when ``fit``
    completes without raising.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")

    estimator = MXNet(
        entry_point=os.path.join(mnist_dir, "check_env.py"),
        role="SageMakerRole",
        instance_count=1,
        instance_type="local",
        framework_version=mxnet_training_latest_version,
        py_version=mxnet_training_latest_py_version,
        sagemaker_session=LocalNoS3Session(),
        environment={"MYVAR": "HELLO_WORLD"},
    )

    channels = {
        "train": "file://" + os.path.join(mnist_dir, "train"),
        "test": "file://" + os.path.join(mnist_dir, "test"),
    }
    estimator.fit(channels)
def test_mxnet_training_failure(sagemaker_local_session, mxnet_full_version,
                                tmpdir):
    """Check that a failing script raises and leaves a ``failure`` member.

    ``fit`` must raise ``RuntimeError``; afterwards ``output.tar.gz`` in
    ``tmpdir`` must contain an entry named ``failure`` (``getmember`` raises
    ``KeyError`` otherwise).
    """
    output_dir = str(tmpdir)

    estimator = MXNet(
        entry_point=os.path.join(DATA_DIR, "mxnet_mnist", "failure_script.py"),
        role="SageMakerRole",
        framework_version=mxnet_full_version,
        py_version=PYTHON_VERSION,
        train_instance_count=1,
        train_instance_type="local",
        sagemaker_session=sagemaker_local_session,
        output_path="file://{}".format(tmpdir),
    )

    with pytest.raises(RuntimeError):
        estimator.fit()

    archive_path = os.path.join(output_dir, "output.tar.gz")
    with tarfile.open(archive_path) as archive:
        archive.getmember("failure")
예제 #13
0
def test_transform_mxnet_tags(sagemaker_session, mxnet_full_version):
    """Train MXNet, start a tagged transform, and check the model's tags.

    The tags given to ``transformer(...)`` are compared with the tags that
    ``list_tags`` returns for the model created for the transformer.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')
    expected_tags = [{'Key': 'some-tag', 'Value': 'value-for-tag'}]

    estimator = MXNet(entry_point=os.path.join(mnist_dir, 'mnist.py'),
                      role='SageMakerRole',
                      train_instance_count=1,
                      train_instance_type='ml.c4.xlarge',
                      sagemaker_session=sagemaker_session,
                      framework_version=mxnet_full_version)

    channels = {
        'train': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'train'),
            key_prefix='integ-test-data/mxnet_mnist/train'),
        'test': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'test'),
            key_prefix='integ-test-data/mxnet_mnist/test'),
    }
    training_job_name = unique_name_from_base('test-mxnet-transform')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(channels, job_name=training_job_name)

    transform_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'transform', 'data.csv'),
        key_prefix='integ-test-data/mxnet_mnist/transform')

    transformer = estimator.transformer(1, 'ml.m4.xlarge', tags=expected_tags)
    transformer.transform(transform_input, content_type='text/csv')

    with timeout_and_delete_model_with_transformer(
            transformer,
            sagemaker_session,
            minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()
        model_arn = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name)['ModelArn']
        listed_tags = sagemaker_session.sagemaker_client.list_tags(
            ResourceArn=model_arn)['Tags']
        assert expected_tags == listed_tags
예제 #14
0
def test_create_model(sagemaker_session, mxnet_version):
    """Verify that ``create_model`` without arguments mirrors the estimator.

    Session, framework version, Python version, entry point, role, job name,
    log level and source directory must carry over; ``vpc_config`` must be
    ``None``.
    """
    log_level = '"logging.INFO"'
    code_location = 's3://mybucket/source'
    estimator = MXNet(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_count=INSTANCE_COUNT,
                      train_instance_type=INSTANCE_TYPE,
                      framework_version=mxnet_version,
                      container_log_level=log_level,
                      base_job_name='job',
                      source_dir=code_location)

    training_job_name = 'new_name'
    estimator.fit(inputs='s3://mybucket/train', job_name=training_job_name)
    model = estimator.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.framework_version == mxnet_version
    assert model.py_version == estimator.py_version
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == training_job_name
    assert model.container_log_level == log_level
    assert model.source_dir == code_location
    assert model.vpc_config is None
예제 #15
0
def test_create_model_with_custom_hosting_image(sagemaker_session):
    """Verify that ``create_model(image_uri=...)`` overrides the training image."""
    hosting_image = "mxnet_hosting:2.0"

    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        framework_version="2.0",
        py_version="py3",
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        image_uri="mxnet:2.0",
        container_log_level='"logging.INFO"',
        base_job_name="job",
    )
    estimator.fit(inputs="s3://mybucket/train", job_name="new_name")

    hosted_model = estimator.create_model(image_uri=hosting_image)

    assert hosted_model.image_uri == hosting_image
예제 #16
0
def _create_and_fit_estimator(mxnet_version, py_version, sagemaker_session, instance_type, tmpdir):
    """Fit a two-instance MPI-enabled MXNet job and check each rank's output.

    After training, the model data is extracted into ``tmpdir`` and the JSON
    file for each of the two ranks is asserted to report its own rank.
    """
    instance_count = 2
    training_job_name = sagemaker.utils.unique_name_from_base("mx-horovod")
    entry_script = os.path.join(horovod_dir, "hvd_mnist_mxnet.py")

    estimator = MXNet(
        entry_point=entry_script,
        role="SageMakerRole",
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        py_version=py_version,
        framework_version=mxnet_version,
        distribution={"mpi": {"enabled": True}},
    )

    # Extraction and verification stay inside the timeout, as well as fitting.
    with timeout.timeout(minutes=integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(job_name=training_job_name)

        extract_dir = str(tmpdir)
        extract_files_from_s3(estimator.model_data, extract_dir, sagemaker_session)

        for rank in range(instance_count):
            rank_report = read_json("rank-%s" % rank, extract_dir)
            assert rank_report["rank"] == rank
def test_mxnet_local_data_local_script(mxnet_training_latest_version,
                                       mxnet_training_latest_py_version):
    """Train and serve MXNet locally and check that no S3 boto calls are made.

    ``boto_session.resource`` and ``boto_session.client`` are wrapped in
    ``Mock`` objects that delegate to the real callables, so their calls can be
    inspected after training, deployment and prediction.
    """
    data_path = os.path.join(DATA_DIR, "mxnet_mnist")
    script_path = os.path.join(data_path, "mnist.py")
    local_no_s3_session = LocalNoS3Session()
    local_no_s3_session.boto_session.resource = Mock(
        side_effect=local_no_s3_session.boto_session.resource)
    local_no_s3_session.boto_session.client = Mock(
        side_effect=local_no_s3_session.boto_session.client)

    mx = MXNet(
        entry_point=script_path,
        role="SageMakerRole",
        instance_count=1,
        instance_type="local",
        framework_version=mxnet_training_latest_version,
        py_version=mxnet_training_latest_py_version,
        sagemaker_session=local_no_s3_session,
    )

    train_input = "file://" + os.path.join(data_path, "train")
    test_input = "file://" + os.path.join(data_path, "test")

    mx.fit({"train": train_input, "test": test_input})
    endpoint_name = mx.latest_training_job.name

    with lock.lock(LOCK_PATH):
        # Bind the name up front: if ``deploy`` raises, the ``finally`` clause
        # would otherwise hit a NameError that hides the real failure.
        predictor = None
        try:
            predictor = mx.deploy(1, "local", endpoint_name=endpoint_name)
            data = numpy.zeros(shape=(1, 1, 28, 28))
            predictor.predict(data)
            # check if no boto_session s3 calls were made
            # NOTE(review): assert_called_with only inspects the most recent
            # call, so earlier "s3" calls would go unnoticed here.
            with pytest.raises(AssertionError):
                local_no_s3_session.boto_session.resource.assert_called_with(
                    "s3", region_name=ANY)
            with pytest.raises(AssertionError):
                local_no_s3_session.boto_session.client.assert_called_with(
                    "s3", region_name=ANY)
        finally:
            if predictor is not None:
                predictor.delete_endpoint()
예제 #18
0
def mxnet_estimator(sagemaker_session, mxnet_full_version, cpu_instance_type):
    """Build an MXNet MNIST estimator, fit it on uploaded data, and return it."""
    estimator = MXNet(
        entry_point=os.path.join(MXNET_MNIST_PATH, "mnist.py"),
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    # Dict literals evaluate in order, so "train" is uploaded before "test".
    channels = {
        "train": estimator.sagemaker_session.upload_data(
            path=os.path.join(MXNET_MNIST_PATH, "train"),
            key_prefix="integ-test-data/mxnet_mnist/train"),
        "test": estimator.sagemaker_session.upload_data(
            path=os.path.join(MXNET_MNIST_PATH, "test"),
            key_prefix="integ-test-data/mxnet_mnist/test"),
    }

    training_job_name = unique_name_from_base("test-mxnet-transform")
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(channels, job_name=training_job_name)

    return estimator
예제 #19
0
def test_create_model_with_custom_image(sagemaker_session):
    """Verify ``create_model`` carries over estimator settings with a custom image.

    The estimator is built with ``image_name='mxnet:2.0'`` and CloudWatch
    metrics enabled; after fitting, the created model is checked for session,
    image, entry point, role, name, log level, source directory and the
    CloudWatch-metrics flag.
    """
    container_log_level = '"logging.INFO"'
    source_dir = 's3://mybucket/source'
    enable_cloudwatch_metrics = 'true'
    custom_image = 'mxnet:2.0'
    mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
               train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
               image_name=custom_image, container_log_level=container_log_level,
               base_job_name='job', source_dir=source_dir,
               enable_cloudwatch_metrics=enable_cloudwatch_metrics)

    job_name = 'new_name'
    # Pass the variable rather than repeating the literal so the name used for
    # fitting and the name asserted below cannot drift apart.
    mx.fit(inputs='s3://mybucket/train', job_name=job_name)
    model = mx.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.image == custom_image
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == job_name
    assert model.container_log_level == container_log_level
    assert model.source_dir == source_dir
    assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics
def test_transform_mxnet(sagemaker_session):
    """Train MXNet on MNIST, then create a transform job and wait for it."""
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')

    estimator = MXNet(entry_point=os.path.join(mnist_dir, 'mnist.py'),
                      role='SageMakerRole',
                      train_instance_count=1,
                      train_instance_type='ml.c4.xlarge',
                      sagemaker_session=sagemaker_session)

    channels = {
        'train': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'train'),
            key_prefix='integ-test-data/mxnet_mnist/train'),
        'test': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'test'),
            key_prefix='integ-test-data/mxnet_mnist/test'),
    }

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(channels)

    transform_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'transform', 'data.csv'),
        key_prefix='integ-test-data/mxnet_mnist/transform')

    _create_transformer_and_transform_job(estimator, transform_input).wait()
def test_transform_mxnet_logs(sagemaker_session, mxnet_full_version, cpu_instance_type):
    """Train MXNet, then run a transform job with ``wait=True, logs=True``.

    The transformer is created under a 45-minute timeout and waited on again
    inside ``timeout_and_delete_model_with_transformer``.
    """
    mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")

    estimator = MXNet(
        entry_point=os.path.join(mnist_dir, "mnist.py"),
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=mxnet_full_version,
    )

    channels = {
        "train": estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, "train"),
            key_prefix="integ-test-data/mxnet_mnist/train",
        ),
        "test": estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, "test"),
            key_prefix="integ-test-data/mxnet_mnist/test",
        ),
    }
    training_job_name = unique_name_from_base("test-mxnet-transform")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(channels, job_name=training_job_name)

    transform_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "transform", "data.csv"),
        key_prefix="integ-test-data/mxnet_mnist/transform",
    )

    with timeout(minutes=45):
        transformer = _create_transformer_and_transform_job(
            estimator, transform_input, cpu_instance_type, wait=True, logs=True
        )

    cleanup = timeout_and_delete_model_with_transformer(
        transformer, sagemaker_session, minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES
    )
    with cleanup:
        transformer.wait()
예제 #22
0
def test_create_model_with_optional_params(
    sagemaker_session, mxnet_inference_version, mxnet_inference_py_version
):
    """Verify that every override passed to ``create_model`` lands on the model.

    Overrides covered: role, model server workers, VPC config, entry point,
    environment and model name.
    """
    estimator = MXNet(
        entry_point=SCRIPT_NAME,
        source_dir="s3://mybucket/source",
        framework_version=mxnet_inference_version,
        py_version=mxnet_inference_py_version,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        container_log_level='"logging.INFO"',
        base_job_name="job",
    )
    estimator.fit(inputs="s3://mybucket/train", job_name="new_name")

    override_role = "role"
    worker_count = 2
    vpc_override = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]}
    override_name = "model-name"

    model = estimator.create_model(
        role=override_role,
        model_server_workers=worker_count,
        vpc_config_override=vpc_override,
        entry_point=SERVING_SCRIPT_FILE,
        env=ENV,
        name=override_name,
    )

    assert model.role == override_role
    assert model.model_server_workers == worker_count
    assert model.vpc_config == vpc_override
    assert model.entry_point == SERVING_SCRIPT_FILE
    assert model.env == ENV
    assert model.name == override_name
예제 #23
0
def test_create_model(
    name_from_base, sagemaker_session, mxnet_inference_version, mxnet_inference_py_version
):
    """Verify that ``create_model`` mirrors the estimator and names the model.

    ``name_from_base`` is configured to return a fixed name; the test checks
    the model's attributes and that ``name_from_base`` was last called with
    the estimator's base job name.
    """
    log_level = '"logging.INFO"'
    code_location = "s3://mybucket/source"
    job_prefix = "job"

    estimator = MXNet(
        entry_point=SCRIPT_NAME,
        source_dir=code_location,
        framework_version=mxnet_inference_version,
        py_version=mxnet_inference_py_version,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        container_log_level=log_level,
        base_job_name=job_prefix,
    )
    estimator.fit(inputs="s3://mybucket/train", job_name="new_name")

    # Set the return value only after fitting, right before create_model.
    generated_name = "model_name"
    name_from_base.return_value = generated_name
    model = estimator.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.framework_version == mxnet_inference_version
    assert model.py_version == mxnet_inference_py_version
    assert model.entry_point == SCRIPT_NAME
    assert model.role == ROLE
    assert model.name == generated_name
    assert model.container_log_level == log_level
    assert model.source_dir == code_location
    assert model.image_uri is None
    assert model.vpc_config is None

    name_from_base.assert_called_with(job_prefix)
예제 #24
0
def test_mxnet_local_data_local_script():
    """Train with ``mnist_framework_mode.py`` on local data, then serve locally.

    Deployment and prediction run while the local-mode lock is held; the
    endpoint is deleted in a ``finally`` clause.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')

    estimator = MXNet(entry_point=os.path.join(mnist_dir, 'mnist_framework_mode.py'),
                      role='SageMakerRole',
                      train_instance_count=1,
                      train_instance_type='local',
                      sagemaker_session=LocalNoS3Session())

    channels = {
        'train': 'file://' + os.path.join(mnist_dir, 'train'),
        'test': 'file://' + os.path.join(mnist_dir, 'test'),
    }
    estimator.fit(channels)
    endpoint_name = estimator.latest_training_job.name

    with local_mode_utils.lock():
        try:
            predictor = estimator.deploy(1, 'local', endpoint_name=endpoint_name)
            # An all-zero array of shape (1, 1, 28, 28) is the prediction input.
            predictor.predict(numpy.zeros(shape=(1, 1, 28, 28)))
        finally:
            estimator.delete_endpoint()
예제 #25
0
def test_keras_training(docker_image, sagemaker_local_session,
                        local_instance_type, framework_version, tmpdir):
    """Train the Keras MNIST script locally and check the expected output files.

    The test is skipped when ``framework_version`` is 1.9.0 or newer. Output
    is written under ``tmpdir``; every ``(directory, files)`` pair in
    ``MODEL_SUCCESS_FILES`` is asserted to exist there.
    """
    if Version(framework_version) >= Version('1.9.0'):
        # Plain string: the message has no placeholders, so no f-prefix is needed.
        pytest.skip("Keras support has been deprecated MXNet 1.9.0 onwards")

    keras_path = os.path.join(RESOURCE_PATH, 'keras')
    script_path = os.path.join(keras_path, 'keras_mnist.py')

    mx = MXNet(entry_point=script_path,
               role='SageMakerRole',
               instance_count=1,
               instance_type=local_instance_type,
               sagemaker_session=sagemaker_local_session,
               image_uri=docker_image,
               framework_version=framework_version,
               output_path='file://{}'.format(tmpdir))

    train = 'file://{}'.format(os.path.join(keras_path, 'data'))
    mx.fit({'train': train})

    for directory, files in MODEL_SUCCESS_FILES.items():
        local_mode_utils.assert_output_files_exist(str(tmpdir), directory,
                                                   files)
def test_deploy(sagemaker_session, tf_version):
    """Verify the arguments ``deploy`` passes to ``create_model`` on the session.

    After fitting and deploying to a CPU instance type, the last
    ``create_model`` call must carry the job name, role, and a container
    definition with the expected environment, CPU image and model data URL.
    """
    estimator = MXNet(entry_point=SCRIPT,
                      source_dir=SOURCE_DIR,
                      role=ROLE,
                      framework_version=tf_version,
                      train_instance_count=2,
                      train_instance_type=INSTANCE_TYPE_GPU,
                      sagemaker_session=sagemaker_session,
                      base_job_name='test-cifar')

    estimator.fit('s3://mybucket/train')
    print('job succeeded: {}'.format(estimator.latest_training_job.name))

    estimator.deploy(initial_instance_count=1, instance_type=INSTANCE_TYPE_CPU)

    expected_env = {
        'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
        'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
        'SAGEMAKER_SUBMIT_DIRECTORY': SOURCE_DIR,
        'SAGEMAKER_REGION': REGION,
        'SAGEMAKER_PROGRAM': SCRIPT,
    }
    expected_container = {
        'Environment': expected_env,
        'Image': IMAGE_URI_FORMAT_STRING.format(
            REGION, CPU_IMAGE_NAME, tf_version, 'cpu', 'py2'),
        'ModelDataUrl': 's3://m/m.tar.gz',
    }
    sagemaker_session.create_model.assert_called_with(
        estimator._current_job_name, ROLE, expected_container)
def test_transform_mxnet_vpc(sagemaker_session, mxnet_full_version):
    """Train and transform inside a VPC and check the VPC config is propagated.

    The subnets and security group passed to the estimator are compared with
    the ``VpcConfig`` reported for both the training job and the model created
    for the transformer.
    """
    mnist_dir = os.path.join(DATA_DIR, 'mxnet_mnist')

    ec2_client = sagemaker_session.boto_session.client('ec2')
    subnet_ids, security_group_id = get_or_create_vpc_resources(
        ec2_client, sagemaker_session.boto_session.region_name)

    estimator = MXNet(entry_point=os.path.join(mnist_dir, 'mnist.py'),
                      role='SageMakerRole',
                      train_instance_count=1,
                      train_instance_type='ml.c4.xlarge',
                      sagemaker_session=sagemaker_session,
                      framework_version=mxnet_full_version,
                      subnets=subnet_ids,
                      security_group_ids=[security_group_id])

    channels = {
        'train': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'train'),
            key_prefix='integ-test-data/mxnet_mnist/train'),
        'test': estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'test'),
            key_prefix='integ-test-data/mxnet_mnist/test'),
    }

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        estimator.fit(channels)

    training_vpc = sagemaker_session.sagemaker_client.describe_training_job(
        TrainingJobName=estimator.latest_training_job.name)['VpcConfig']
    # Subnets are compared as sets, so their order does not matter.
    assert set(subnet_ids) == set(training_vpc['Subnets'])
    assert [security_group_id] == training_vpc['SecurityGroupIds']

    transform_input = estimator.sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, 'transform', 'data.csv'),
        key_prefix='integ-test-data/mxnet_mnist/transform')

    transformer = _create_transformer_and_transform_job(estimator, transform_input)
    with timeout(minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES):
        transformer.wait()

    model_vpc = sagemaker_session.sagemaker_client.describe_model(
        ModelName=transformer.model_name)['VpcConfig']
    assert set(subnet_ids) == set(model_vpc['Subnets'])
    assert [security_group_id] == model_vpc['SecurityGroupIds']
def test_mxnet(strftime, sagemaker_session, mxnet_version):
    """Exercise the MXNet estimator's fit / create_model / deploy flow.

    Checks, in order: which methods ``fit`` triggered on the session and on
    its boto session, the arguments recorded for the ``train`` call, the GPU
    container definition produced by the model, that the CPU image name
    contains ``cpu``, and the type of the predictor returned by ``deploy``.

    Args:
        strftime: Fixture not referenced in the body; presumably freezes the
            timestamp so generated names match ``TIMESTAMP`` -- TODO confirm.
        sagemaker_session: Session whose ``method_calls`` are inspected
            (presumably a mock -- confirm against the fixture definition).
        mxnet_version: MXNet framework version under test.
    """
    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        framework_version=mxnet_version,
    )

    inputs = 's3://mybucket/train'
    estimator.fit(inputs=inputs)

    # Only `train` followed by `logs_for_job` should have been recorded on the
    # session, and only `resource` on the underlying boto session.
    assert [call[0] for call in sagemaker_session.method_calls] == ['train', 'logs_for_job']
    assert [call[0] for call in sagemaker_session.boto_session.method_calls] == ['resource']

    # The reference training arguments differ from ours only in the S3 URI.
    expected_train_args = _create_train_job(mxnet_version)
    expected_train_args['input_config'][0]['DataSource']['S3DataSource']['S3Uri'] = inputs
    assert sagemaker_session.method_calls[0][2] == expected_train_args

    model = estimator.create_model()

    gpu_image = '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:{}-gpu-py2'.format(
        mxnet_version)
    submit_directory = 's3://mybucket/sagemaker-mxnet-{}/source/sourcedir.tar.gz'.format(TIMESTAMP)
    expected_container_def = {
        'Environment': {
            'SAGEMAKER_SUBMIT_DIRECTORY': submit_directory,
            'SAGEMAKER_PROGRAM': 'dummy_script.py',
            'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
            'SAGEMAKER_REGION': 'us-west-2',
            'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
        },
        'Image': gpu_image,
        'ModelDataUrl': 's3://m/m.tar.gz',
    }
    assert expected_container_def == model.prepare_container_def(GPU)

    # A CPU instance type must resolve to a CPU image.
    cpu_container_def = model.prepare_container_def(CPU)
    assert 'cpu' in cpu_container_def['Image']

    assert isinstance(estimator.deploy(1, GPU), MXNetPredictor)
def test_mxnet_local_mode(sagemaker_local_session, mxnet_full_version):
    """Train, deploy and query an MXNet MNIST model in SageMaker local mode.

    The estimator is trained on the ``mxnet_mnist`` train/test data uploaded
    through the session, then deployed to a ``local`` endpoint named after the
    training job and queried once with a zero-filled ``(1, 1, 28, 28)`` array.
    The endpoint is always deleted afterwards.

    Args:
        sagemaker_local_session: Session used both for data upload and by the
            estimator.
        mxnet_full_version: MXNet framework version passed to the estimator.
    """
    script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'mnist.py')
    data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

    mx = MXNet(entry_point=script_path,
               role='SageMakerRole',
               py_version=PYTHON_VERSION,
               train_instance_count=1,
               train_instance_type='local',
               sagemaker_session=sagemaker_local_session,
               framework_version=mxnet_full_version)

    train_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, 'train'),
        key_prefix='integ-test-data/mxnet_mnist/train')
    test_input = mx.sagemaker_session.upload_data(
        path=os.path.join(data_path, 'test'),
        key_prefix='integ-test-data/mxnet_mnist/test')

    mx.fit({'train': train_input, 'test': test_input})
    endpoint_name = mx.latest_training_job.name

    # The context manager guarantees the lock file descriptor is closed even
    # when the test fails (the original left it open for the process lifetime).
    with open(LOCK_PATH, 'w') as local_mode_lock_fd:
        local_mode_lock = local_mode_lock_fd.fileno()

        # Since Local Mode uses the same port for serving, we need a lock in order
        # to allow concurrent test execution. The serving test is really fast so it still
        # makes sense to allow this behavior.
        # Acquire the lock *before* entering the try block so the cleanup below
        # only runs when we actually hold it.
        fcntl.lockf(local_mode_lock, fcntl.LOCK_EX)
        try:
            predictor = mx.deploy(1, 'local', endpoint_name=endpoint_name)
            data = numpy.zeros(shape=(1, 1, 28, 28))
            predictor.predict(data)
        finally:
            try:
                mx.delete_endpoint()
                # NOTE(review): presumably gives the local serving container time
                # to stop before another test reuses the port -- confirm.
                time.sleep(5)
            finally:
                # Release the lock even if endpoint deletion raised, so other
                # concurrently running tests are not blocked.
                fcntl.lockf(local_mode_lock, fcntl.LOCK_UN)
예제 #30
0
def test_mxnet(strftime, sagemaker_session, mxnet_version):
    """Exercise the MXNet estimator's fit / create_model / deploy flow.

    Checks, in order: which methods ``fit`` triggered on the session and on
    its boto session, the arguments recorded for the ``train`` call, the GPU
    container definition produced by the model, that the CPU image name
    contains ``cpu``, and the type of the predictor returned by ``deploy``.

    Args:
        strftime: Fixture not referenced in the body; presumably freezes the
            timestamp so generated names match ``TIMESTAMP`` -- TODO confirm.
        sagemaker_session: Session whose ``method_calls`` are inspected
            (presumably a mock -- confirm against the fixture definition).
        mxnet_version: MXNet framework version under test.
    """
    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        framework_version=mxnet_version,
    )

    inputs = 's3://mybucket/train'
    estimator.fit(inputs=inputs)

    # Only `train` followed by `logs_for_job` should have been recorded on the
    # session, and only `resource` on the underlying boto session.
    assert [call[0] for call in sagemaker_session.method_calls] == ['train', 'logs_for_job']
    assert [call[0] for call in sagemaker_session.boto_session.method_calls] == ['resource']

    # The reference training arguments differ from ours only in the S3 URI.
    expected_train_args = _create_train_job(mxnet_version)
    expected_train_args['input_config'][0]['DataSource']['S3DataSource']['S3Uri'] = inputs
    assert sagemaker_session.method_calls[0][2] == expected_train_args

    model = estimator.create_model()

    gpu_image = '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:{}-gpu-py2'.format(
        mxnet_version)
    submit_directory = 's3://mybucket/sagemaker-mxnet-{}/source/sourcedir.tar.gz'.format(TIMESTAMP)
    expected_container_def = {
        'Environment': {
            'SAGEMAKER_SUBMIT_DIRECTORY': submit_directory,
            'SAGEMAKER_PROGRAM': 'dummy_script.py',
            'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
            'SAGEMAKER_REGION': 'us-west-2',
            'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
        },
        'Image': gpu_image,
        'ModelDataUrl': 's3://m/m.tar.gz',
    }
    assert expected_container_def == model.prepare_container_def(GPU)

    # A CPU instance type must resolve to a CPU image.
    cpu_container_def = model.prepare_container_def(CPU)
    assert 'cpu' in cpu_container_def['Image']

    assert isinstance(estimator.deploy(1, GPU), MXNetPredictor)
    def _create_model(output_path):
        """Train a local-mode MXNet MNIST estimator and return its model.

        ``mxnet_full_version`` and ``sagemaker_local_session`` are taken from
        the enclosing scope.

        Args:
            output_path: Forwarded to the estimator as ``output_path``.

        Returns:
            The object returned by ``create_model(1)`` on the fitted estimator.
        """
        mnist_dir = os.path.join(DATA_DIR, "mxnet_mnist")
        entry_script = os.path.join(DATA_DIR, "mxnet_mnist", "mnist.py")

        estimator = MXNet(
            entry_point=entry_script,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="local",
            output_path=output_path,
            framework_version=mxnet_full_version,
            sagemaker_session=sagemaker_local_session,
        )

        # Upload the train channel first, then the test channel, through the
        # estimator's own session.
        upload = estimator.sagemaker_session.upload_data
        train_channel = upload(path=os.path.join(mnist_dir, "train"),
                               key_prefix="integ-test-data/mxnet_mnist/train")
        test_channel = upload(path=os.path.join(mnist_dir, "test"),
                              key_prefix="integ-test-data/mxnet_mnist/test")

        estimator.fit({"train": train_channel, "test": test_channel})
        return estimator.create_model(1)
예제 #32
0
    "sms_spam_classifier_mxnet_script.py",
    role=role,
    train_instance_count=1,
    train_instance_type="ml.c5.2xlarge",
    output_path=output_path,
    base_job_name="sms-spam-classifier-mxnet",
    framework_version="1.2",
    code_location=code_location,
    hyperparameters={
        "batch_size": 100,
        "epochs": 20,
        "learning_rate": 0.01
    },
    py_version="py3",
)

# S3 prefixes for the training ("train") and validation ("val") channels,
# both located under the same bucket and key prefix.
inputs = dict(
    train="s3://{0}/{1}/train/".format(bucket_name, bucket_key_prefix),
    val="s3://{0}/{1}/val/".format(bucket_name, bucket_key_prefix),
)

# Run training on the channels defined above.
m.fit(inputs)

# Deploy the model on a SageMaker endpoint with a fixed endpoint name.
mxnet_pred = m.deploy(
    endpoint_name="sagemaker-endpoint",
    instance_type="ml.t2.medium",
    initial_instance_count=1,
)
예제 #33
0
                        # job_name=job_name,
                        # channel_input_dirs=channel_input_dirs,
                        output_path=output_path # output bucket name
                    )


# Record information that is job/runtime specific.
# Note: this isn't being written back to the config file.
for info_key, info_value in (
        ('job_name', job_name),
        ('ckpt_dir', ckpt_dir),
        ('timestamp', timestamp),
):
    config_manager.put('sagemaker_job_info', info_key, info_value)


# Write a log for this job run to a new file; the same path is rewritten
# after training so the copy also contains the runtime.
history_save_path = 'train_history/'
job_history_file = os.path.join(history_save_path, job_name + '.json')
config_manager.write_copy(job_history_file)

# Call evaluate in separate process
# Process(evaluate_on_timer, ('csvs/test_data.csv', ))


# Call fit. Train path expected to contain both a train_data.csv and test_data.csv file,
# so the same location is used for the "train" and "test" channels.
train_path = cfg['train_path']
train_location = str(train_path)
mx_estimator.fit({"train": train_location, "test": train_location})

# Record the elapsed time since `ts` and persist it with the job history.
end = time.time()
train_runtime = end - ts
print("Total traintime: {}".format(train_runtime))
config_manager.put('sagemaker_job_info', 'train_runtime', train_runtime)
config_manager.write_copy(job_history_file)
예제 #34
0
# Instance type used for the single training instance.
instance_type = 'ml.p3.2xlarge'

# Estimator for source/model.py. Spot training is requested with
# train_max_wait = 24 * 60 * 60 (one day if the unit is seconds -- confirm
# against the SDK documentation).
model = MXNet(
    source_dir='source',
    entry_point='model.py',
    py_version='py3',
    framework_version='1.4.1',
    train_instance_count=1,
    train_instance_type=instance_type,
    role=role,
    train_use_spot_instances=True,
    train_max_wait=24 * 60 * 60,
    # Publish algorithm metrics to CloudWatch: each regex captures one
    # numeric accuracy value.
    metric_definitions=[
        {'Name': 'train_acc', 'Regex': "^.*epoch : accuracy = ([0-9.]+).*$"},
        {'Name': 'test_acc', 'Regex': "Test: accuracy: ([0-9.]+).*$"},
    ],
)

inputs = remote_inputs

# One channel per sub-folder of the remote input location; note the "rgb"
# channel maps to the upper-case "RGB" folder.
model.fit(
    inputs={
        'train': inputs + '/train',
        'val': inputs + '/val',
        'test': inputs + '/test',
        'rgb': inputs + '/RGB',
    },
    wait=True,
)
# * Instantiate an estimator object and pass in the code as the entry point parameter.
# * Train and deploy the model

# In[41]:

# Estimator for mnist.py: one ml.m4.xlarge training instance, a learning rate
# of 0.1, and explicit locations for the model artifacts and uploaded code.
mnist_estimator = MXNet(entry_point='mnist.py',
                        role=role,
                        output_path=model_artifacts_location,
                        code_location=custom_code_upload_location,
                        train_instance_count=1,
                        train_instance_type='ml.m4.xlarge',
                        hyperparameters={'learning_rate': 0.1})

# In[42]:

# Train on the "train" and "test" channels.
mnist_estimator.fit({'train': train_data_location, 'test': test_data_location})

# In[43]:

# Deploy the trained model to a single-instance endpoint.
predictor = mnist_estimator.deploy(initial_instance_count=1,
                                   instance_type='ml.m4.xlarge')

# ## Validating the model
# * Invoke the html script to read in an input. The pixel data from your drawing will be loaded into a data variable in this notebook.
# * Using the predictor object to classify the handwritten digit.
# * Raw predictions and Labelled predictions display the probabilities of the digit being each of the defined labels.
# * Most likely answer prints the label with the maximum probability.

# In[76]:

# Read the page inside a context manager so the file handle is closed right
# away instead of being left to the garbage collector. The HTML(...) call
# stays a top-level expression so a notebook cell still displays its result.
with open("input.html") as input_page:
    input_markup = input_page.read()
HTML(input_markup)
예제 #36
0
    sagemaker_session=sagemaker_session,
    entry_point="smtrain.py",
    source_dir="../benchmarks/tr-gpu/mx",
    role="SageMakerRole",
    train_instance_count=12,
    train_instance_type="ml.p3.16xlarge",
    image_name=
    "841569659894.dkr.ecr.us-east-1.amazonaws.com/beta-mxnet-training:1.4.1-py3-gpu-build",
    py_version="py3",
    output_path="s3://bai-results-sagemaker",
    train_volume_size=200,
    framework_version="1.4",
    distributions={"parameter_server": {
        "enabled": True
    }},
)

# Input channels for the training job, keyed by channel name. Each value is
# the S3 URI of a .rec record file or its matching .idx index file.
data = dict(
    # Disabled channel: "s1": "s3://mxnet-bln-data-sagemaker/small"
    train="s3://mxnet-asimov-data-sagemaker/imagenet/processed/train-480px-q95.rec",
    trainidx="s3://mxnet-asimov-data-sagemaker/imagenet/processed/train-480px-q95.idx",
    validate="s3://mxnet-asimov-data-sagemaker/imagenet/processed/val-480px-q95.rec",
    validx="s3://mxnet-asimov-data-sagemaker/imagenet/processed/val-480px-q95.idx",
)

# Start training on the channels above with logs and wait both enabled.
tf_estimator.fit(data, wait=True, logs=True)