Example #1
def main():
    download_training_and_eval_data()

    image = 'sagemaker-tensorflow2-local'

    print('Starting model training.')
    california_housing_estimator = Estimator(
        image,
        DUMMY_IAM_ROLE,
        hyperparameters={'epochs': 10,
                         'batch_size': 64,
                         'learning_rate': 0.1},
        instance_count=1,
        instance_type="local")

    inputs = {'train': 'file://./data/train', 'test': 'file://./data/test'}
    california_housing_estimator.fit(inputs, logs=True)
    print('Completed model training')

    print('Deploying endpoint in local mode')
    predictor = california_housing_estimator.deploy(initial_instance_count=1, instance_type='local')

    do_inference_on_local_endpoint(predictor)

    print('About to delete the endpoint to stop paying (if in cloud mode).')
    predictor.delete_endpoint()
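Example #1 relies on helpers that aren't shown (download_training_and_eval_data, do_inference_on_local_endpoint) and on a DUMMY_IAM_ROLE constant. The following is a minimal, hypothetical sketch of what they might look like, assuming the California housing data set that the variable names in these samples suggest; the real sample defines its own versions.

import os

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Local mode does not use the role for training, so a placeholder ARN is typically fine.
DUMMY_IAM_ROLE = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'


def download_training_and_eval_data():
    """Hypothetical helper: fetch the California housing data and write train/test CSV splits."""
    os.makedirs('./data/train', exist_ok=True)
    os.makedirs('./data/test', exist_ok=True)

    data = fetch_california_housing(as_frame=True).frame
    train, test = train_test_split(data, test_size=0.2, random_state=42)
    train.to_csv('./data/train/train.csv', index=False)
    test.to_csv('./data/test/test.csv', index=False)


def do_inference_on_local_endpoint(predictor):
    """Hypothetical helper: send a few test rows to the local endpoint and print the predictions."""
    test = pd.read_csv('./data/test/test.csv')
    x_test = test.drop('MedHouseVal', axis=1).values[:10].astype(np.float32)
    print(predictor.predict(x_test))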
def test_generic_create_model_vpc_config_override(sagemaker_session):
    vpc_config_a = {'Subnets': ['foo'], 'SecurityGroupIds': ['bar']}
    vpc_config_b = {'Subnets': ['foo', 'bar'], 'SecurityGroupIds': ['baz']}

    e = Estimator(IMAGE_NAME,
                  ROLE,
                  INSTANCE_COUNT,
                  INSTANCE_TYPE,
                  sagemaker_session=sagemaker_session)
    e.fit({'train': 's3://bucket/training-prefix'})
    assert e.get_vpc_config() is None
    assert e.create_model().vpc_config is None
    assert e.create_model(
        vpc_config_override=vpc_config_a).vpc_config == vpc_config_a
    assert e.create_model(vpc_config_override=None).vpc_config is None

    e.subnets = vpc_config_a['Subnets']
    e.security_group_ids = vpc_config_a['SecurityGroupIds']
    assert e.get_vpc_config() == vpc_config_a
    assert e.create_model().vpc_config == vpc_config_a
    assert e.create_model(
        vpc_config_override=vpc_config_b).vpc_config == vpc_config_b
    assert e.create_model(vpc_config_override=None).vpc_config is None

    with pytest.raises(ValueError):
        e.get_vpc_config(vpc_config_override={'invalid'})
    with pytest.raises(ValueError):
        e.create_model(vpc_config_override={'invalid'})
Example #3
def main():
    args = get_args()

    sess = sagemaker.Session()
    role = get_execution_role()

    client = boto3.client('sts')
    account = client.get_caller_identity()['Account']

    my_session = boto3.session.Session()
    region = my_session.region_name

    container_name = args.container_name
    ecr_image = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(
        account, region, container_name)

    inputs = sess.upload_data(path=args.data, key_prefix=DATASET_PREFIX)

    hyperparameters = {'train-steps': 1000}
    instance_type = 'ml.m4.xlarge'
    estimator = Estimator(role=role,
                          hyperparameters=hyperparameters,
                          instance_count=1,
                          instance_type=instance_type,
                          image_uri=ecr_image)
    estimator.fit(inputs)
def test_byo_estimator(sagemaker_session, region):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.

    """
    image_name = registry(region) + "/factorization-machines:1"
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = "test_byo_estimator"
        key = "recordio-pb-data"

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(1, "ml.m4.xlarge", endpoint_name=job_name)
        predictor.serializer = fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None
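The fm_serializer assigned to the predictor above isn't shown in this listing. A hedged sketch, assuming the JSON request format the Factorization Machines container accepts ({"instances": [{"features": [...]}]}):

import json

def fm_serializer(data):
    # Hypothetical implementation: wrap each input row in the
    # {"instances": [{"features": [...]}]} payload expected by the
    # Factorization Machines container.
    js = {"instances": []}
    for row in data:
        js["instances"].append({"features": row.tolist()})
    return json.dumps(js)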
def test_generic_to_deploy(sagemaker_session):
    e = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, output_path=OUTPUT_PATH,
                  sagemaker_session=sagemaker_session)

    e.set_hyperparameters(**HYPERPARAMS)

    e.fit({'train': 's3://bucket/training-prefix'})

    predictor = e.deploy(INSTANCE_COUNT, INSTANCE_TYPE)

    sagemaker_session.train.assert_called_once()
    assert len(sagemaker_session.train.call_args[0]) == 0
    args = sagemaker_session.train.call_args[1]
    assert args['job_name'].startswith(IMAGE_NAME)

    args.pop('job_name')
    args.pop('role')

    assert args == HP_TRAIN_CALL

    sagemaker_session.create_model.assert_called_once()
    args = sagemaker_session.create_model.call_args[0]
    assert args[0].startswith(IMAGE_NAME)
    assert args[1] == ROLE
    assert args[2]['Image'] == IMAGE_NAME
    assert args[2]['ModelDataUrl'] == MODEL_DATA

    assert isinstance(predictor, RealTimePredictor)
    assert predictor.endpoint.startswith(IMAGE_NAME)
    assert predictor.sagemaker_session == sagemaker_session
def run_benchmark(instance_count,
                  subnet,
                  security_group,
                  aws_account,
                  base_image,
                  region='us-west-2',
                  role="SageMakerRole",
                  tag='tensorflow-hvd:latest',
                  build_image=False,
                  wait=True):

    if build_image:
        build(base_image=base_image,
              entrypoint='launcher.sh',
              source_dir='benchmarks',
              tag=tag)

    ecr_image_name = push(tag)

    output_path = 's3://sagemaker-{}-{}/hvd-1-single/{}node-{}'.format(
        region, aws_account, instance_count, time.time_ns())

    estimator = Estimator(ecr_image_name,
                          role=role,
                          base_job_name='hvd-bench',
                          hyperparameters={},
                          train_instance_count=instance_count,
                          train_instance_type='ml.p3.16xlarge',
                          output_path=output_path,
                          subnets=[subnet],
                          security_group_ids=[security_group])

    estimator.fit('s3://sagemaker-sample-data-%s/spark/mnist/train/' % region,
                  wait=wait)
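run_benchmark calls build() and push() helpers that aren't included here. A rough, hypothetical sketch of what push() might do, assuming the Docker CLI is installed and a `docker login` against ECR has already been performed; the real helper may work differently.

import subprocess

import boto3

def push(tag):
    # Hypothetical helper: retag the locally built image for the caller's ECR
    # registry and push it, returning the fully qualified image name.
    account = boto3.client('sts').get_caller_identity()['Account']
    region = boto3.session.Session().region_name
    ecr_image_name = '{}.dkr.ecr.{}.amazonaws.com/{}'.format(account, region, tag)
    subprocess.check_call(['docker', 'tag', tag, ecr_image_name])
    subprocess.check_call(['docker', 'push', ecr_image_name])
    return ecr_image_name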
def test_generic_deploy_vpc_config_override(sagemaker_session):
    vpc_config_a = {'Subnets': ['foo'], 'SecurityGroupIds': ['bar']}
    vpc_config_b = {'Subnets': ['foo', 'bar'], 'SecurityGroupIds': ['baz']}

    e = Estimator(IMAGE_NAME,
                  ROLE,
                  INSTANCE_COUNT,
                  INSTANCE_TYPE,
                  sagemaker_session=sagemaker_session)
    e.fit({'train': 's3://bucket/training-prefix'})
    e.deploy(INSTANCE_COUNT, INSTANCE_TYPE)
    assert sagemaker_session.create_model.call_args_list[0][1][
        'vpc_config'] is None

    e.subnets = vpc_config_a['Subnets']
    e.security_group_ids = vpc_config_a['SecurityGroupIds']
    e.deploy(INSTANCE_COUNT, INSTANCE_TYPE)
    assert sagemaker_session.create_model.call_args_list[1][1][
        'vpc_config'] == vpc_config_a

    e.deploy(INSTANCE_COUNT, INSTANCE_TYPE, vpc_config_override=vpc_config_b)
    assert sagemaker_session.create_model.call_args_list[2][1][
        'vpc_config'] == vpc_config_b

    e.deploy(INSTANCE_COUNT, INSTANCE_TYPE, vpc_config_override=None)
    assert sagemaker_session.create_model.call_args_list[3][1][
        'vpc_config'] is None
Example #8
def main():
    download_training_and_eval_data()

    print('Starting model training.')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )

    image = 'sagemaker-hdbscan-local'

    local_estimator = Estimator(image,
                                DUMMY_IAM_ROLE,
                                instance_count=1,
                                instance_type="local",
                                hyperparameters={
                                    "min_cluster_size": 50,
                                })

    train_location = 'file://' + local_train

    local_estimator.fit({'train': train_location})
    print('Completed model training')

    model_data = local_estimator.model_data
    print(model_data)
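Even in local mode the trained artifact is packaged as a model.tar.gz. A hedged sketch of downloading and unpacking it, assuming model_data is an s3:// URI (with a file:// output_path the tarball could be opened directly); the helper name is illustrative only.

import os
import tarfile
from urllib.parse import urlparse

import boto3

def extract_model_artifact(model_data, target_dir='./model'):
    # Assumes model_data looks like s3://bucket/prefix/model.tar.gz
    parsed = urlparse(model_data)
    boto3.client('s3').download_file(parsed.netloc, parsed.path.lstrip('/'), 'model.tar.gz')
    os.makedirs(target_dir, exist_ok=True)
    with tarfile.open('model.tar.gz') as tar:
        tar.extractall(target_dir)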
def test_generic_to_deploy(sagemaker_session):
    e = Estimator(IMAGE_NAME,
                  ROLE,
                  INSTANCE_COUNT,
                  INSTANCE_TYPE,
                  output_path=OUTPUT_PATH,
                  sagemaker_session=sagemaker_session)

    e.set_hyperparameters(**HYPERPARAMS)

    e.fit({'train': 's3://bucket/training-prefix'})

    predictor = e.deploy(INSTANCE_COUNT, INSTANCE_TYPE)

    sagemaker_session.train.assert_called_once()
    assert len(sagemaker_session.train.call_args[0]) == 0
    args = sagemaker_session.train.call_args[1]
    assert args['job_name'].startswith(IMAGE_NAME)

    args.pop('job_name')
    args.pop('role')

    assert args == HP_TRAIN_CALL

    sagemaker_session.create_model.assert_called_once()
    args = sagemaker_session.create_model.call_args[0]
    assert args[0].startswith(IMAGE_NAME)
    assert args[1] == ROLE
    assert args[2]['Image'] == IMAGE_NAME
    assert args[2]['ModelDataUrl'] == MODEL_DATA

    assert isinstance(predictor, RealTimePredictor)
    assert predictor.endpoint.startswith(IMAGE_NAME)
    assert predictor.sagemaker_session == sagemaker_session
def test_xgb_train_container_cpu(sagemaker_session, instance_type):
    training_data_path = os.path.join(test_dir, 'resources/data/')
    estimator = Estimator(role=ROLE,
                          sagemaker_session=sagemaker_session,
                          train_instance_count=1,
                          train_instance_type=instance_type,
                          image_name=XGB_IMAGE_NAME,
                          output_path=MODEL_SAVE_PATH,
                          hyperparameters={
                              "train-file": "penguins.csv",
                              "max-depth": 3,
                              "categorical-columns": 'island,sex'
                          })

    inputs = estimator.sagemaker_session.upload_data(path=os.path.join(
        training_data_path, 'penguins.csv'),
                                                     bucket=BUCKET_NAME,
                                                     key_prefix='penguins/tmp')
    estimator.fit(
        inputs, job_name=unique_name_from_base('test-sagemaker-xgb-training'))

    # Clean up the models folder and re-create it
    if os.path.exists(os.path.join(test_dir, 'resources/models_tar')):
        shutil.rmtree(os.path.join(test_dir, 'resources/models_tar'))
        os.mkdir(os.path.join(test_dir, 'resources/models_tar'))

    # Download the model files
    obj_name = os.path.relpath(estimator.model_data, 's3://' + BUCKET_NAME)
    s3.Bucket(BUCKET_NAME).download_file(
        obj_name, os.path.join(test_dir, 'resources/models_tar/model.tar.gz'))

    _assert_s3_file_exists(sagemaker_session.boto_region_name,
                           estimator.model_data)
def test_async_byo_estimator(sagemaker_session, region):
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = unique_name_from_base('byo')
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {
            'encoding': 'latin1'
        }

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, 'train',
                                                          key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole',
                              train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session,
                              base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name,
                                     sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1,
                                 'ml.m4.xlarge',
                                 endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name
Example #12
def create_blazing_text_model(
        region_name: str,
        sm_session: Session,
        sm_role: str,
        s3_input_url: str,
        s3_output_url: str):
    """
    Create a BlazingText model.

    Args:
        - region_name: AWS Region Name to use SageMaker in.
        - sm_session: SageMaker Session Object.
        - sm_role: SageMaker role arn that allows SM to connect to s3.
        - s3_input_url: training data input path on s3
        - s3_output_url: model artifacts output path

    Return:
        - bt_model: instance of Estimator, can be used to deploy an inference endpoint
    """
    # define container
    container = get_image_uri(region_name, "blazingtext", "latest")

    # create estimator
    bt_model = Estimator(container,
                         sm_role,
                         train_instance_count=1,
                         train_instance_type='ml.c4.2xlarge',
                         train_volume_size=30,
                         train_max_run=360000,
                         input_mode='File',
                         output_path=s3_output_url,
                         sagemaker_session=sm_session)

    # set hyperparameters
    bt_model.set_hyperparameters(mode="skipgram",
                                 epochs=5,
                                 min_count=5,
                                 sampling_threshold=0.0001,
                                 learning_rate=0.05,
                                 window_size=5,
                                 vector_dim=100,
                                 negative_samples=5,
                                 subwords=True,
                                 min_char=3,
                                 max_char=6,
                                 batch_size=11,
                                 evaluation=True)

    # define data channels
    train_data = s3_input(s3_input_url, distribution='FullyReplicated',
                          content_type='text/plain', s3_data_type='S3Prefix')
    data_channels = {'train': train_data}

    # fit model
    bt_model.fit(inputs=data_channels, logs=True)

    return bt_model
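A possible way to call the function above; the region, bucket paths, and role ARN are placeholders, not values from the original code.

from sagemaker import Session

session = Session()
bt_model = create_blazing_text_model(
    region_name='us-east-1',
    sm_session=session,
    sm_role='arn:aws:iam::111111111111:role/SageMakerRole',
    s3_input_url='s3://my-bucket/blazingtext/train',
    s3_output_url='s3://my-bucket/blazingtext/output',
)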
def test_async_byo_estimator(sagemaker_session, region):
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = unique_name_from_base("byo")
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = "test_byo_estimator"
        key = "recordio-pb-data"

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, wait=False, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=job_name,
                                     sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1,
                                 "ml.m4.xlarge",
                                 endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None

        assert estimator.train_image() == image_name
Example #14
class SagemakerTFEstimator(object):
    def __init__(self, container_image_uri: str, sm_session: sm.Session,
                 sm_role: str, project_tag: List[Dict[str, str]],
                 tn_instance_type: str, tn_instance_count: int,
                 tn_volumesize: int, tn_job_name: str, max_run: int,
                 shared_hyperparameters: Dict[str, str], **kwargs) -> None:

        self.estimator = Estimator(
            image_uri=container_image_uri,
            instance_type=tn_instance_type,
            instance_count=tn_instance_count,
            volume_size=tn_volumesize,
            role=sm_role,
            sagemaker_session=sm_session,
            tags=project_tag,
            max_run=max_run,
            hyperparameters=shared_hyperparameters,
            **kwargs,
        )
        self._training_job_name = tn_job_name
        self._project_tag = project_tag

    def model_fit(
        self,
        inputs: Dict[str, str],
        hparam: Dict[str, Any] = None,
    ) -> None:

        if hparam is not None:

            tuner = HyperparameterTuner(
                estimator=self.estimator,
                objective_metric_name=hparam.get('objective_metric_name'),
                metric_definitions=hparam.get('metric_definitions'),
                hyperparameter_ranges=hparam.get('hyperparameter_ranges'),
                objective_type=hparam.get('objective_type'),
                max_jobs=hparam.get('max_jobs'),
                max_parallel_jobs=hparam.get('max_parallel_jobs'),
                tags=self._project_tag,
                base_tuning_job_name=self._training_job_name,
            )
            tuner.fit(
                inputs=inputs,
                job_name=self._training_job_name,
                wait=False,
                logs='All',
            )

        else:

            self.estimator.fit(
                inputs=inputs,
                job_name=self._training_job_name,
                wait=False,
                logs='All',
            )
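A hedged usage sketch of the wrapper above, showing the shape of the hparam dict that model_fit expects when hyperparameter tuning is requested. Every value here is a placeholder, not taken from the original code.

import sagemaker
from sagemaker.tuner import ContinuousParameter

sm_session = sagemaker.Session()

wrapper = SagemakerTFEstimator(
    container_image_uri='111111111111.dkr.ecr.us-east-1.amazonaws.com/my-tf-image:latest',
    sm_session=sm_session,
    sm_role='arn:aws:iam::111111111111:role/SageMakerRole',
    project_tag=[{'Key': 'project', 'Value': 'demo'}],
    tn_instance_type='ml.m5.xlarge',
    tn_instance_count=1,
    tn_volumesize=30,
    tn_job_name='demo-training-job',
    max_run=3600,
    shared_hyperparameters={'epochs': '10'},
)

# Passing hparam triggers the HyperparameterTuner branch of model_fit.
wrapper.model_fit(
    inputs={'train': 's3://my-bucket/train', 'test': 's3://my-bucket/test'},
    hparam={
        'objective_metric_name': 'validation:loss',
        'metric_definitions': [{'Name': 'validation:loss',
                                'Regex': 'val_loss=([0-9\\.]+)'}],
        'hyperparameter_ranges': {'learning-rate': ContinuousParameter(1e-4, 1e-1)},
        'objective_type': 'Minimize',
        'max_jobs': 4,
        'max_parallel_jobs': 2,
    },
)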
Example #15
def test_byo_estimator(sagemaker_session, region, cpu_instance_type,
                       training_set):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.

    """
    image_uri = image_uris.retrieve("factorization-machines", region)
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        prefix = "test_byo_estimator"
        key = "recordio-pb-data"

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_uri=image_uri,
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(
            1,
            cpu_instance_type,
            endpoint_name=job_name,
            serializer=_FactorizationMachineSerializer(),
            deserializer=sagemaker.deserializers.JSONDeserializer(),
        )

        result = predictor.predict(training_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None
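The _FactorizationMachineSerializer used in the deploy call above isn't shown. A hedged sketch, assuming it subclasses the v2 SDK's sagemaker.serializers.BaseSerializer and produces the same {"instances": [...]} payload as the fm_serializer helper sketched earlier:

import json

import sagemaker

class _FactorizationMachineSerializer(sagemaker.serializers.BaseSerializer):
    # Hypothetical implementation of the serializer referenced in the test.
    CONTENT_TYPE = "application/json"

    def serialize(self, data):
        js = {"instances": []}
        for row in data:
            js["instances"].append({"features": row.tolist()})
        return json.dumps(js)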
def test_async_byo_estimator(sagemaker_session, region):
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = name_from_base('byo')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # take 100 examples for faster execution
        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')

        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, vectors, labels)
        buf.seek(0)

        bucket = sagemaker_session.default_bucket()
        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name
Example #17
def test_async_byo_estimator(sagemaker_session, region):
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = name_from_base('byo')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # take 100 examples for faster execution
        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')

        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, vectors, labels)
        buf.seek(0)

        bucket = sagemaker_session.default_bucket()
        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name
def test_byo_estimator(sagemaker_session, region):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.

    """
    image_name = registry(region) + "/factorization-machines:1"
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(prefix, 'train', key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data})

    endpoint_name = name_from_base('byo')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None
Example #19
def main(
    gpu: bool = typer.Option(
        False,
        "--gpu",
        help=
        "Should a GPU based docker image be used? If this flag is set, and you are running a SageMaker job, you must specify an instance with a GPU (e.g. ml.p2/3...).",
    ),
    instance_type: str = typer.Option(
        "local",
        help=
        "SageMaker instance used to run the model, e.g. ml.p2.xlarge or ml.c5.xlarge. Setting this to local will run the container locally.",
    ),
):

    image_name = f"{REPO_URL}:{VERSION}"

    if gpu:
        image_name = image_name + "-gpu"

    input_channels = {
        "train": train,
        "test": test,
        "word_embedding": word_embedding,
        "indices": indices,
        # Setting these to file:// will upload the data from the local drive
        # "train": "file://data/processed/train.jsonl",
        # "test": "file://data/processed/test.jsonl",
        # "word_embedding": "file://data/raw/glove.6B.50d.txt",
    }
    estimator = Estimator(
        image_name=image_name,
        role=ROLE_ARN,
        train_instance_count=1,
        train_instance_type=instance_type,
        hyperparameters={
            "test-path": "/opt/ml/input/data/test/" + test_file,
            "train-path": "/opt/ml/input/data/train/" + train_file,
            "indices-path": "/opt/ml/input/data/indices/" + indices_file,
            "output-path": "/opt/ml/model/",
            "model-output-path": "/opt/ml/model/",
            "embedding-path":
            "/opt/ml/input/data/word_embedding/" + word_embedding_file,
            "embedding-dim": 50,
            "batch-size": 1024,
            "epochs": 2,
            "learning-rate": 0.01,
            "seq-length": 1000,
            "checkpoint": True,
            "checkpoint-path": "/opt/ml/model/",
        },
    )

    estimator.fit(inputs=input_channels)
Example #20
def test_async_byo_estimator(sagemaker_session, region, cpu_instance_type,
                             training_set):
    image_uri = image_uris.retrieve("factorization-machines", region)
    endpoint_name = unique_name_from_base("byo")
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=5):
        prefix = "test_byo_estimator"
        key = "recordio-pb-data"

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_uri=image_uri,
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, wait=False, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=job_name,
                                     sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(
            1,
            cpu_instance_type,
            endpoint_name=endpoint_name,
            serializer=_FactorizationMachineSerializer(),
            deserializer=sagemaker.deserializers.JSONDeserializer(),
        )

        result = predictor.predict(training_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None

        assert estimator.training_image_uri() == image_uri
def test_generic_deploy_accelerator_type(sagemaker_session):
    e = Estimator(IMAGE_NAME,
                  ROLE,
                  INSTANCE_COUNT,
                  INSTANCE_TYPE,
                  sagemaker_session=sagemaker_session)
    e.fit({'train': 's3://bucket/training-prefix'})
    e.deploy(INSTANCE_COUNT, INSTANCE_TYPE, ACCELERATOR_TYPE)

    args = e.sagemaker_session.endpoint_from_production_variants.call_args[0]
    assert args[0].startswith(IMAGE_NAME)
    assert args[1][0]['AcceleratorType'] == ACCELERATOR_TYPE
    assert args[1][0]['InitialInstanceCount'] == INSTANCE_COUNT
    assert args[1][0]['InstanceType'] == INSTANCE_TYPE
Example #22
def test_install_requirements(capsys):
    estimator = Estimator(
        image_name="sagemaker-training-toolkit-test:dummy",
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type="local",
    )

    estimator.fit()

    stdout = capsys.readouterr().out

    assert "Installing collected packages: pyfiglet, train.py" in stdout
    assert "Successfully installed pyfiglet-0.8.post1 train.py-1.0.0" in stdout
    assert "Reporting training SUCCESS" in stdout
def test_generic_to_fit_no_hps(sagemaker_session):
    e = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, output_path=OUTPUT_PATH,
                  sagemaker_session=sagemaker_session)

    e.fit({'train': 's3://bucket/training-prefix'})

    sagemaker_session.train.assert_called_once()
    assert len(sagemaker_session.train.call_args[0]) == 0
    args = sagemaker_session.train.call_args[1]
    assert args['job_name'].startswith(IMAGE_NAME)

    args.pop('job_name')
    args.pop('role')

    assert args == BASE_TRAIN_CALL
def test_generic_training_job_analytics(sagemaker_session):
    sagemaker_session.sagemaker_client.describe_training_job = Mock(
        name='describe_training_job',
        return_value={
            'TuningJobArn':
            'arn:aws:sagemaker:us-west-2:968277160000:hyper-parameter-tuning-job/mock-tuner',
            'TrainingStartTime': 1530562991.299,
        })
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(
        name='describe_hyper_parameter_tuning_job',
        return_value={
            'TrainingJobDefinition': {
                "AlgorithmSpecification": {
                    "TrainingImage": "some-image-url",
                    "TrainingInputMode": "File",
                    "MetricDefinitions": [
                        {"Name": "train:loss", "Regex": "train_loss=([0-9]+\\.[0-9]+)"},
                        {"Name": "validation:loss", "Regex": "valid_loss=([0-9]+\\.[0-9]+)"}
                    ]
                }
            }
        })

    e = Estimator(IMAGE_NAME,
                  ROLE,
                  INSTANCE_COUNT,
                  INSTANCE_TYPE,
                  output_path=OUTPUT_PATH,
                  sagemaker_session=sagemaker_session)

    with pytest.raises(ValueError) as err:  # noqa: F841
        # No training job yet
        a = e.training_job_analytics
        assert a is not None  # This line is never reached

    e.set_hyperparameters(**HYPERPARAMS)
    e.fit({'train': 's3://bucket/training-prefix'})
    a = e.training_job_analytics
    assert a is not None
def test_async_byo_estimator(sagemaker_session, region):
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = name_from_base('byo')
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(prefix, 'train', key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name
Example #26
def train(self):
    """
    Train and deploy the XOR model with SageMaker Linear Learner.
    """
    estimator = Estimator(
        image_uri=self.DOCKER_IMAGE_URI,
        role=self.SAGEMAKER_ROLE,
        instance_count=1,
        hyperparameters={"predictor_type": "binary_classifier"},
        instance_type="ml.m5.large",
    )
    estimator.fit({"train": self.location})
    estimator.deploy(initial_instance_count=1,
                     instance_type=self.SAGEMAKER_INSTANCE,
                     wait=False)
    self.next(self.end)
def test_generic_to_fit_with_hps(sagemaker_session):
    e = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, output_path='s3://bucket/prefix',
                  sagemaker_session=sagemaker_session)

    e.set_hyperparameters(**HYPERPARAMS)

    e.fit({'train': 's3://bucket/training-prefix'})

    sagemaker_session.train.assert_called_once()
    assert len(sagemaker_session.train.call_args[0]) == 0
    args = sagemaker_session.train.call_args[1]
    assert args['job_name'].startswith(IMAGE_NAME)

    args.pop('job_name')
    args.pop('role')

    assert args == HP_TRAIN_CALL
def test_generic_to_fit_no_input(sagemaker_session):
    e = Estimator(IMAGE_NAME,
                  ROLE,
                  INSTANCE_COUNT,
                  INSTANCE_TYPE,
                  output_path=OUTPUT_PATH,
                  sagemaker_session=sagemaker_session)

    e.fit()

    sagemaker_session.train.assert_called_once()
    assert len(sagemaker_session.train.call_args[0]) == 0
    args = sagemaker_session.train.call_args[1]
    assert args['job_name'].startswith(IMAGE_NAME)

    args.pop('job_name')
    args.pop('role')

    assert args == NO_INPUT_TRAIN_CALL
def test_run(resources_folder, sagemaker_role, image, tmpdir, sm_session,
             key_prefix):

    with change_dir(os.path.join(resources_folder, 'ml/code')):
        tar_file = create_tar_file(os.listdir(), target=tmpdir / 'code.tar.gz')

    s3_uri = sm_session.upload_data(path=str(tar_file), key_prefix=key_prefix)

    estimator = Estimator(image_name=image,
                          hyperparameters={
                              DIR_PARAM_NAME: s3_uri,
                              'alpha': '1.0',
                              'sagemaker_mlflow_experiment_id': '2.0',
                          },
                          role=sagemaker_role,
                          train_instance_count=1,
                          train_instance_type='ml.m4.xlarge')

    estimator.fit()

    _assert_s3_file_exists(sm_session.boto_region_name, estimator.model_data)
Example #30
def test_xgb_train_container_cpu(sagemaker_local_session, build_xgb_image):
    model_save_path = 'file:///home/ubuntu/penguin-sagemaker/test/resources/models_tar'  # Has to be absolute path for local
    if os.path.exists(os.path.join(test_dir, 'resources/models_tar', 'model.tar.gz')):
        os.remove(os.path.join(test_dir, 'resources/models_tar', 'model.tar.gz'))
        time.sleep(3)
    model_data_path = 'file://' + os.path.join(test_dir, 'resources/data/')

    estimator = Estimator(
        role='arn:aws:iam::784420883498:role/service-role/AmazonSageMaker-ExecutionRole-20200313T094543',
        sagemaker_session=sagemaker_local_session,
        train_instance_count=1,
        train_instance_type='local',
        image_name=IMAGE_NAME,
        output_path=model_save_path,
        hyperparameters={"train-file": "penguins.csv",
                         "max-depth": 3,
                         "categorical-columns": 'island,sex'})

    estimator.fit(model_data_path, wait=True)  # Not sure if it would work with relative paths

    _assert_files_exist_in_tar(model_save_path, ['penguin_xgb_model.json'])
Example #31
def run(mode):

    if mode == "local":
        os.system('docker build -t aws-train .')
        estimator = Estimator(image_uri='aws-train:latest',
                              role=role,
                              instance_count=1,
                              instance_type='local',
                              output_path=local_output_path,
                              base_job_name=project_name)

        print("Local: Start fitting ... ")
        estimator.fit(inputs=f"file://{local_data_path}", job_name=job_name)

    elif mode == "sagemaker":
        s3_output_location = 's3://{}/{}'.format(bucket_name, project_name)
        estimator = Estimator(image_uri=image_uri,
                              role=role,
                              instance_count=1,
                              instance_type=instance_type,
                              output_path=s3_output_location,
                              base_job_name=project_name)
        print("Sagemaker: Start fitting")

        estimator.fit(inputs=data_uri, job_name=job_name)

        s3_output_uri = "s3://{}/{}/{}/output/model.tar.gz".format(
            bucket_name, project_name, job_name)
        s3 = boto_session.client('s3')
        s3_bucket, key_name = split_s3_bucket_key(s3_output_uri)
        s3.download_file(s3_bucket, key_name, 'model.tar.gz')

    else:
        print(
            "No supported mode found. Please specify from the following: local or sagemaker"
        )

    my_tar = tarfile.open('model.tar.gz')
    my_tar.extractall('./model')
    my_tar.close()
def test_generic_training_job_analytics(sagemaker_session):
    sagemaker_session.sagemaker_client.describe_training_job = Mock(name='describe_training_job', return_value={
        'TuningJobArn': 'arn:aws:sagemaker:us-west-2:968277160000:hyper-parameter-tuning-job/mock-tuner',
        'TrainingStartTime': 1530562991.299,
    })
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(
        name='describe_hyper_parameter_tuning_job',
        return_value={
            'TrainingJobDefinition': {
                "AlgorithmSpecification": {
                    "TrainingImage": "some-image-url",
                    "TrainingInputMode": "File",
                    "MetricDefinitions": [
                        {
                            "Name": "train:loss",
                            "Regex": "train_loss=([0-9]+\\.[0-9]+)"
                        },
                        {
                            "Name": "validation:loss",
                            "Regex": "valid_loss=([0-9]+\\.[0-9]+)"
                        }
                    ]
                }
            }
        }
    )

    e = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE, output_path=OUTPUT_PATH,
                  sagemaker_session=sagemaker_session)

    with pytest.raises(ValueError) as err:  # noqa: F841
        # No training job yet
        a = e.training_job_analytics
        assert a is not None  # This line is never reached

    e.set_hyperparameters(**HYPERPARAMS)
    e.fit({'train': 's3://bucket/training-prefix'})
    a = e.training_job_analytics
    assert a is not None
Example #33
def train(config_path, param_path, image_name, mode, job_id):
    with open(config_path, 'r') as f:
        config = json.load(f)

    with open(param_path, 'r') as f:
        params = json.load(f)

    if mode == 'LOCAL':
        train_instance_type = 'local'
        params['task_type'] = 'CPU'
    else:
        train_instance_type = config['train']['instance_type']
        params['task_type'] = config['train']['task_type']

    train_instance_count = config['train']['instance_count']
    role = config['role']
    model_bucket = config['model_bucket']

    logger.info(
        'Start training with parameters '
        '[job-id="{}", image="{}", mode="{}", instance_type="{}", instance_count={}, params={}]'
        .format(job_id, image_name, mode, train_instance_type,
                train_instance_count, params))

    estimator = Estimator(image_name=image_name,
                          role=role,
                          train_instance_count=train_instance_count,
                          train_instance_type=train_instance_type,
                          hyperparameters=params,
                          output_path=model_bucket,
                          metric_definitions=get_metric_definitions(),
                          train_max_run=(2 * 60 * 60))

    estimator.fit(job_name=job_id,
                  inputs={
                      'training': config['data']['train'],
                      'validation': config['data']['val'],
                      'testing': config['data']['test']
                  })
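The train() entry point above reads its settings from two JSON files. A hypothetical config.json consistent with the keys the function accesses (role, model_bucket, train.*, data.*); every value is a placeholder, not from the original project.

import json

config = {
    "role": "arn:aws:iam::111111111111:role/SageMakerRole",
    "model_bucket": "s3://my-bucket/models",
    "train": {
        "instance_type": "ml.p3.2xlarge",
        "instance_count": 1,
        "task_type": "GPU",
    },
    "data": {
        "train": "s3://my-bucket/data/train",
        "val": "s3://my-bucket/data/val",
        "test": "s3://my-bucket/data/test",
    },
}

with open("config.json", "w") as f:
    json.dump(config, f, indent=2)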
def test_local_run(resources_folder, sagemaker_role, image, tmpdir):

    mlflow_project_dir = 'file://' + os.path.join(resources_folder, 'ml/code')
    output_dir = 'file://' + str(tmpdir)

    estimator = Estimator(image_name=image,
                          hyperparameters={
                              DIR_PARAM_NAME: json.dumps(mlflow_project_dir),
                              'alpha': '1.0',
                              'sagemaker_mlflow_experiment_id': '2.0',
                          },
                          role=sagemaker_role,
                          output_path=output_dir,
                          train_instance_count=1,
                          train_instance_type='local')

    estimator.fit()

    _assert_files_exist_in_tar(output_dir, [
        f'{SAGEMAKER_MODEL_SUBDIR}/model/MLmodel',
        f'{SAGEMAKER_MODEL_SUBDIR}/model/model.pkl',
    ])
def main():
    download_training_and_eval_data()

    image = 'sagemaker-tensorflow2-batch-transform-local'

    env = {
        "MODEL_SERVER_WORKERS": "2"
    }

    print('Starting model training.')
    california_housing_estimator = Estimator(
        image,
        DUMMY_IAM_ROLE,
        hyperparameters={'epochs': 10,
                         'batch_size': 64,
                         'learning_rate': 0.1},
        instance_count=1,
        instance_type="local")

    inputs = {'train': 'file://./data/train', 'test': 'file://./data/test'}
    california_housing_estimator.fit(inputs, logs=True)
    print('Completed model training')

    print('Running Batch Transform in local mode')
    tensorflow_serving_transformer = california_housing_estimator.transformer(
        instance_count=1,
        instance_type='local',
        output_path='file://./data/output',
        env=env
    )

    tensorflow_serving_transformer.transform('file://./data/input',
                                             split_type='Line',
                                             content_type='text/csv')

    print('Printing Batch Transform output file content')
    output_file = open('./data/output/x_test.csv.out', 'r').read()
    print(output_file)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--ecr-repository', help='ECR repo where images will be pushed',
                        default='add-ecr-repo-here', required=True)
    parser.add_argument('--tf-version', default='latest')
    parser.add_argument('--instance-type', default='local', choices=['local', 'ml.c5.xlarge', 'ml.p2.xlarge'])
    args = parser.parse_args()

    tensorflow_version_tag = get_tensorflow_version_tag(args.tf_version, args.instance_type)

    image_name = get_image_name(args.ecr_repository, tensorflow_version_tag)

    build_image(image_name, tensorflow_version_tag)

    if not args.instance_type.startswith('local'):
        push_image(image_name)

    hyperparameters = dict(batch_size=32, data_augmentation=True, learning_rate=.0001,
                           width_shift_range=.1, height_shift_range=.1)

    estimator = Estimator(image_name, role='SageMakerRole', train_instance_count=1,
                          train_instance_type=args.instance_type, hyperparameters=hyperparameters)

    channels = upload_training_data()

    estimator.fit(channels)
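The __main__ block above calls several helpers (get_tensorflow_version_tag, get_image_name, build_image, push_image, upload_training_data) that are defined elsewhere in that script. A hedged sketch of the two naming helpers, purely to illustrate an assumed convention; the real implementations may differ.

def get_tensorflow_version_tag(tf_version, instance_type):
    # Hypothetical: pick a CPU or GPU base-image tag depending on the instance type.
    device = 'gpu' if instance_type.startswith('ml.p') else 'cpu'
    return '{}-{}'.format(tf_version, device)


def get_image_name(ecr_repository, tensorflow_version_tag):
    # Hypothetical: full image name inside the given ECR repository.
    return '{}:tensorflow-{}'.format(ecr_repository, tensorflow_version_tag)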
def test_byo_estimator(sagemaker_session, region):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.

    """
    image_name = registry(region) + "/factorization-machines:1"

    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # take 100 examples for faster execution
        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')

        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, vectors, labels)
        buf.seek(0)

        bucket = sagemaker_session.default_bucket()
        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data})

    endpoint_name = name_from_base('byo')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None