Python Estimator.create_model Exemples, sagemaker.estimator.Estimator.create_model Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_estimator.py Projet : masry707/sagemaker-python-sdk

def test_generic_create_model_vpc_config_override(sagemaker_session):
    vpc_config_a = {'Subnets': ['foo'], 'SecurityGroupIds': ['bar']}
    vpc_config_b = {'Subnets': ['foo', 'bar'], 'SecurityGroupIds': ['baz']}

    e = Estimator(IMAGE_NAME,
                  ROLE,
                  INSTANCE_COUNT,
                  INSTANCE_TYPE,
                  sagemaker_session=sagemaker_session)
    e.fit({'train': 's3://bucket/training-prefix'})
    assert e.get_vpc_config() is None
    assert e.create_model().vpc_config is None
    assert e.create_model(
        vpc_config_override=vpc_config_a).vpc_config == vpc_config_a
    assert e.create_model(vpc_config_override=None).vpc_config is None

    e.subnets = vpc_config_a['Subnets']
    e.security_group_ids = vpc_config_a['SecurityGroupIds']
    assert e.get_vpc_config() == vpc_config_a
    assert e.create_model().vpc_config == vpc_config_a
    assert e.create_model(
        vpc_config_override=vpc_config_b).vpc_config == vpc_config_b
    assert e.create_model(vpc_config_override=None).vpc_config is None

    with pytest.raises(ValueError):
        e.get_vpc_config(vpc_config_override={'invalid'})
    with pytest.raises(ValueError):
        e.create_model(vpc_config_override={'invalid'})

Exemple #2

0

Afficher le fichier

Fichier : test_byo_estimator.py Projet : jrdeco560/sagemaker-python-sdk

def test_byo_estimator(sagemaker_session, region):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.

    """
    image_name = registry(region) + "/factorization-machines:1"
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = "test_byo_estimator"
        key = "recordio-pb-data"

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(1, "ml.m4.xlarge", endpoint_name=job_name)
        predictor.serializer = fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None

Exemple #3

0

Afficher le fichier

Fichier : test_byo_estimator.py Projet : lanlan555/sagemaker-python-sdk

def test_async_byo_estimator(sagemaker_session, region):
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = unique_name_from_base('byo')
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {
            'encoding': 'latin1'
        }

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, 'train',
                                                          key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole',
                              train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session,
                              base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name,
                                     sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1,
                                 'ml.m4.xlarge',
                                 endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name

Exemple #4

0

Afficher le fichier

Fichier : test_byo_estimator.py Projet : jrdeco560/sagemaker-python-sdk

def test_async_byo_estimator(sagemaker_session, region):
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = unique_name_from_base("byo")
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = "test_byo_estimator"
        key = "recordio-pb-data"

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, wait=False, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=job_name,
                                     sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1,
                                 "ml.m4.xlarge",
                                 endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None

        assert estimator.train_image() == image_name

Exemple #5

0

Afficher le fichier

def test_byo_estimator(sagemaker_session, region, cpu_instance_type,
                       training_set):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.

    """
    image_uri = image_uris.retrieve("factorization-machines", region)
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        prefix = "test_byo_estimator"
        key = "recordio-pb-data"

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_uri=image_uri,
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(
            1,
            cpu_instance_type,
            endpoint_name=job_name,
            serializer=_FactorizationMachineSerializer(),
            deserializer=sagemaker.deserializers.JSONDeserializer(),
        )

        result = predictor.predict(training_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None

Exemple #6

0

Afficher le fichier

def test_async_byo_estimator(sagemaker_session, region):
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = name_from_base('byo')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # take 100 examples for faster execution
        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')

        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, vectors, labels)
        buf.seek(0)

        bucket = sagemaker_session.default_bucket()
        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name

Exemple #7

0

Afficher le fichier

Fichier : test_byo_estimator.py Projet : duasahil8/sagemaker-python-sdk

def test_async_byo_estimator(sagemaker_session, region):
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = name_from_base('byo')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # take 100 examples for faster execution
        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')

        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, vectors, labels)
        buf.seek(0)

        bucket = sagemaker_session.default_bucket()
        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name

Exemple #8

0

Afficher le fichier

Fichier : test_byo_estimator.py Projet : cheesama/sagemaker-python-sdk

def test_byo_estimator(sagemaker_session, region):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.

    """
    image_name = registry(region) + "/factorization-machines:1"
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(prefix, 'train', key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data})

    endpoint_name = name_from_base('byo')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

Exemple #9

0

Afficher le fichier

def test_async_byo_estimator(sagemaker_session, region, cpu_instance_type,
                             training_set):
    image_uri = image_uris.retrieve("factorization-machines", region)
    endpoint_name = unique_name_from_base("byo")
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=5):
        prefix = "test_byo_estimator"
        key = "recordio-pb-data"

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_uri=image_uri,
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, wait=False, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=job_name,
                                     sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(
            1,
            cpu_instance_type,
            endpoint_name=endpoint_name,
            serializer=_FactorizationMachineSerializer(),
            deserializer=sagemaker.deserializers.JSONDeserializer(),
        )

        result = predictor.predict(training_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None

        assert estimator.training_image_uri() == image_uri

Exemple #10

0

Afficher le fichier

Fichier : test_byo_estimator.py Projet : cheesama/sagemaker-python-sdk

def test_async_byo_estimator(sagemaker_session, region):
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = name_from_base('byo')
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'

        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(prefix, 'train', key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name

Exemple #11

0

Afficher le fichier

def test_byo_estimator(sagemaker_session, region):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.

    """
    image_name = registry(region) + "/factorization-machines:1"

    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # take 100 examples for faster execution
        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')

        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, vectors, labels)
        buf.seek(0)

        bucket = sagemaker_session.default_bucket()
        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data})

    endpoint_name = name_from_base('byo')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

Exemple #12

0

Afficher le fichier

Fichier : test_byo_estimator.py Projet : duasahil8/sagemaker-python-sdk

def test_byo_estimator(sagemaker_session, region):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.

    """
    image_name = registry(region) + "/factorization-machines:1"

    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # take 100 examples for faster execution
        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')

        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, vectors, labels)
        buf.seek(0)

        bucket = sagemaker_session.default_bucket()
        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
        s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data})

    endpoint_name = name_from_base('byo')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None