Ejemplo n.º 1
0
    def __init__(
        self,
        endpoint_name,
        sagemaker_session=None,
        serializer=NumpySerializer(),
        deserializer=NumpyDeserializer(),
    ):
        """Create an ``SKLearnPredictor`` for the given endpoint.

        Note that the default ``serializer`` and ``deserializer`` objects are
        constructed once, when this method is defined, so every call that
        relies on the defaults receives the same two instances.

        Args:
            endpoint_name (str): The name of the endpoint to perform inference
                on.
            sagemaker_session (sagemaker.session.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.
            serializer (sagemaker.serializers.BaseSerializer): Optional. The
                default serializes input data to .npy format and handles lists
                and numpy arrays.
            deserializer (sagemaker.deserializers.BaseDeserializer): Optional.
                The default parses the response from .npy format into a numpy
                array.
        """
        # The serializer pair is forwarded by keyword; the first two arguments
        # stay positional.
        codecs = {"serializer": serializer, "deserializer": deserializer}
        super(SKLearnPredictor, self).__init__(endpoint_name, sagemaker_session, **codecs)
Ejemplo n.º 2
0
def test_multi_data_model_deploy_pretrained_models_local_mode(container_image, sagemaker_session):
    """Deploy a ``MultiDataModel`` with instance type ``"local"`` and invoke two models on it.

    One model artifact is added before deployment and one after. The test then checks that
    both are listed, that a prediction can be targeted at each of them, and that the model
    and endpoint config can no longer be described once they have been deleted.

    Args:
        container_image: Image URI used for the multi-model container.
        sagemaker_session: Session used for the default bucket and the final lookups.
    """
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    # Define pretrained model local path
    pretrained_model_data_local_path = os.path.join(DATA_DIR, "sparkml_model", "mleap_model.tar.gz")

    with timeout(minutes=30):
        # An S3 URI is not a filesystem path, so it is built with plain string formatting
        # rather than os.path.join (whose separator depends on the platform).
        model_data_prefix = "s3://{}/multimodel-{}/".format(
            sagemaker_session.default_bucket(), timestamp
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            image_uri=container_image,
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )

        # Add model before deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, "local", endpoint_name=endpoint_name)
        # Add models after deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_2)

        endpoint_models = list(multi_data_model.list_models())
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models

        predictor = Predictor(
            endpoint_name=endpoint_name,
            sagemaker_session=multi_data_model.sagemaker_session,
            serializer=NumpySerializer(),
            deserializer=string_deserializer,
        )

        data = numpy.zeros(shape=(1, 1, 28, 28))
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)

        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)

        # Cleanup
        multi_data_model.sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=endpoint_name
        )
        multi_data_model.sagemaker_session.delete_endpoint(endpoint_name)
        multi_data_model.delete_model()

    # Confirm that the model and the endpoint config are gone. Each lookup gets its own
    # ``pytest.raises`` block because the block is left at the first exception: a second
    # call or an assertion placed inside it would never execute.
    # NOTE(review): the message checks below were unreachable before this change; confirm
    # the expected text against the error messages the service actually returns.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=multi_data_model.name)
    assert "Could not find model" in str(exception.value)

    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_name
        )
    assert "Could not find endpoint" in str(exception.value)
def test_numpy_serializer_python_array_with_dtype():
    """A plain list serialized with ``dtype="float16"`` loads back as an equal float16 array."""
    serializer = NumpySerializer(dtype="float16")
    values = [1, 2, 3]

    payload = serializer.serialize(values)

    # Read the serialized bytes back through an in-memory buffer.
    buffer = io.BytesIO(payload)
    roundtripped = np.load(buffer)
    assert np.array_equal(values, roundtripped)
    assert roundtripped.dtype == "float16"
Ejemplo n.º 4
0
    def __init__(self, endpoint_name, sagemaker_session=None):
        """Create a ``PyTorchPredictor`` for the given endpoint.

        A new ``NumpySerializer`` and ``NumpyDeserializer`` are created on every
        call and handed to the parent initializer.

        Args:
            endpoint_name (str): The name of the endpoint to perform inference
                on.
            sagemaker_session (sagemaker.session.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, one is created using
                the default AWS configuration chain.
        """
        request_serializer = NumpySerializer()
        response_deserializer = NumpyDeserializer()
        super(PyTorchPredictor, self).__init__(
            endpoint_name,
            sagemaker_session,
            request_serializer,
            response_deserializer,
        )
Ejemplo n.º 5
0
        production_variants = endpoint_config["ProductionVariants"]
        return [d["ModelName"] for d in production_variants]

    @property
    def content_type(self):
        """Return the ``CONTENT_TYPE`` of the configured serializer.

        This is the MIME type of the data sent to the inference endpoint.
        """
        active_serializer = self.serializer
        return active_serializer.CONTENT_TYPE

    @property
    def accept(self):
        """Return the ``ACCEPT`` value of the configured deserializer.

        This is the content type (or types) expected back from the inference endpoint.
        """
        active_deserializer = self.deserializer
        return active_deserializer.ACCEPT

    @property
    def endpoint(self):
        """Deprecated attribute; use ``endpoint_name`` instead.

        Returns the value of ``self.endpoint_name``.
        """
        # Called before the value is read; presumably this reports that the
        # attribute was renamed -- confirm in ``renamed_warning``.
        renamed_warning("The endpoint attribute")
        current_name = self.endpoint_name
        return current_name


# Module-level names kept under their old spellings. Each one passes a
# default-constructed serializer/deserializer (or the ``Predictor`` class) plus its
# old name to a ``deprecated_*`` helper; presumably those helpers warn on use --
# confirm in their definitions.
csv_serializer = deprecated_serialize(CSVSerializer(), "csv_serializer")
json_serializer = deprecated_serialize(JSONSerializer(), "json_serializer")
npy_serializer = deprecated_serialize(NumpySerializer(), "npy_serializer")

csv_deserializer = deprecated_deserialize(CSVDeserializer(), "csv_deserializer")
json_deserializer = deprecated_deserialize(JSONDeserializer(), "json_deserializer")
numpy_deserializer = deprecated_deserialize(NumpyDeserializer(), "numpy_deserializer")

RealTimePredictor = deprecated_class(Predictor, "RealTimePredictor")
Ejemplo n.º 6
0
def test_multi_data_model_deploy_pretrained_models_update_endpoint(
    container_image, sagemaker_session, cpu_instance_type, alternative_cpu_instance_type
):
    """Deploy a ``MultiDataModel``, invoke two models, then update the endpoint's instance type.

    After the update the test checks that the endpoint points at a new endpoint config with
    the requested instance type and count. It then deletes both configs and the model and
    verifies that none of them can be described any more.

    Args:
        container_image: Image URI used for the multi-model container.
        sagemaker_session: Session used for all SageMaker calls.
        cpu_instance_type: Instance type for the initial deployment.
        alternative_cpu_instance_type: Instance type the endpoint is updated to.
    """
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    # Define pretrained model local path
    pretrained_model_data_local_path = os.path.join(DATA_DIR, "sparkml_model", "mleap_model.tar.gz")

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        # An S3 URI is not a filesystem path, so it is built with plain string formatting
        # rather than os.path.join (whose separator depends on the platform).
        model_data_prefix = "s3://{}/multimodel-{}/".format(
            sagemaker_session.default_bucket(), timestamp
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            image_uri=container_image,
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )

        # Add model before deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        # Add model after deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_2)

        # List model assertions
        endpoint_models = list(multi_data_model.list_models())
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models

        predictor = Predictor(
            endpoint_name=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=NumpySerializer(),
            deserializer=string_deserializer,
        )

        data = numpy.zeros(shape=(1, 1, 28, 28))
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)

        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)

        # Remember the config in use before the update so it can be compared and deleted.
        endpoint_desc = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name
        )
        old_config_name = endpoint_desc["EndpointConfigName"]

        # Update endpoint
        predictor.update_endpoint(
            initial_instance_count=1, instance_type=alternative_cpu_instance_type
        )

        endpoint_desc = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name
        )
        new_config_name = endpoint_desc["EndpointConfigName"]

        new_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=new_config_name
        )
        assert old_config_name != new_config_name
        assert new_config["ProductionVariants"][0]["InstanceType"] == alternative_cpu_instance_type
        assert new_config["ProductionVariants"][0]["InitialInstanceCount"] == 1

        # Cleanup
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=old_config_name
        )
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=new_config_name
        )
        multi_data_model.delete_model()

    # Confirm that the model and both endpoint configs are gone. Each lookup gets its own
    # ``pytest.raises`` block because the block is left at the first exception: a second
    # call or an assertion placed inside it would never execute.
    # NOTE(review): the message checks below were unreachable before this change; confirm
    # the expected text against the error messages the service actually returns.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model_name)
    assert "Could not find model" in str(exception.value)

    for config_name in (old_config_name, new_config_name):
        with pytest.raises(Exception) as exception:
            sagemaker_session.sagemaker_client.describe_endpoint_config(
                EndpointConfigName=config_name
            )
        assert "Could not find endpoint" in str(exception.value)
Ejemplo n.º 7
0
def test_multi_data_model_deploy_train_model_from_amazon_first_party_estimator(
    container_image, sagemaker_session, cpu_instance_type
):
    """Serve two models produced by ``__rcf_training_job`` from one multi-model endpoint.

    The first trained model is added before deployment and the second after. The test then
    checks that both are listed, that a prediction can be targeted at each of them, and that
    the model and endpoint config can no longer be described once they have been deleted.

    Args:
        container_image: Image URI passed to the training helper.
        sagemaker_session: Session used for all SageMaker calls.
        cpu_instance_type: Instance type used for training and deployment.
    """
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        rcf_model_v1 = __rcf_training_job(
            sagemaker_session, container_image, cpu_instance_type, 50, 20
        )

        # An S3 URI is not a filesystem path, so it is built with plain string formatting
        # rather than os.path.join (whose separator depends on the platform).
        model_data_prefix = "s3://{}/multimodel-{}/".format(
            sagemaker_session.default_bucket(), timestamp
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            model=rcf_model_v1,
            sagemaker_session=sagemaker_session,
        )

        # Add model before deploy
        multi_data_model.add_model(rcf_model_v1.model_data, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        # Train another model
        rcf_model_v2 = __rcf_training_job(
            sagemaker_session, container_image, cpu_instance_type, 70, 20
        )
        # Deploy newly trained model
        multi_data_model.add_model(rcf_model_v2.model_data, PRETRAINED_MODEL_PATH_2)

        # List model assertions
        endpoint_models = list(multi_data_model.list_models())
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models

        # Define a predictor to set `serializer` parameter with `NumpySerializer`
        # instead of `JSONSerializer` in the default predictor returned by `MXNetPredictor`
        # Since we are using a placeholder container image the prediction results are not accurate.
        predictor = Predictor(
            endpoint_name=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=NumpySerializer(),
            deserializer=string_deserializer,
        )

        data = numpy.random.rand(1, 14)
        # Prediction result for the first model
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)

        # Prediction result for the second model
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)

        # Cleanup
        sagemaker_session.sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
        multi_data_model.delete_model()

    # Confirm that the model and the endpoint config are gone. Each lookup gets its own
    # ``pytest.raises`` block because the block is left at the first exception: a second
    # call or an assertion placed inside it would never execute.
    # NOTE(review): the message checks below were unreachable before this change; confirm
    # the expected text against the error messages the service actually returns.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model_name)
    assert "Could not find model" in str(exception.value)

    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_name
        )
    assert "Could not find endpoint" in str(exception.value)
def numpy_serializer():
    """Return a new ``NumpySerializer`` built with no arguments."""
    serializer = NumpySerializer()
    return serializer
Ejemplo n.º 9
0
 def attach_predictor(self):
     """Build a ``SagemakerPredictor`` for this object's endpoint and keep it on ``self.predictor``.

     The predictor is created from ``self.endpoint_name`` and ``self.session`` and is given a
     new ``NumpySerializer`` / ``NumpyDeserializer`` pair.
     """
     predictor_options = {
         "endpoint_name": self.endpoint_name,
         "sagemaker_session": self.session,
         "serializer": NumpySerializer(),
         "deserializer": NumpyDeserializer(),
     }
     self.predictor = SagemakerPredictor(**predictor_options)