def test_multi_model_predict_call():
    sagemaker_session = empty_sagemaker_session()
    predictor = Predictor(ENDPOINT, sagemaker_session)

    data = "untouched"
    result = predictor.predict(data, target_model="model.tar.gz")

    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint.called

    expected_request_args = {
        "Accept": DEFAULT_ACCEPT,
        "Body": data,
        "ContentType": DEFAULT_CONTENT_TYPE,
        "EndpointName": ENDPOINT,
        "TargetModel": "model.tar.gz",
    }
    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint.call_args
    assert kwargs == expected_request_args

    assert result == RETURN_VALUE
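# The empty_sagemaker_session() helper used throughout these tests is not shown
# in this section. A minimal sketch of what it plausibly looks like, assuming
# BUCKET_NAME, RETURN_VALUE, and DEFAULT_CONTENT_TYPE are module-level test
# constants; the default deserializer reads the Body stream back out unchanged:
import io
from unittest.mock import Mock


def empty_sagemaker_session():
    session = Mock(name="sagemaker_session")
    session.default_bucket = Mock(name="default_bucket", return_value=BUCKET_NAME)
    session.sagemaker_runtime_client = Mock(name="sagemaker_runtime")
    # invoke_endpoint returns a botocore-style response whose Body is a stream
    session.sagemaker_runtime_client.invoke_endpoint = Mock(
        name="invoke_endpoint",
        return_value={"Body": io.StringIO(RETURN_VALUE), "ContentType": DEFAULT_CONTENT_TYPE},
    )
    return session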
def test_predict_call_with_inference_id():
    sagemaker_session = empty_sagemaker_session()
    predictor = Predictor(ENDPOINT, sagemaker_session)

    data = "untouched"
    result = predictor.predict(data, inference_id=INFERENCE_ID)

    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint.called

    expected_request_args = {
        "Accept": DEFAULT_ACCEPT,
        "Body": data,
        "ContentType": DEFAULT_CONTENT_TYPE,
        "EndpointName": ENDPOINT,
        "InferenceId": INFERENCE_ID,
    }
    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint.call_args
    assert kwargs == expected_request_args

    assert result == RETURN_VALUE
def test_predict_call_with_target_variant():
    sagemaker_session = empty_sagemaker_session()
    predictor = Predictor(ENDPOINT, sagemaker_session)

    data = "untouched"
    result = predictor.predict(data, target_variant=PRODUCTION_VARIANT_1)

    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint.called

    expected_request_args = {
        "Accept": DEFAULT_ACCEPT,
        "Body": data,
        "ContentType": DEFAULT_CONTENT_TYPE,
        "EndpointName": ENDPOINT,
        "TargetVariant": PRODUCTION_VARIANT_1,
    }
    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint.call_args
    assert kwargs == expected_request_args

    assert result == RETURN_VALUE
def test_wrong_waiter_config_object():
    predictor_async = AsyncPredictor(Predictor(ENDPOINT_NAME))
    async_inference_response = AsyncInferenceResponse(
        output_path=DEFAULT_OUTPUT_PATH,
        predictor_async=predictor_async,
    )

    with pytest.raises(
        ValueError,
        match="waiter_config should be a WaiterConfig object",
    ):
        async_inference_response.get_result(waiter_config={})
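# For contrast, a passing call supplies a real WaiterConfig rather than a plain
# dict. A minimal usage sketch (the delay/max_attempts values are illustrative,
# and async_inference_response is an AsyncInferenceResponse like the one above):
from sagemaker.async_inference import WaiterConfig

result = async_inference_response.get_result(
    waiter_config=WaiterConfig(delay=15, max_attempts=60)
)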
def test_predict_call_with_json():
    sagemaker_session = json_sagemaker_session()
    predictor = Predictor(ENDPOINT, sagemaker_session, serializer=JSONSerializer())

    data = [1, 2]
    result = predictor.predict(data)

    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint.called

    expected_request_args = {
        "Accept": DEFAULT_ACCEPT,
        "Body": json.dumps(data),
        "ContentType": "application/json",
        "EndpointName": ENDPOINT,
    }
    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint.call_args
    assert kwargs == expected_request_args

    assert result == json.dumps([RETURN_VALUE])
def test_list_monitors_unknown_monitoring_type():
    sagemaker_session = empty_sagemaker_session()
    sagemaker_session.list_monitoring_schedules = Mock(
        return_value={
            "MonitoringScheduleSummaries": [
                {
                    "MonitoringScheduleName": "model-explainability-monitor",
                    "MonitoringType": "UnknownType",
                },
            ]
        }
    )
    sagemaker_session.describe_monitoring_schedule = Mock(
        side_effect=[
            {
                "MonitoringScheduleConfig": {
                    "MonitoringType": "UnknownType",
                    "MonitoringJobDefinitionName": "unknown-job-definition",
                }
            },
        ]
    )
    predictor = Predictor(ENDPOINT, sagemaker_session=sagemaker_session)

    with pytest.raises(TypeError):
        predictor.list_monitors()
def test_predict_call_pass_through():
    sagemaker_session = empty_sagemaker_session()
    predictor = Predictor(ENDPOINT, sagemaker_session)

    data = "untouched"
    result = predictor.predict(data)

    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint.called
    # use assert_not_called(); "assert mock.not_called" creates a child mock and is always truthy
    sagemaker_session.sagemaker_client.describe_endpoint.assert_not_called()
    sagemaker_session.sagemaker_client.describe_endpoint_config.assert_not_called()

    expected_request_args = {
        "Accept": DEFAULT_ACCEPT,
        "Body": data,
        "ContentType": DEFAULT_CONTENT_TYPE,
        "EndpointName": ENDPOINT,
    }
    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint.call_args
    assert kwargs == expected_request_args

    assert result == RETURN_VALUE
def test_predict_call_with_csv():
    sagemaker_session = ret_csv_sagemaker_session()
    predictor = Predictor(
        ENDPOINT,
        sagemaker_session,
        serializer=CSVSerializer(),
        deserializer=CSVDeserializer(),
    )

    data = [1, 2]
    result = predictor.predict(data)

    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint.called

    expected_request_args = {
        "Accept": CSV_CONTENT_TYPE,
        "Body": "1,2",
        "ContentType": CSV_CONTENT_TYPE,
        "EndpointName": ENDPOINT,
    }
    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint.call_args
    assert kwargs == expected_request_args

    assert result == [["1", "2", "3"]]
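# json_sagemaker_session() and ret_csv_sagemaker_session() are also not defined
# in this section. A plausible sketch, assuming they mirror
# empty_sagemaker_session() but stub invoke_endpoint with a JSON or CSV response
# body (a text stream for the JSON case, a byte stream for CSVDeserializer,
# which decodes what it reads):
import io
import json
from unittest.mock import Mock


def json_sagemaker_session():
    session = empty_sagemaker_session()
    session.sagemaker_runtime_client.invoke_endpoint = Mock(
        name="invoke_endpoint",
        return_value={
            "Body": io.StringIO(json.dumps([RETURN_VALUE])),
            "ContentType": "application/json",
        },
    )
    return session


def ret_csv_sagemaker_session():
    session = empty_sagemaker_session()
    session.sagemaker_runtime_client.invoke_endpoint = Mock(
        name="invoke_endpoint",
        return_value={"Body": io.BytesIO(b"1,2,3"), "ContentType": CSV_CONTENT_TYPE},
    )
    return session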
def test_predict_async_call_invalid_input():
    sagemaker_session = empty_sagemaker_session()
    predictor_async = AsyncPredictor(Predictor(ENDPOINT, sagemaker_session))

    with pytest.raises(
        ValueError,
        match="Please provide input data or input Amazon S3 location to use async prediction",
    ):
        predictor_async.predict_async()

    with pytest.raises(
        ValueError,
        match="Please provide input data or input Amazon S3 location to use async prediction",
    ):
        predictor_async.predict()
def test_get_result():
    predictor_async = AsyncPredictor(Predictor(ENDPOINT_NAME))
    predictor_async.s3_client = empty_s3_client()
    async_inference_response = AsyncInferenceResponse(
        output_path=DEFAULT_OUTPUT_PATH,
        predictor_async=predictor_async,
    )

    # The stubbed S3 client is expected to fail twice before succeeding
    # (see the empty_s3_client sketch below).
    with pytest.raises(UnexpectedClientError):
        async_inference_response.get_result()

    with pytest.raises(ObjectNotExistedError, match="Inference could still be running"):
        async_inference_response.get_result()

    result = async_inference_response.get_result()
    assert async_inference_response._result == result
    assert result == RETURN_VALUE
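# empty_s3_client() is not shown here. For the three get_result() calls above to
# behave as asserted, its get_object mock plausibly uses a side_effect sequence:
# one unexpected ClientError, one NoSuchKey ClientError, then a successful
# response. This assumes AsyncInferenceResponse maps botocore error codes to
# UnexpectedClientError/ObjectNotExistedError; a hedged sketch:
from unittest.mock import Mock
from botocore.exceptions import ClientError


def empty_s3_client():
    s3_client = Mock(name="s3_client")
    response_body = Mock(name="body")
    response_body.read = Mock(name="read", return_value=RETURN_VALUE)
    response_body.close = Mock(name="close", return_value=None)
    s3_client.get_object = Mock(
        name="get_object",
        side_effect=[
            # 1st call: unexpected error -> surfaced as UnexpectedClientError
            ClientError({"Error": {"Code": "SomeError", "Message": "some error"}}, "GetObject"),
            # 2nd call: missing key -> surfaced as ObjectNotExistedError
            ClientError({"Error": {"Code": "NoSuchKey"}}, "GetObject"),
            # 3rd call: the output object now exists
            {"Body": response_body},
        ],
    )
    return s3_client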
def test_async_predict_call_pass_through():
    sagemaker_session = empty_sagemaker_session()
    predictor_async = AsyncPredictor(Predictor(ENDPOINT, sagemaker_session))

    result = predictor_async.predict_async(input_path=ASYNC_INPUT_LOCATION)

    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint_async.called
    sagemaker_session.sagemaker_client.describe_endpoint.assert_not_called()
    sagemaker_session.sagemaker_client.describe_endpoint_config.assert_not_called()

    expected_request_args = {
        "Accept": DEFAULT_ACCEPT,
        "InputLocation": ASYNC_INPUT_LOCATION,
        "EndpointName": ENDPOINT,
    }
    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint_async.call_args
    assert kwargs == expected_request_args

    assert result.output_path == ASYNC_OUTPUT_LOCATION
def test_async_predict_call_with_inference_id():
    sagemaker_session = empty_sagemaker_session()
    predictor_async = AsyncPredictor(Predictor(ENDPOINT, sagemaker_session))

    input_location = "s3://some-input-path"
    result = predictor_async.predict_async(input_path=input_location, inference_id=INFERENCE_ID)

    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint_async.called

    expected_request_args = {
        "Accept": DEFAULT_ACCEPT,
        "InputLocation": input_location,
        "EndpointName": ENDPOINT,
        "InferenceId": INFERENCE_ID,
    }
    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint_async.call_args
    assert kwargs == expected_request_args

    assert result.output_path == ASYNC_OUTPUT_LOCATION
def test_async_predict_call_with_data():
    sagemaker_session = empty_sagemaker_session()
    predictor_async = AsyncPredictor(Predictor(ENDPOINT, sagemaker_session))
    predictor_async.name = ASYNC_PREDICTOR

    data = DUMMY_DATA
    result = predictor_async.predict_async(data=data)

    assert sagemaker_session.s3_client.put_object.called
    assert sagemaker_session.sagemaker_runtime_client.invoke_endpoint_async.called
    sagemaker_session.sagemaker_client.describe_endpoint.assert_not_called()
    sagemaker_session.sagemaker_client.describe_endpoint_config.assert_not_called()

    expected_request_args = {
        "Accept": DEFAULT_ACCEPT,
        "InputLocation": predictor_async._input_path,
        "EndpointName": ENDPOINT,
    }
    call_args, kwargs = sagemaker_session.sagemaker_runtime_client.invoke_endpoint_async.call_args
    assert kwargs == expected_request_args

    assert result.output_path == ASYNC_OUTPUT_LOCATION
def test_update_endpoint_instance_type_and_count(name_from_base, production_variant):
    # name_from_base and production_variant are mocks, typically injected via
    # @patch decorators on this test (see the sketch below).
    new_endpoint_config_name = "new-endpoint-config"
    name_from_base.return_value = new_endpoint_config_name

    sagemaker_session = empty_sagemaker_session()
    existing_endpoint_config_name = "existing-endpoint-config"
    existing_model_name = "existing-model"

    predictor = Predictor(ENDPOINT, sagemaker_session=sagemaker_session)
    predictor._endpoint_config_name = existing_endpoint_config_name
    predictor._model_names = [existing_model_name]

    new_instance_count = 2
    new_instance_type = "ml.c4.xlarge"

    predictor.update_endpoint(
        initial_instance_count=new_instance_count,
        instance_type=new_instance_type,
    )

    assert [existing_model_name] == predictor._model_names
    assert new_endpoint_config_name == predictor._endpoint_config_name

    production_variant.assert_called_with(
        existing_model_name,
        new_instance_type,
        initial_instance_count=new_instance_count,
        accelerator_type=None,
    )
    sagemaker_session.create_endpoint_config_from_existing.assert_called_with(
        existing_endpoint_config_name,
        new_endpoint_config_name,
        new_tags=None,
        new_kms_key=None,
        new_data_capture_config_dict=None,
        new_production_variants=[production_variant.return_value],
    )
    sagemaker_session.update_endpoint.assert_called_with(
        ENDPOINT, new_endpoint_config_name, wait=True
    )
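# The two mock arguments above are presumably supplied by stacked patch
# decorators. A hedged sketch of the expected decoration; the module paths are
# assumptions and should point at wherever update_endpoint resolves these helpers:
from unittest.mock import patch


@patch("sagemaker.predictor.production_variant")
@patch("sagemaker.predictor.name_from_base")
def test_update_endpoint_instance_type_and_count(name_from_base, production_variant):
    ...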
import json
import time

import falcon
import tensorflow as tf

from sagemaker.predictor import Predictor
from gpt2_tokenizer import GPT2Tokenizer

app = application = falcon.App(cors_enable=True)

endpoint = "tensorflow-inference-2021-04-28-15-31-51-433"
predictor = Predictor(endpoint_name=endpoint)

tokenizer = GPT2Tokenizer(
    'CPM-Generate/bpe_3w_new/vocab.json',
    'CPM-Generate/bpe_3w_new/merges.txt',
    model_file='CPM-Generate/bpe_3w_new/chinese_vocab.model')


class Generate:
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def on_post(self, req, resp):
        body = json.loads(req.stream.read())
        text = body["text"]
        t0 = time.time()
        length = 300
        inputs = tf.constant([tokenizer.encode(text)], dtype=tf.int64)
        inputs = inputs.numpy().tolist()
        inputs = inputs[0]
def init_predict_process(func, endpoint_name, quantiles):
    func.predictor = Predictor(
        endpoint_name,
        serializer=sagemaker.serializers.JSONSerializer(),
        deserializer=sagemaker.deserializers.JSONDeserializer(),
    )
    func.quantiles = quantiles
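# This helper is shaped like a multiprocessing.Pool initializer that hangs a
# per-worker Predictor off a worker function. A hedged usage sketch; predict_fn,
# the endpoint name, and the quantile values are hypothetical:
import multiprocessing


def predict_fn(payload):
    # predictor and quantiles were attached by init_predict_process inside
    # each worker process
    return predict_fn.predictor.predict(payload)


pool = multiprocessing.Pool(
    processes=4,
    initializer=init_predict_process,
    initargs=(predict_fn, "my-endpoint", [0.1, 0.5, 0.9]),
)
# results = pool.map(predict_fn, payloads)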
import base64
import io
import json
import os

import boto3
import sagemaker
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONLinesSerializer
from sagemaker.deserializers import JSONLinesDeserializer

# grab environment variables
ENDPOINT_NAME = os.environ["ENDPOINT_NAME"]
print("Endpoint: {}".format(ENDPOINT_NAME))

sess = sagemaker.Session()

predictor = Predictor(
    endpoint_name=ENDPOINT_NAME,
    serializer=JSONLinesSerializer(),
    deserializer=JSONLinesDeserializer(),
    sagemaker_session=sess,
)


def lambda_handler(event, context):
    outputs = []

    r = event["records"]
    print("records: {}".format(r))
    print("type_records: {}".format(type(r)))

    # TODO: Handle batches
    for record in event["records"]:
        print(record["recordId"])
        payload = base64.b64decode(record["data"])
        print("payload: {}".format(payload))
import time

from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import CSVDeserializer  # CSVDeserializer lives in sagemaker.deserializers

endpoint_name = "xgb-churn-monitor"
predictor = Predictor(
    endpoint_name=endpoint_name,
    deserializer=CSVDeserializer(),
    serializer=CSVSerializer(),
)

# get a subset of test data for a quick test
# !head -120 test_data/test-dataset-input-cols.csv > test_data/test_sample.csv

print("Sending test traffic to the endpoint {}. \nPlease wait...".format(endpoint_name))

with open("test_data/test_sample.csv", "r") as f:
    for row in f:
        payload = row.rstrip("\n")
        response = predictor.predict(data=payload)
        time.sleep(0.5)

print("Done!")
def test_multi_data_model_deploy_pretrained_models_local_mode(container_image, sagemaker_session):
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    # Define pretrained model local path
    pretrained_model_data_local_path = os.path.join(DATA_DIR, "sparkml_model", "mleap_model.tar.gz")

    with timeout(minutes=30):
        model_data_prefix = os.path.join(
            "s3://", sagemaker_session.default_bucket(), "multimodel-{}/".format(timestamp)
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            image_uri=container_image,
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )

        # Add model before deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, "local", endpoint_name=endpoint_name)
        # Add models after deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_2)

        endpoint_models = []
        for model_path in multi_data_model.list_models():
            endpoint_models.append(model_path)
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models

        predictor = Predictor(
            endpoint_name=endpoint_name,
            sagemaker_session=multi_data_model.sagemaker_session,
            serializer=NumpySerializer(),
            deserializer=string_deserializer,
        )

        data = numpy.zeros(shape=(1, 1, 28, 28))
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)

        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)

        # Cleanup
        multi_data_model.sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=endpoint_name
        )
        multi_data_model.sagemaker_session.delete_endpoint(endpoint_name)
        multi_data_model.delete_model()

    # Assertions placed inside a pytest.raises block never run once the call
    # raises, so each describe_* call gets its own block.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=multi_data_model.name)
    assert "Could not find model" in str(exception.value)

    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_name
        )
    assert "Could not find endpoint" in str(exception.value)
s3_capture_upload_path = f's3://{bucket}/{prefix}/endpoint-data-capture/'
# example: s3://bucket-name/path/to/endpoint-data-capture/

# Change parameters as you would like - adjust the sampling percentage,
# choose to capture the request, the response, or both
data_capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=25,
    destination_s3_uri=s3_capture_upload_path,
    kms_key_id=None,
    capture_options=["REQUEST", "RESPONSE"],
    csv_content_types=["text/csv"],
    json_content_types=["application/json"],
)

# Now it is time to apply the new configuration
predictor = Predictor(endpoint_name=endpoint_name, sagemaker_session=sagemaker_session)
predictor.update_data_capture_config(data_capture_config=data_capture_config)
print('Created Predictor at endpoint {}'.format(endpoint_name))

baseline_data_uri = args.baseline_data_uri  # 's3://bucketname/path/to/baseline/data' - where your validation data is
baseline_results_uri = f's3://{bucket}/{prefix}/baseline/results'  # where the results are to be stored
print('Baseline data is at {}'.format(baseline_data_uri))

my_default_monitor = DefaultModelMonitor(
    role=get_execution_role(sagemaker_session=sagemaker_session),
    sagemaker_session=sagemaker_session,
    instance_count=2,
    instance_type='ml.m5.4xlarge',
    volume_size_in_gb=60,
def test_multi_data_model_deploy_pretrained_models_update_endpoint(
    container_image, sagemaker_session, cpu_instance_type, alternative_cpu_instance_type
):
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    # Define pretrained model local path
    pretrained_model_data_local_path = os.path.join(DATA_DIR, "sparkml_model", "mleap_model.tar.gz")

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model_data_prefix = os.path.join(
            "s3://", sagemaker_session.default_bucket(), "multimodel-{}/".format(timestamp)
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            image_uri=container_image,
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )

        # Add model before deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        # Add model after deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_2)

        # List model assertions
        endpoint_models = []
        for model_path in multi_data_model.list_models():
            endpoint_models.append(model_path)
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models

        predictor = Predictor(
            endpoint_name=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=NumpySerializer(),
            deserializer=string_deserializer,
        )

        data = numpy.zeros(shape=(1, 1, 28, 28))
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)

        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)

        endpoint_desc = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name
        )
        old_config_name = endpoint_desc["EndpointConfigName"]

        # Update endpoint
        predictor.update_endpoint(
            initial_instance_count=1, instance_type=alternative_cpu_instance_type
        )

        endpoint_desc = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name
        )
        new_config_name = endpoint_desc["EndpointConfigName"]
        new_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=new_config_name
        )

        assert old_config_name != new_config_name
        assert new_config["ProductionVariants"][0]["InstanceType"] == alternative_cpu_instance_type
        assert new_config["ProductionVariants"][0]["InitialInstanceCount"] == 1

        # Cleanup
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=old_config_name
        )
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=new_config_name
        )
        multi_data_model.delete_model()

    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model_name)
    assert "Could not find model" in str(exception.value)

    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=old_config_name
        )
    assert "Could not find endpoint" in str(exception.value)

    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=new_config_name
        )
    assert "Could not find endpoint" in str(exception.value)
def test_multi_data_model_deploy_train_model_from_amazon_first_party_estimator(
    container_image, sagemaker_session, cpu_instance_type
):
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        rcf_model_v1 = __rcf_training_job(
            sagemaker_session, container_image, cpu_instance_type, 50, 20
        )

        model_data_prefix = os.path.join(
            "s3://", sagemaker_session.default_bucket(), "multimodel-{}/".format(timestamp)
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            model=rcf_model_v1,
            sagemaker_session=sagemaker_session,
        )

        # Add model before deploy
        multi_data_model.add_model(rcf_model_v1.model_data, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        # Train another model
        rcf_model_v2 = __rcf_training_job(
            sagemaker_session, container_image, cpu_instance_type, 70, 20
        )
        # Deploy newly trained model
        multi_data_model.add_model(rcf_model_v2.model_data, PRETRAINED_MODEL_PATH_2)

        # List model assertions
        endpoint_models = []
        for model_path in multi_data_model.list_models():
            endpoint_models.append(model_path)
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models

        # Define a predictor to set the `serializer` parameter with `NumpySerializer`
        # instead of the `JSONSerializer` in the default predictor returned by `MXNetPredictor`.
        # Since we are using a placeholder container image, the prediction results are not accurate.
        predictor = Predictor(
            endpoint_name=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=NumpySerializer(),
            deserializer=string_deserializer,
        )

        data = numpy.random.rand(1, 14)
        # Prediction result for the first model
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)

        # Prediction result for the second model
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)

        # Cleanup
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=endpoint_name
        )
        multi_data_model.delete_model()

    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model_name)
    assert "Could not find model" in str(exception.value)

    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_name
        )
    assert "Could not find endpoint" in str(exception.value)
def predict_wrapper(endpoint, session):
    return Predictor(endpoint, session, serializer, deserializer)
def predict_wrapper(endpoint, session):
    return Predictor(endpoint, session)
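# Wrappers with this (endpoint_name, sagemaker_session) signature are commonly
# passed as a Model's predictor_cls, so deploy() returns a ready-made Predictor.
# A hedged sketch; the image/model/role placeholders are illustrative:
from sagemaker.model import Model

model = Model(
    image_uri="<image-uri>",
    model_data="<s3-model-artifact>",
    role="<execution-role-arn>",
    predictor_cls=predict_wrapper,  # deploy() calls predict_wrapper(endpoint_name, session)
)
# predictor = model.deploy(initial_instance_count=1, instance_type="ml.m5.xlarge")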
def attach_predictor(self):
    self.predictor = SagemakerPredictor(
        endpoint_name=self.endpoint_name,
        sagemaker_session=self.session,
        serializer=NumpySerializer(),
        deserializer=NumpyDeserializer(),
    )
def create_predictor(self, endpoint_name):
    return Predictor(endpoint_name, sagemaker_session=self.sagemaker_session)
def test_list_monitors(default_model_monitor_attach, *attach_methods):
    sagemaker_session = empty_sagemaker_session()
    sagemaker_session.list_monitoring_schedules = Mock(
        return_value={
            "MonitoringScheduleSummaries": [
                {
                    "MonitoringScheduleName": "default-monitor",
                },
                {
                    "MonitoringScheduleName": "byoc-monitor",
                },
                {
                    "MonitoringScheduleName": "data-quality-monitor",
                    "MonitoringType": "DataQuality",
                },
                {
                    "MonitoringScheduleName": "model-quality-monitor",
                    "MonitoringType": "ModelQuality",
                },
                {
                    "MonitoringScheduleName": "model-bias-monitor",
                    "MonitoringType": "ModelBias",
                },
                {
                    "MonitoringScheduleName": "model-explainability-monitor",
                    "MonitoringType": "ModelExplainability",
                },
            ]
        }
    )
    sagemaker_session.describe_monitoring_schedule = Mock(
        side_effect=[
            {
                "MonitoringScheduleConfig": {
                    "MonitoringJobDefinition": {
                        "MonitoringAppSpecification": {
                            "ImageUri": DEFAULT_REPOSITORY_NAME,
                        }
                    }
                }
            },
            {
                "MonitoringScheduleConfig": {
                    "MonitoringJobDefinition": {
                        "MonitoringAppSpecification": {
                            "ImageUri": "byoc-image",
                        }
                    }
                }
            },
            {
                "MonitoringScheduleConfig": {
                    "MonitoringType": "DataQuality",
                    "MonitoringJobDefinitionName": "data-quality-job-definition",
                }
            },
            {
                "MonitoringScheduleConfig": {
                    "MonitoringType": "ModelQuality",
                    "MonitoringJobDefinitionName": "model-quality-job-definition",
                }
            },
        ]
    )
    predictor = Predictor(ENDPOINT, sagemaker_session=sagemaker_session)

    predictor.list_monitors()

    for attach_method in attach_methods:
        attach_method.assert_called_once()
    assert default_model_monitor_attach.call_count == 2
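# default_model_monitor_attach and *attach_methods above are presumably mocks
# injected by stacked patch decorators over the various monitor classes' attach
# methods (the bottom decorator binds to the first positional argument). A hedged
# sketch; the exact patch targets are assumptions:
from unittest.mock import patch


@patch("sagemaker.model_monitor.ModelExplainabilityMonitor.attach")
@patch("sagemaker.model_monitor.ModelBiasMonitor.attach")
@patch("sagemaker.model_monitor.ModelQualityMonitor.attach")
@patch("sagemaker.model_monitor.ModelMonitor.attach")
@patch("sagemaker.model_monitor.DefaultModelMonitor.attach")
def test_list_monitors(default_model_monitor_attach, *attach_methods):
    ...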