def test_attach_deploy(mxnet_training_job, sagemaker_session, cpu_instance_type, cpu_instance_family):
    """Attach to a finished MXNet training job, Neo-compile the model, deploy it, and predict."""
    endpoint_name = unique_name_from_base("test-neo-attach-deploy")
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        # Rebuild the estimator from the already-completed training job.
        estimator = MXNet.attach(mxnet_training_job, sagemaker_session=sagemaker_session)
        estimator.compile_model(
            target_instance_family=cpu_instance_family,
            input_shape={"data": [1, 1, 28, 28]},
            output_path=estimator.output_path,
        )
        # Neo-compiled endpoints expect this binary numpy content type,
        # so override the serializer's default before deploying.
        serializer = JSONSerializer()
        serializer.CONTENT_TYPE = "application/vnd+python.numpy+binary"
        predictor = estimator.deploy(
            1,
            cpu_instance_type,
            serializer=serializer,
            use_compiled_model=True,
            endpoint_name=endpoint_name,
        )
        # Smoke-test the endpoint with one zero-filled MNIST-shaped sample.
        sample = numpy.zeros(shape=(1, 1, 28, 28))
        predictor.predict(sample)
def __init__(
    self,
    endpoint_name,
    sagemaker_session=None,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
):
    """Initialize an ``HuggingFacePredictor``.

    Args:
        endpoint_name (str): The name of the endpoint to perform inference on.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, the estimator creates one
            using the default AWS configuration chain.
        serializer (sagemaker.serializers.BaseSerializer): Optional. Default
            serializes input data to JSON. Handles dicts, lists, and numpy
            arrays.
        deserializer (sagemaker.deserializers.BaseDeserializer): Optional.
            Default parses the JSON response into a Python object.
    """
    # NOTE(review): the default serializer/deserializer are shared instances
    # (evaluated once at def time); assumed effectively immutable — confirm
    # no caller mutates them in place (e.g. reassigning CONTENT_TYPE).
    super(HuggingFacePredictor, self).__init__(
        endpoint_name,
        sagemaker_session,
        serializer=serializer,
        deserializer=deserializer,
    )
def __init__(
    self,
    endpoint_name,
    sagemaker_session=None,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
):
    """Create an ``MXNetPredictor`` bound to an existing endpoint.

    Args:
        endpoint_name (str): Name of the endpoint to run inference against.
        sagemaker_session (sagemaker.session.Session): Manages interactions
            with the SageMaker APIs and any other AWS services needed; when
            omitted, one is created from the default AWS configuration chain.
        serializer (callable): Optional. By default serializes input data to
            JSON; handles dicts, lists, and numpy arrays.
        deserializer (callable): Optional. By default parses the response
            with ``json.load(...)``.
    """
    super(MXNetPredictor, self).__init__(
        endpoint_name,
        sagemaker_session,
        serializer=serializer,
        deserializer=deserializer,
    )
def _test_hub_model(sagemaker_session, framework_version, ecr_image, instance_type, model_dir, accelerator_type=None):
    """Deploy a Hugging Face Hub model directly (no model artifact) and verify a prediction."""
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-huggingface-serving-hub-model")
    # The container pulls this model from the Hub at startup via these env vars.
    hub_env = {
        "HF_MODEL_ID": "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
        "HF_TASK": "text-classification",
    }
    model = Model(
        env=hub_env,
        role="SageMakerRole",
        image_uri=ecr_image,
        sagemaker_session=sagemaker_session,
        predictor_cls=Predictor,
    )
    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            endpoint_name=endpoint_name,
        )
        predictor.serializer = JSONSerializer()
        predictor.deserializer = JSONDeserializer()
        sample = {
            "inputs": "Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."
        }
        output = predictor.predict(sample)
        # A text-classification response is a list of {label, score} dicts.
        assert "score" in output[0]
def test_inferentia_deploy_model(
    mxnet_training_job,
    sagemaker_session,
    inf_instance_type,
    inf_instance_family,
    inferentia_mxnet_latest_version,
    inferentia_mxnet_latest_py_version,
):
    """Compile a trained MXNet model for Inferentia, deploy it, and run one prediction."""
    endpoint_name = unique_name_from_base("test-neo-deploy-model")
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        # Locate the artifacts of the finished training job.
        desc = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=mxnet_training_job
        )
        model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
        script_path = os.path.join(DATA_DIR, "mxnet_mnist", "mnist_neo.py")
        role = "SageMakerRole"

        model = MXNetModel(
            model_data,
            role,
            entry_point=script_path,
            framework_version=inferentia_mxnet_latest_version,
            py_version=inferentia_mxnet_latest_py_version,
            sagemaker_session=sagemaker_session,
        )
        # Neo compilation for the Inferentia instance family; the compiled
        # artifact lands next to the original model data in S3.
        model.compile(
            target_instance_family=inf_instance_family,
            input_shape={"data": [1, 1, 28, 28], "softmax_label": [1]},
            role=role,
            job_name=unique_name_from_base("test-deploy-model-compilation-job"),
            output_path="/".join(model_data.split("/")[:-1]),
        )

        # Compiled endpoints expect this binary numpy content type.
        serializer = JSONSerializer()
        serializer.CONTENT_TYPE = "application/vnd+python.numpy+binary"
        predictor = model.deploy(
            1, inf_instance_type, serializer=serializer, endpoint_name=endpoint_name
        )
        sample = numpy.zeros(shape=(1, 1, 28, 28))
        predictor.predict(sample)
def predict(x):
    """Invoke the multi-model endpoint against a randomly chosen target model.

    Args:
        x: Ignored; present so this function can be mapped over an iterable
            (e.g. by an executor) — each call is an independent request.

    Returns:
        float | None: Client-side latency of the request in milliseconds,
        or ``None`` when the call failed with a ``ClientError``.
    """
    try:
        # Spread requests across the hosted models to exercise MME routing.
        target_model = f'roberta-base-{random.randint(0,max_models)}.tar.gz'
        test_data = {"text": 'hello world how are you'}
        payload = JSONSerializer().serialize(test_data)
        # perf_counter is monotonic, unlike time.time(), so the measured
        # interval cannot be skewed by system clock adjustments.
        client_start = time.perf_counter()
        runtime_sm_client.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType=content_type,
            TargetModel=target_model,
            Body=payload,
        )
        client_end = time.perf_counter()
        return (client_end - client_start) * 1000
    except botocore.exceptions.ClientError as error:
        # Best effort: log and swallow so one failed request does not abort
        # the whole load-generation run; callers must tolerate None.
        print(error)
        return None
def _assert_prediction(predictor):
    """Send one text-classification request and check the response shape."""
    predictor.serializer = JSONSerializer()
    predictor.deserializer = JSONDeserializer()
    payload = {
        "inputs": "Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."
    }
    output = predictor.predict(payload)
    # A classification response is a list of {label, score} dicts.
    assert "score" in output[0]
def do_inference_on_local_endpoint(predictor):
    """Send the first ten test samples to the local endpoint and print results."""
    print('\nStarting Inference on endpoint (local).')

    x_test = np.load('./data/test/x_test.npy')
    y_test = np.load('./data/test/y_test.npy')

    predictor.serializer = JSONSerializer()
    predictor.deserializer = JSONDeserializer()
    results = predictor.predict({"instances": x_test[:10]})['predictions']

    # Flatten the nested prediction lists, rounding each value to one decimal.
    flat_list = [float('%.1f' % item) for sublist in results for item in sublist]
    print('predictions: \t{}'.format(np.array(flat_list)))
    print('target values: \t{}'.format(y_test[:10].round(decimals=1)))
def __init__(self, endpoint_name, sagemaker_session=None):
    """Create an ``MXNetPredictor`` for an existing SageMaker endpoint.

    Requests are serialized to JSON and responses are parsed from JSON.

    Args:
        endpoint_name (str): Name of the endpoint to run inference against.
        sagemaker_session (sagemaker.session.Session): Manages interactions
            with the SageMaker APIs and other AWS services; when omitted, a
            session is created from the default AWS configuration chain.
    """
    # JSON in / JSON out is the fixed wire format for this predictor.
    super(MXNetPredictor, self).__init__(
        endpoint_name,
        sagemaker_session,
        JSONSerializer(),
        JSONDeserializer(),
    )
def __init__(
    self,
    endpoint_name,
    sagemaker_session=None,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
    model_name=None,
    model_version=None,
    **kwargs,
):
    """Initialize a ``TensorFlowPredictor``.

    See :class:`~sagemaker.predictor.Predictor` for more info about parameters.

    Args:
        endpoint_name (str): The name of the endpoint to perform inference on.
        sagemaker_session (sagemaker.session.Session): Session object which
            manages interactions with Amazon SageMaker APIs and any other
            AWS services needed. If not specified, the estimator creates
            one using the default AWS configuration chain.
        serializer (callable): Optional. Default serializes input data to
            json. Handles dicts, lists, and numpy arrays.
        deserializer (callable): Optional. Default parses the response using
            ``json.load(...)``.
        model_name (str): Optional. The name of the SavedModel model that
            should handle the request. If not specified, the endpoint's
            default model handles the request.
        model_version (str): Optional. The version of the SavedModel model
            that should handle the request. If not specified, the latest
            version of the model is used.
    """
    # These legacy keyword arguments were removed from the Predictor API;
    # reject them loudly rather than ignoring them.
    removed_kwargs("content_type", kwargs)
    removed_kwargs("accept", kwargs)
    super(TensorFlowPredictor, self).__init__(
        endpoint_name,
        sagemaker_session,
        serializer,
        deserializer,
    )

    # Optional TFS routing hints, sent as custom attributes on each request.
    attributes = []
    for key, value in (("tfs-model-name", model_name), ("tfs-model-version", model_version)):
        if value:
            attributes.append("{}={}".format(key, value))
    self._model_attributes = ",".join(attributes) if attributes else None
def __init__(self, train_meta_path: Path, quantiles: List[float] = None):
    """Build a forecasting client from metadata saved at training time.

    Args:
        train_meta_path: JSON file with the endpoint name, the training
            configuration, and the category mapping written by the trainer.
        quantiles: Prediction quantiles to request from the endpoint.
            Defaults to no quantiles (treated as an empty list).
    """
    # Guard the documented default: previously ``quantiles`` was iterated
    # unconditionally below, raising TypeError whenever it was left as None.
    quantiles = [] if quantiles is None else quantiles

    with train_meta_path.open("r") as f:
        self._train_meta = json.load(f)

    # Predictor bound to the endpoint recorded during training.
    self._sm_predictor = sm.predictor.Predictor(
        endpoint_name=self._train_meta["endpoint_name"],
        serializer=JSONSerializer(),
        sagemaker_session=sm.Session(),
    )
    self._earliest_date = date.fromisoformat(
        self._train_meta["train_config"]["train_start"]
    )
    self._predict_quantiles = quantiles
    self._predict_length = int(
        self._train_meta["train_config"]["hyper_parameters"]["prediction_length"]
    )
    self._cat_mapping = self._train_meta["cat_mapping"]
    self._cat_keys = self._train_meta["train_config"]["cat"]
    self._feat_names = self._train_meta["train_config"]["dynamic_feat"]
    # The inference configuration expects quantiles as strings.
    self._config = Configuration(quantiles=[str(x) for x in quantiles])
def _test_sm_trained_model(sagemaker_session, framework_version, ecr_image, instance_type, model_dir, accelerator_type=None):
    """Upload a locally trained model artifact, deploy it, and verify a prediction."""
    endpoint_name = sagemaker.utils.unique_name_from_base(
        "sagemaker-huggingface-serving-trained-model"
    )
    # Stage the local model directory in S3 so the endpoint can load it.
    model_data = sagemaker_session.upload_data(
        path=model_dir,
        key_prefix="sagemaker-huggingface-serving-trained-model/models",
    )
    # Pick the artifact matching the framework baked into the image.
    model_file = pt_model if "pytorch" in ecr_image else tf_model

    model = Model(
        model_data=f"{model_data}/{model_file}",
        role="SageMakerRole",
        image_uri=ecr_image,
        sagemaker_session=sagemaker_session,
        predictor_cls=Predictor,
    )
    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        predictor = model.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            endpoint_name=endpoint_name,
        )
        predictor.serializer = JSONSerializer()
        predictor.deserializer = JSONDeserializer()
        sample = {
            "inputs": "Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."
        }
        output = predictor.predict(sample)
        # A text-classification response is a list of {label, score} dicts.
        assert "score" in output[0]
def test_predict_call_with_json():
    """A Predictor with a JSONSerializer must send JSON and return the raw body."""
    sagemaker_session = json_sagemaker_session()
    predictor = Predictor(ENDPOINT, sagemaker_session, serializer=JSONSerializer())

    payload = [1, 2]
    result = predictor.predict(payload)

    runtime = sagemaker_session.sagemaker_runtime_client
    assert runtime.invoke_endpoint.called

    # The runtime client must receive the JSON-encoded body and content type.
    _, kwargs = runtime.invoke_endpoint.call_args
    assert kwargs == {
        "Accept": DEFAULT_ACCEPT,
        "Body": json.dumps(payload),
        "ContentType": "application/json",
        "EndpointName": ENDPOINT,
    }
    assert result == json.dumps([RETURN_VALUE])
        # (tail of a method defined above this excerpt — its ``def`` line is
        # not visible here; it appears to collect the model names backing the
        # endpoint's production variants. TODO confirm against the full file.)
        production_variants = endpoint_config["ProductionVariants"]
        return [d["ModelName"] for d in production_variants]

    @property
    def content_type(self):
        """The MIME type of the data sent to the inference endpoint."""
        # Delegates to the configured serializer's declared content type.
        return self.serializer.CONTENT_TYPE

    @property
    def accept(self):
        """The content type(s) that are expected from the inference endpoint."""
        # Delegates to the configured deserializer's accepted types.
        return self.deserializer.ACCEPT

    @property
    def endpoint(self):
        """Deprecated attribute. Please use endpoint_name."""
        renamed_warning("The endpoint attribute")
        return self.endpoint_name


# Deprecated module-level singletons kept for backward compatibility; the
# deprecated_* wrappers presumably emit a warning on use — verify against
# their definitions.
csv_serializer = deprecated_serialize(CSVSerializer(), "csv_serializer")
json_serializer = deprecated_serialize(JSONSerializer(), "json_serializer")
npy_serializer = deprecated_serialize(NumpySerializer(), "npy_serializer")
csv_deserializer = deprecated_deserialize(CSVDeserializer(), "csv_deserializer")
json_deserializer = deprecated_deserialize(JSONDeserializer(), "json_deserializer")
numpy_deserializer = deprecated_deserialize(NumpyDeserializer(), "numpy_deserializer")
RealTimePredictor = deprecated_class(Predictor, "RealTimePredictor")
def __init__(self, endpoint_name: str, sagemaker_session=None):
    """Predictor that sends JSON requests and returns the raw response string.

    Args:
        endpoint_name: Name of the endpoint to run inference against.
        sagemaker_session: Optional session managing SageMaker API calls;
            a default one is created when omitted.
    """
    super().__init__(
        endpoint_name=endpoint_name,
        sagemaker_session=sagemaker_session,
        serializer=JSONSerializer(),
        deserializer=StringDeserializer(),
    )
def json_serializer():
    """Provide a fresh ``JSONSerializer`` instance."""
    serializer = JSONSerializer()
    return serializer
# Keep only the columns we need and merge article content + description into
# a single text field for inference.
articles_df = articles_df[['source.name', 'content', 'description']]
articles_df['inputs'] = articles_df[['content', 'description']].apply(
    lambda x: ''.join(x), axis=1
)
articles_df.drop(['content', 'description'], axis=1, inplace=True)
articles_df.rename(columns={'source.name': 'source'}, inplace=True)

# DataFrame.append() was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent with identical semantics here.
df = pd.concat([sec_df, articles_df], ignore_index=True)

data = {'inputs': df['inputs'].tolist()}

# Initialize predictor from the existing endpoint.
predictor = sagemaker.predictor.Predictor(
    endpoint_name=args.endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

# Predict for all chunks; a failed endpoint call is logged, not fatal.
try:
    response = predictor.predict(data)
    response_df = pd.json_normalize(response)
    response_df['source'] = df['source']
    response_df = response_df[['source', 'label', 'score']]
    response_df.to_csv(
        f'/opt/ml/processing/output/{args.ticker_cik}_sentiment_result.csv',
        index=False,
    )
except ClientError as e:
    # The traceback already carries the ClientError message, so log it
    # lazily instead of pre-formatting (and drop the unused local).
    LOGGER.error("%s", traceback.format_exc())