def _test_mnist_distributed(sagemaker_session, ecr_image, instance_type, model_dir, mnist_script,
                            accelerator_type=None):
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-pytorch-serving")

    model_data = sagemaker_session.upload_data(
        path=model_dir,
        key_prefix="sagemaker-pytorch-serving/models",
    )

    pytorch = PyTorchModel(model_data=model_data,
                           role='SageMakerRole',
                           entry_point=mnist_script,
                           image=ecr_image,
                           sagemaker_session=sagemaker_session)

    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        # Use the accelerator type to differentiate EI from CPU and GPU; don't use the processor value.
        if accelerator_type is not None:
            predictor = pytorch.deploy(initial_instance_count=1,
                                       instance_type=instance_type,
                                       accelerator_type=accelerator_type,
                                       endpoint_name=endpoint_name)
        else:
            predictor = pytorch.deploy(initial_instance_count=1,
                                       instance_type=instance_type,
                                       endpoint_name=endpoint_name)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
def test_model_image_accelerator(sagemaker_session):
    with pytest.raises(ValueError) as error:
        model = PyTorchModel(
            MODEL_DATA,
            role=ROLE,
            entry_point=SCRIPT_PATH,
            sagemaker_session=sagemaker_session,
            framework_version="1.3.1",
            py_version="py2",
        )
        model.deploy(1, CPU, accelerator_type=ACCELERATOR_TYPE)

    assert "Unsupported Python version: py2." in str(error)
def _test_mnist_distributed(sagemaker_session, ecr_image, instance_type):
    model_dir = os.path.join(model_cpu_dir, 'model_mnist.tar.gz')
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-pytorch-serving")

    model_data = sagemaker_session.upload_data(
        path=model_dir,
        key_prefix="sagemaker-pytorch-serving/models",
    )

    pytorch = PyTorchModel(model_data, 'SageMakerRole', mnist_script, ecr_image, sagemaker_session)

    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        predictor = pytorch.deploy(initial_instance_count=1,
                                   instance_type=instance_type,
                                   endpoint_name=endpoint_name)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
def deploy():
    session = sagemaker.Session()
    modelArtifacts = uploadModel(session,
                                 modelPath=PARAM_PATH,
                                 tokenizerPath=TOKENIZER,
                                 resourceName=MODEL_NAME)
    model = PyTorchModel(
        model_data=modelArtifacts,
        name='{}-{}'.format(MODEL_NAME, VERSION),
        role='SageMakerRole',  # Needs to be defined beforehand
        framework_version='1.1.0',
        entry_point='serve.py',
        source_dir='release',
        predictor_cls=ShortAnswerPredictor)
    model.deploy(initial_instance_count=1, instance_type=INSTANCE_TYPE)
def test_model(sagemaker_session): model = PyTorchModel("s3://some/data.tar.gz", role=ROLE, entry_point=SCRIPT_PATH, sagemaker_session=sagemaker_session) predictor = model.deploy(1, GPU) assert isinstance(predictor, PyTorchPredictor)
def main():
    test_loader = download_data_for_inference()

    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local mode a dummy role is sufficient
    role = DUMMY_IAM_ROLE
    model_dir = 's3://aws-ml-blog/artifacts/pytorch-script-mode-local-model-inference/model.tar.gz'

    model = PyTorchModel(
        role=role,
        model_data=model_dir,
        framework_version='1.8',
        py_version='py3',
        entry_point='inference.py'
    )

    print('Deploying endpoint in local mode')
    print('Note: if launching for the first time in local mode, '
          'container image download might take a few minutes to complete.')
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='local',
    )

    do_inference_on_local_endpoint(predictor, test_loader)

    predictor.delete_endpoint()
def test_model(sagemaker_session):
    model = PyTorchModel(MODEL_DATA,
                         role=ROLE,
                         entry_point=SCRIPT_PATH,
                         sagemaker_session=sagemaker_session)
    predictor = model.deploy(1, GPU)
    assert isinstance(predictor, PyTorchPredictor)
def test_model(sagemaker_session, pytorch_inference_version, pytorch_inference_py_version):
    model = PyTorchModel(
        MODEL_DATA,
        role=ROLE,
        entry_point=SCRIPT_PATH,
        framework_version=pytorch_inference_version,
        py_version=pytorch_inference_py_version,
        sagemaker_session=sagemaker_session,
    )
    predictor = model.deploy(1, GPU)
    assert isinstance(predictor, PyTorchPredictor)
def _predictor(model_dir, script, image, sagemaker_local_session, instance_type,
               model_server_workers=None):
    model = PyTorchModel('file://{}'.format(model_dir),
                         ROLE,
                         script,
                         image=image,
                         sagemaker_session=sagemaker_local_session,
                         model_server_workers=model_server_workers)

    with local_mode_utils.lock():
        predictor = None
        try:
            predictor = model.deploy(1, instance_type)
            yield predictor
        finally:
            # Guard against deploy() failing before predictor is assigned.
            if predictor is not None:
                predictor.delete_endpoint()
def deploy():
    config.logger.info("Deploying model_name=%s to env=%s" %
                       (env.setting('model_name'), env.current_env()))
    build_model_data_file()
    upload_model_data()

    pytorch_model = PyTorchModel(
        model_data=env.setting('model_data_path'),
        name=env.setting('model_name'),
        framework_version='1.4.0',
        role=env.setting("aws_role"),
        env={"DEPLOY_ENV": env.current_env()},
        entry_point='deploy/sagemaker/serve.py')

    if env.isDeployed():
        delete_endpoint_and_config()

    predictor = pytorch_model.deploy(
        instance_type=env.setting('instance_type'),
        # Below isn't working: https://github.com/aws/sagemaker-python-sdk/issues/101#issuecomment-607376320
        # update_endpoint=update_endpoint_if_exists(),
        initial_instance_count=1)
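# The delete_endpoint_and_config() helper called above is not shown in this snippet.
# A minimal sketch of what it might look like with the boto3 SageMaker client follows;
# it assumes (hypothetically) that the endpoint and its config were created under the
# model name, which may not match the actual project's naming scheme.
import boto3


def delete_endpoint_and_config():
    """Hypothetical helper: remove the existing endpoint and its config so the
    subsequent deploy() can recreate them under the same name."""
    client = boto3.client("sagemaker")
    name = env.setting('model_name')  # assumption: endpoint/config reuse the model name
    client.delete_endpoint(EndpointName=name)
    client.delete_endpoint_config(EndpointConfigName=name)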
def test_model_custom_serialization(
    sagemaker_session,
    pytorch_inference_version,
    pytorch_inference_py_version,
):
    model = PyTorchModel(
        MODEL_DATA,
        role=ROLE,
        entry_point=SCRIPT_PATH,
        framework_version=pytorch_inference_version,
        py_version=pytorch_inference_py_version,
        sagemaker_session=sagemaker_session,
    )
    custom_serializer = Mock()
    custom_deserializer = Mock()

    predictor = model.deploy(
        1,
        GPU,
        serializer=custom_serializer,
        deserializer=custom_deserializer,
    )

    assert isinstance(predictor, PyTorchPredictor)
    assert predictor.serializer is custom_serializer
    assert predictor.deserializer is custom_deserializer
def main():
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local mode a dummy role is sufficient
    role = DUMMY_IAM_ROLE
    model_dir = 's3://aws-ml-blog/artifacts/pytorch-nlp-script-mode-local-model-inference/model.tar.gz'

    test_data = pd.read_csv('./data/test_data.csv', header=None)
    print(f'test_data: {test_data}')

    model = PyTorchModel(role=role,
                         model_data=model_dir,
                         framework_version='1.7.1',
                         source_dir='code',
                         py_version='py3',
                         entry_point='inference.py')

    print('Deploying endpoint in local mode')
    print('Note: if launching for the first time in local mode, '
          'container image download might take a few minutes to complete.')
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='local',
    )

    predictor.serializer = sagemaker.serializers.CSVSerializer()
    predictor.deserializer = sagemaker.deserializers.CSVDeserializer()

    predictions = predictor.predict(test_data.to_csv(header=False, index=False))
    print(f'predictions: {predictions}')

    predictor.delete_endpoint()
import boto3
import sagemaker
from sagemaker.pytorch import PyTorch
from sagemaker.pytorch import PyTorchModel

from comment import Comment

sagemaker_session = sagemaker.Session(boto3.session.Session())

# Put the right role and input data here
role = "arn:aws:iam::294038372338:role/hunkimSagemaker"

comment = Comment()
values = comment.get_comment('model_data=')
if values is None or len(values) == 0:
    comment.add_comment('Deploy Fail: no model data. Did you train?')
    exit(-1)

print("Data:", values[-1])
model = PyTorchModel(model_data=values[-1],
                     role=role,
                     framework_version='1.5.0',
                     entry_point='mnist.py',
                     source_dir='code')

comment.add_comment('Deploying with data ' + values[-1])
try:
    predictor = model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')
    comment.add_comment('end_point=' + predictor.endpoint)
except Exception as e:
    comment.add_comment('Deploy Fail: ' + str(e))
def main():
    data, labels = read_imdb_data()
    train_X, test_X, train_y, test_y = prepare_imdb_data(data, labels)

    # Store the preprocessed data as a cache
    cache_dir = os.path.join("cache", "sentiment_analysis")  # where to store cache files
    os.makedirs(cache_dir, exist_ok=True)  # ensure cache directory exists

    # Preprocess data
    train_X, test_X, train_y, test_y = preprocess_data(train_X, test_X, train_y, test_y, cache_dir)

    # Build the word dict from the reviews
    word_dict = build_dict(train_X)

    # Store the word dict for future reference
    data_dir = 'data/pytorch'  # the folder we will use for storing data
    if not os.path.exists(data_dir):  # make sure that the folder exists
        os.makedirs(data_dir)
    with open(os.path.join(data_dir, 'word_dict.pkl'), "wb") as f:
        pickle.dump(word_dict, f)

    train_X, train_X_len = convert_and_pad_data(word_dict, train_X)
    test_X, test_X_len = convert_and_pad_data(word_dict, test_X)

    # Store the processed data
    pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X_len), pd.DataFrame(train_X)], axis=1) \
        .to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)

    loadEnv()
    # Access the environment variables
    access_key_id = os.getenv('ACCESS_KEY_ID')
    secret_key = os.getenv('SECRET_KEY')
    region = os.getenv('AWS_REGION')
    execution_role = os.getenv('EXEC_ROLE')

    # Create the SageMaker session
    session = boto3.Session(aws_access_key_id=access_key_id,
                            aws_secret_access_key=secret_key,
                            region_name=region)
    sagemaker_session = sagemaker.Session(boto_session=session)

    # Upload the data to an S3 bucket
    bucket = sagemaker_session.default_bucket()
    prefix = 'sagemaker/sentiment_rnn'
    role = execution_role
    input_data = sagemaker_session.upload_data(path=data_dir, bucket=bucket, key_prefix=prefix)

    # Read in only the first 250 rows
    train_sample = pd.read_csv(os.path.join(data_dir, 'train.csv'), header=None, names=None, nrows=250)

    # Turn the input pandas dataframe into tensors
    train_sample_y = torch.from_numpy(train_sample[[0]].values).float().squeeze()
    train_sample_X = torch.from_numpy(train_sample.drop([0], axis=1).values).long()

    # Build the dataset
    train_sample_ds = torch.utils.data.TensorDataset(train_sample_X, train_sample_y)
    # Build the dataloader
    train_sample_dl = torch.utils.data.DataLoader(train_sample_ds, batch_size=50)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    lstm_model = LSTMClassifier(32, 100, 5000).to(device)
    optimizer = optim.Adam(lstm_model.parameters())
    loss_fn = torch.nn.BCELoss()
    train(lstm_model, train_sample_dl, 5, optimizer, loss_fn, device)

    estimator = PyTorch(entry_point="train.py",
                        source_dir="train",
                        role=role,
                        framework_version='0.4.0',
                        train_instance_count=1,
                        train_instance_type='ml.m4.xlarge',
                        hyperparameters={
                            'epochs': 10,
                            'hidden_dim': 200,
                        })
    estimator.fit({'training': input_data})

    # Deploy the trained model
    class StringPredictor(RealTimePredictor):
        def __init__(self, endpoint_name, sagemaker_session):
            super(StringPredictor, self).__init__(endpoint_name, sagemaker_session,
                                                  content_type='text/plain')

    py_model = PyTorchModel(model_data=estimator.model_data,
                            role=role,
                            framework_version='0.4.0',
                            entry_point='predict.py',
                            source_dir='serve',
                            predictor_cls=StringPredictor)
    pytorch_predictor = py_model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')
    print(pytorch_predictor.endpoint)
    return
class AwsPytorch(AwsBase, ABC):
    default_model_kwargs = {"framework_version": "1.0.0"}
    train_entry_point: str = "train.py"
    predict_entry_point: str = "predict.py"
    source_directory: str = "models/pytorch"
    name: str = "pytorch"
    max_prediction_size = 5e6

    def __init__(
        self,
        data: DataLoader,
        aws_executor: Sagemaker,
        output_path: Optional[str] = None,
        local_save_folder: Optional[str] = None,
    ) -> None:
        super().__init__(data, aws_executor, output_path, local_save_folder)
        self._estimator: PyTorch = None
        self._model: PyTorchModel = None
        self._predictor: PyTorchPredictor = None

    def train(self, hyperparameters: Dict[str, Any] = {}) -> None:
        """Trains the model with the data provided."""
        LOGGER.info("Starting to train model.")
        self._get_model(hyperparameters)

        # Get the data and upload it to S3
        Y_train = self.data.train_data.loc[:, self.data.output_column]
        X_train = self.data.train_data.loc[:, self.data.feature_columns]
        s3_train_data = self._prepare_data("train", X_train, Y_train, s3_input_type=False)

        Y_validation = self.data.validation_data.loc[:, self.data.output_column]
        X_validation = self.data.validation_data.loc[:, self.data.feature_columns]
        s3_validation_data = self._prepare_data("validation", X_validation, Y_validation,
                                                s3_input_type=False)

        LOGGER.info("Starting to fit model")
        self._model.fit({
            "train": s3_train_data,
            "validation": s3_validation_data
        })
        LOGGER.info("Done with fitting model")

    def _get_model(self, hyperparameters):
        if self._model is not None:
            return self._model

        used_hyperparameters = {**self.default_hyperparameters, **hyperparameters}
        self._model = PyTorch(
            entry_point=self.train_entry_point,
            source_dir=self.source_directory,
            hyperparameters=used_hyperparameters,
            **self.default_model_kwargs,
            **self.executor.default_model_kwargs,
        )
        return self._model

    def load_estimator(self, training_job_name: str) -> None:
        """
        Load the already trained estimator so we don't have to train again.

        Arguments:
            training_job_name: The name of the training job, as provided by AWS
        """
        LOGGER.info(f"Loading already trained pytorch training job: {training_job_name}")
        self._estimator = PyTorch.attach(training_job_name=training_job_name,
                                         sagemaker_session=self.executor.session)

    def load_model(self, model_location: Optional[str] = None) -> None:
        """
        Load the already trained model so we don't have to train again.

        If model_location is not provided, it will first check whether the estimator
        has model data, and otherwise fall back to a default model name.

        Arguments:
            model_location: The location of the model on S3
        """
        if model_location is None:
            if self._estimator is not None:
                model_location = self._estimator.model_data
            else:
                model_location = f"s3://{self.executor.bucket}/{self.output_path}/model.tar.gz"

        LOGGER.info(f"Loading already created pytorch model {model_location}")
        self._model = PyTorchModel(
            model_data=model_location,
            role=self.executor.role,
            entry_point=self.predict_entry_point,
            source_dir=self.source_directory,
            sagemaker_session=self.executor.session,
            **self.default_model_kwargs,
        )

    def load_predictor(self, predictor_name: str = None) -> None:
        """
        Loads the predictor from the loaded model. If no model is present, it will load it.

        WARNING: a predictor costs money for the time it is online.
        Make sure to always take it down.
""" if predictor_name is not None: self._predictor = PyTorchPredictor( predictor_name, sagemaker_session=self.executor.session) if self._predictor is not None: return self._predictor if self._model is None: self.load_model() LOGGER.info("Deploying the predictor") self._predictor = self._model.deploy( **self.executor.default_deploy_kwargs) LOGGER.warn("Don't forget to delete the predicion endpoint") def delete_endpoint(self) -> None: """ Deletes the endpoint. """ LOGGER.info("Deleting the pytorch endpoint") if self._predictor is not None: self._predictor.delete_endpoint() self._predictor = None def execute_prediction(self, X_test: DataFrame, name: str = "test") -> DataFrame: """ Executes the prediction. Loads and also deletes the endpoint. Splits the data in separate batches so they can be provided to the predictor. Arguments: X_test: the dataframe to predict Returns: Y_test: The predictions """ try: LOGGER.info("Starting the PyTorch predictions") self.load_predictor() # Split in batches that AWS accepts. Divide by 2 for good measure no_batches = math.ceil(X_test.values.nbytes / (self.max_prediction_size / 2)) batches = self.split_in_batches(X_test, no_batches) prediction_list = [] LOGGER.info("Sending the data to predict to the model") for batch in batches: predictions = self._predictor.predict(batch.values) prediction_list.append(DataFrame(predictions)) Y_test = pd.concat(prediction_list, axis=0, ignore_index=True) LOGGER.info("Got the predictions") finally: self.delete_endpoint() LOGGER.info("Deleting the endpoint") pass return Y_test def tune(self): return NotImplemented def split_in_batches(self, data: DataFrame, number_of_batches): """ Splits a dataframe in a number of batches. Lambda required the payload to be a maximum size, so a split needs to be made Arguments: data: Dataframe to split number_of_batches: The number of batches to split in Returns: A list of dataframes that combined represent the data. """ LOGGER.info("Splitting the data in batches") number_of_rows = data.shape[0] list_of_dfs = [] for i in range(number_of_batches): start_index = (i * number_of_rows) // number_of_batches end_index = ((i + 1) * number_of_rows) // number_of_batches batch = data.iloc[start_index:end_index] list_of_dfs.append(batch) LOGGER.info("Done splitting the data in batches") return list_of_dfs
subprocess.call(["tar", "-czvf", "traced_bert.tar.gz", "traced_bert.pt"]) instance_type = 'ml.m5.large' accelerator_type = 'ml.eia2.xlarge' # TorchScript model tar_filename = 'traced_bert.tar.gz' # Returns S3 bucket URL print('Upload tarball to S3') model_data = sagemaker_session.upload_data(path=tar_filename, bucket=bucket, key_prefix=prefix) endpoint_name = 'bert-ei-traced-{}-{}'.format(instance_type, accelerator_type).replace( '.', '').replace('_', '') pytorch = PyTorchModel(model_data=model_data, role=role, entry_point='deploy_ei.py', source_dir='code', framework_version='1.3.1', py_version='py3', sagemaker_session=sagemaker_session) # Function will exit before endpoint is finished creating predictor = pytorch.deploy(initial_instance_count=1, instance_type=instance_type, accelerator_type=accelerator_type, endpoint_name=endpoint_name, wait=True)
# 4.
# - Create a Jupyter notebook in the same directory as inference.py.
# EXAMPLE deploy.ipynb:

# filename: deploy.ipynb
from sagemaker.pytorch import PyTorchModel
from sagemaker import get_execution_role

role = get_execution_role()
# You can also configure a SageMaker role and reference it by its name.
# role = "CustomSageMakerRoleName"

# pytorch_model = PyTorchModel(model_data='s3://pytorch-sagemaker-example/model.tar.gz',
#                              role=role, entry_point='inference.py', framework_version='1.3.1')
pytorch_model = PyTorchModel(model_data='s3://sjcobb_bucket/3DPhoto/model.tar.gz',
                             role=role,
                             entry_point='inference.py',
                             framework_version='1.3.1')

predictor = pytorch_model.deploy(instance_type='ml.t2.medium', initial_instance_count=1)

# 5.
# - In SageMaker, open the Jupyter notebook instance; there should be two files (inference.py & deploy.ipynb).
# - Open and execute deploy.ipynb by choosing 'Run All' from the cell menu. This deploys the model as well as the endpoint.
# - On successful deployment, you can make real-time predictions using InvokeEndpoint by sending a JSON object with the URL of an image to predict. For example: {"url": "https://example.com/predict.png"}
# - https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html

# 6 - a.
# - Create a new application in github.com/sjcobb with a call to InvokeEndpoint.
# - Requests are authenticated using AWS Signature Version 4: https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html
# - The POST request should use the HTTP Authorization header: https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-auth-using-authorization-header.html
# - EXAMPLE InvokeEndpoint syntax:
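# The original InvokeEndpoint example is not included above. Below is a minimal
# sketch of such a call using the boto3 'sagemaker-runtime' client, which signs
# requests with SigV4 automatically; the endpoint name is a hypothetical
# placeholder, and the payload follows the {"url": ...} shape from step 5.
import json

import boto3

runtime = boto3.client("sagemaker-runtime")

response = runtime.invoke_endpoint(
    EndpointName="pytorch-3dphoto-endpoint",  # assumed endpoint name
    ContentType="application/json",
    Body=json.dumps({"url": "https://example.com/predict.png"}),
)
print(response["Body"].read().decode("utf-8"))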