def load_model(self, model_location: Optional[str] = None) -> None:
    """
    Attach an already trained model so that training can be skipped.

    When no location is passed, the estimator's model data is used if an
    estimator is set; otherwise a default ``model.tar.gz`` path under the
    executor's bucket and this object's output path is used.

    Arguments:
        model_location: The location of the model on S3
    """
    if model_location is None:
        estimator = self._estimator
        model_location = (
            estimator.model_data
            if estimator is not None
            else f"s3://{self.executor.bucket}/{self.output_path}/model.tar.gz"
        )

    LOGGER.info(f"Loading already created pytorch model {model_location}")

    # The result is stored on the instance; nothing is returned.
    self._model = PyTorchModel(
        model_data=model_location,
        role=self.executor.role,
        entry_point=self.predict_entry_point,
        source_dir=self.source_directory,
        sagemaker_session=self.executor.session,
        **self.default_model_kwargs,
    )
def main():
    """Deploy a PyTorch model to a local-mode endpoint and run inference on it.

    The endpoint is deleted once inference finishes, including when inference
    raises, so a failed run does not leave the endpoint behind.
    """
    test_loader = download_data_for_inference()

    # NOTE(review): this session is configured but never passed to the model
    # below -- confirm whether that is intentional.
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local training a dummy role will be sufficient
    role = DUMMY_IAM_ROLE
    model_dir = 's3://aws-ml-blog/artifacts/pytorch-script-mode-local-model-inference/model.tar.gz'

    model = PyTorchModel(
        role=role,
        model_data=model_dir,
        framework_version='1.8',
        py_version='py3',
        entry_point='inference.py'
    )

    print('Deploying endpoint in local mode')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.')
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='local',
    )

    try:
        do_inference_on_local_endpoint(predictor, test_loader)
    finally:
        # The first parameter of delete_endpoint is the boolean
        # ``delete_endpoint_config`` (default True), not an endpoint name, so
        # the call takes no argument here.
        predictor.delete_endpoint()
def _test_mnist_distributed(sagemaker_session, ecr_image, instance_type):
    """Deploy the MNIST model archive to an endpoint and check the prediction shape.

    Uploads ``model_mnist.tar.gz`` from ``model_cpu_dir``, deploys it with the
    given image and instance type, sends a random batch and asserts that the
    output has shape ``(batch_size, 10)``.
    """
    model_dir = os.path.join(model_cpu_dir, 'model_mnist.tar.gz')

    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-pytorch-serving")

    model_data = sagemaker_session.upload_data(
        path=model_dir,
        key_prefix="sagemaker-pytorch-serving/models",
    )

    # Pass everything by keyword: positionally, the fifth argument is not
    # guaranteed to land on ``sagemaker_session`` in the PyTorchModel
    # constructor (NOTE(review): confirm against the SDK version in use).
    pytorch = PyTorchModel(model_data=model_data,
                           role='SageMakerRole',
                           entry_point=mnist_script,
                           image=ecr_image,
                           sagemaker_session=sagemaker_session)

    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        predictor = pytorch.deploy(initial_instance_count=1,
                                   instance_type=instance_type,
                                   endpoint_name=endpoint_name)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
def _test_mnist_distributed(sagemaker_session, ecr_image, instance_type, model_dir, mnist_script,
                            accelerator_type=None):
    """Deploy the given model archive and check the shape of a batch prediction.

    ``accelerator_type`` is forwarded to ``deploy`` only when it is not None.
    """
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-pytorch-serving")

    uploaded_model = sagemaker_session.upload_data(
        path=model_dir,
        key_prefix="sagemaker-pytorch-serving/models",
    )

    pytorch = PyTorchModel(
        model_data=uploaded_model,
        role='SageMakerRole',
        entry_point=mnist_script,
        image=ecr_image,
        sagemaker_session=sagemaker_session,
    )

    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        deploy_kwargs = {
            'initial_instance_count': 1,
            'instance_type': instance_type,
            'endpoint_name': endpoint_name,
        }
        # Use accelerator type to differentiate EI vs. CPU and GPU. Don't use processor value
        if accelerator_type is not None:
            deploy_kwargs['accelerator_type'] = accelerator_type
        predictor = pytorch.deploy(**deploy_kwargs)

        batch_size = 100
        random_batch = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(random_batch)

        assert output.shape == (batch_size, 10)
def test_model(sagemaker_session):
    """Check that deploying a PyTorchModel yields a PyTorchPredictor."""
    pytorch_model = PyTorchModel(
        MODEL_DATA,
        role=ROLE,
        entry_point=SCRIPT_PATH,
        sagemaker_session=sagemaker_session,
    )
    deployed = pytorch_model.deploy(1, GPU)
    assert isinstance(deployed, PyTorchPredictor)
def test_model(sagemaker_session):
    """Check that deploying a model built from an S3 URI yields a PyTorchPredictor."""
    model_kwargs = {
        "role": ROLE,
        "entry_point": SCRIPT_PATH,
        "sagemaker_session": sagemaker_session,
    }
    pytorch_model = PyTorchModel("s3://some/data.tar.gz", **model_kwargs)
    deployed = pytorch_model.deploy(1, GPU)
    assert isinstance(deployed, PyTorchPredictor)
def test_model_image_accelerator(sagemaker_session):
    """Check that prepare_container_def raises ValueError when given an accelerator type."""
    pytorch_model = PyTorchModel(
        MODEL_DATA,
        role=ROLE,
        entry_point=SCRIPT_PATH,
        sagemaker_session=sagemaker_session,
    )
    with pytest.raises(ValueError):
        pytorch_model.prepare_container_def(
            INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE
        )
def test_model_prepare_container_def_no_instance_type_or_image():
    """Check that prepare_container_def() without arguments raises the expected ValueError."""
    model = PyTorchModel(MODEL_DATA, role=ROLE, entry_point=SCRIPT_PATH)

    with pytest.raises(ValueError) as e:
        model.prepare_container_def()

    expected_msg = "Must supply either an instance type (for choosing CPU vs GPU) or an image URI."
    # Compare against the raised exception itself; ``e`` is pytest's
    # ExceptionInfo wrapper, whose str() is not the exception message.
    assert expected_msg in str(e.value)
def test_transform_pytorch_vpc_custom_model_bucket(
    sagemaker_session,
    pytorch_inference_latest_version,
    pytorch_inference_latest_py_version,
    cpu_instance_type,
    custom_bucket_name,
):
    """Run a batch transform for a VPC-configured model stored in a custom bucket.

    After the transform job finishes, the described model must report the
    requested subnets and security group, and its model data URL must point at
    the custom bucket.
    """
    mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")

    ec2_client = sagemaker_session.boto_session.client("ec2")
    subnet_ids, security_group_id = get_or_create_vpc_resources(ec2_client)

    model_data = sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "model.tar.gz"),
        bucket=custom_bucket_name,
        key_prefix="integ-test-data/pytorch_mnist/model",
    )

    vpc_settings = {
        "Subnets": subnet_ids,
        "SecurityGroupIds": [security_group_id],
    }
    model = PyTorchModel(
        model_data=model_data,
        entry_point=os.path.join(mnist_dir, "mnist.py"),
        role="SageMakerRole",
        framework_version=pytorch_inference_latest_version,
        py_version=pytorch_inference_latest_py_version,
        sagemaker_session=sagemaker_session,
        vpc_config=vpc_settings,
        code_location="s3://{}".format(custom_bucket_name),
    )

    transform_input = sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "transform", "data.npy"),
        key_prefix="integ-test-data/pytorch_mnist/transform",
    )

    transformer = model.transformer(1, cpu_instance_type)
    transformer.transform(
        transform_input,
        content_type="application/x-npy",
        job_name=unique_name_from_base("test-transform-vpc"),
    )

    with timeout_and_delete_model_with_transformer(
        transformer, sagemaker_session, minutes=TRANSFORM_DEFAULT_TIMEOUT_MINUTES
    ):
        transformer.wait()

        described = sagemaker_session.sagemaker_client.describe_model(
            ModelName=transformer.model_name
        )
        described_vpc = described["VpcConfig"]
        assert set(subnet_ids) == set(described_vpc["Subnets"])
        assert [security_group_id] == described_vpc["SecurityGroupIds"]

        model_bucket, _ = s3.parse_s3_url(
            described["PrimaryContainer"]["ModelDataUrl"]
        )
        assert custom_bucket_name == model_bucket
def test_model_py2_warning(warning, sagemaker_session):
    """Check that a py2 model keeps its py_version and triggers the warning hook."""
    model_kwargs = {
        "role": ROLE,
        "entry_point": SCRIPT_PATH,
        "sagemaker_session": sagemaker_session,
        "py_version": "py2",
    }
    py2_model = PyTorchModel(MODEL_DATA, **model_kwargs)

    assert py2_model.py_version == "py2"
    warning.assert_called_with(
        py2_model.__framework_name__, defaults.LATEST_PY2_VERSION
    )
def test_non_mms_model(repack_model, sagemaker_session):
    """Check that deploying with framework version 1.1 does not call repack_model."""
    legacy_model = PyTorchModel(
        MODEL_DATA,
        role=ROLE,
        entry_point=SCRIPT_PATH,
        sagemaker_session=sagemaker_session,
        framework_version="1.1",
    )
    legacy_model.deploy(1, GPU)

    repack_model.assert_not_called()
def test_model_empty_framework_version(warning, sagemaker_session):
    """Check that framework_version=None falls back to the default and warns."""
    model_kwargs = {
        "role": ROLE,
        "entry_point": SCRIPT_PATH,
        "sagemaker_session": sagemaker_session,
        "framework_version": None,
    }
    unversioned_model = PyTorchModel(MODEL_DATA, **model_kwargs)

    assert unversioned_model.framework_version == defaults.PYTORCH_VERSION
    warning.assert_called_with(defaults.PYTORCH_VERSION, defaults.LATEST_VERSION)
def test_model(sagemaker_session, pytorch_inference_version, pytorch_inference_py_version):
    """Check that a model with explicit framework and Python versions deploys to a PyTorchPredictor."""
    model_kwargs = {
        "role": ROLE,
        "entry_point": SCRIPT_PATH,
        "framework_version": pytorch_inference_version,
        "py_version": pytorch_inference_py_version,
        "sagemaker_session": sagemaker_session,
    }
    deployed = PyTorchModel(MODEL_DATA, **model_kwargs).deploy(1, GPU)
    assert isinstance(deployed, PyTorchPredictor)
def test_model_image_accelerator(sagemaker_session):
    """Check that a py2 / 1.3.1 model deployed with an accelerator raises ValueError.

    Construction and deployment both stay inside the ``pytest.raises`` block,
    so the error may come from either step.
    """
    with pytest.raises(ValueError) as error:
        model = PyTorchModel(
            MODEL_DATA,
            role=ROLE,
            entry_point=SCRIPT_PATH,
            sagemaker_session=sagemaker_session,
            framework_version="1.3.1",
            py_version="py2",
        )
        model.deploy(1, CPU, accelerator_type=ACCELERATOR_TYPE)

    # Inspect the exception itself; ``error`` is pytest's ExceptionInfo
    # wrapper, whose str() is not the exception message.
    assert "Unsupported Python version: py2." in str(error.value)
def _predictor(model_dir, script, image, sagemaker_local_session, instance_type,
               model_server_workers=None):
    """Yield a predictor for a locally stored model and delete its endpoint afterwards.

    The model is deployed while holding ``local_mode_utils.lock()``. The
    endpoint is deleted when the generator is resumed or closed, including when
    the consumer raises.
    """
    model = PyTorchModel('file://{}'.format(model_dir),
                         ROLE,
                         script,
                         image=image,
                         sagemaker_session=sagemaker_local_session,
                         model_server_workers=model_server_workers)

    with local_mode_utils.lock():
        # Deploy outside the try block: if deploy() raises there is no
        # predictor to clean up, and the original error must propagate instead
        # of an UnboundLocalError from the finally clause.
        predictor = model.deploy(1, instance_type)
        try:
            yield predictor
        finally:
            predictor.delete_endpoint()
def deploy():
    """Upload the model artifacts and deploy them as a SageMaker endpoint."""
    session = sagemaker.Session()
    model_artifacts = uploadModel(
        session,
        modelPath=PARAM_PATH,
        tokenizerPath=TOKENIZER,
        resourceName=MODEL_NAME,
    )

    versioned_name = '{}-{}'.format(MODEL_NAME, VERSION)
    model = PyTorchModel(
        model_data=model_artifacts,
        name=versioned_name,
        role='SageMakerRole',  # Needs to be defined beforehand
        framework_version='1.1.0',
        entry_point='serve.py',
        source_dir='release',
        predictor_cls=ShortAnswerPredictor,
    )
    model.deploy(initial_instance_count=1, instance_type=INSTANCE_TYPE)
def test_mms_model(repack_model, sagemaker_session):
    """Check that deploying with framework version 1.2 calls repack_model with the expected arguments."""
    mms_model = PyTorchModel(
        MODEL_DATA,
        role=ROLE,
        entry_point=SCRIPT_PATH,
        sagemaker_session=sagemaker_session,
        framework_version="1.2",
    )
    mms_model.deploy(1, GPU)

    expected_call = {
        "dependencies": [],
        "inference_script": SCRIPT_PATH,
        "kms_key": None,
        "model_uri": "s3://some/data.tar.gz",
        "repacked_model_uri": ANY,
        "sagemaker_session": sagemaker_session,
        "source_directory": None,
    }
    repack_model.assert_called_with(**expected_call)
def deploy():
    """Build and upload the model data, then deploy it for the current environment.

    If the environment already has a deployment, the endpoint and its config
    are deleted before the new deployment is created.
    """
    log_message = "Deploying model_name=%s to env=%s" % (
        env.setting('model_name'),
        env.current_env(),
    )
    config.logger.info(log_message)

    build_model_data_file()
    upload_model_data()

    model_kwargs = {
        'model_data': env.setting('model_data_path'),
        'name': env.setting('model_name'),
        'framework_version': '1.4.0',
        'role': env.setting("aws_role"),
        'env': {"DEPLOY_ENV": env.current_env()},
        'entry_point': 'deploy/sagemaker/serve.py',
    }
    pytorch_model = PyTorchModel(**model_kwargs)

    if env.isDeployed():
        delete_endpoint_and_config()

    predictor = pytorch_model.deploy(
        instance_type=env.setting('instance_type'),
        # Below isn't working: https://github.com/aws/sagemaker-python-sdk/issues/101#issuecomment-607376320
        # update_endpoint = update_endpoint_if_exists(),
        initial_instance_count=1,
    )
def test_model_custom_serialization(
    sagemaker_session,
    pytorch_inference_version,
    pytorch_inference_py_version,
):
    """Check that serializer and deserializer passed to deploy() end up on the predictor."""
    pytorch_model = PyTorchModel(
        MODEL_DATA,
        role=ROLE,
        entry_point=SCRIPT_PATH,
        framework_version=pytorch_inference_version,
        py_version=pytorch_inference_py_version,
        sagemaker_session=sagemaker_session,
    )

    custom_serializer, custom_deserializer = Mock(), Mock()
    deployed = pytorch_model.deploy(
        1,
        GPU,
        serializer=custom_serializer,
        deserializer=custom_deserializer,
    )

    assert isinstance(deployed, PyTorchPredictor)
    assert deployed.serializer is custom_serializer
    assert deployed.deserializer is custom_deserializer
def main():
    """Deploy a PyTorch model to a local-mode endpoint and predict on CSV test data.

    Reads ``./data/test_data.csv``, deploys the model with
    ``instance_type='local'``, sends the data as CSV and prints the
    predictions. The endpoint is deleted afterwards, including when prediction
    raises.
    """
    # NOTE(review): this session is configured but never passed to the model
    # below -- confirm whether that is intentional.
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local training a dummy role will be sufficient
    role = DUMMY_IAM_ROLE
    model_dir = 's3://aws-ml-blog/artifacts/pytorch-nlp-script-mode-local-model-inference/model.tar.gz'

    test_data = pd.read_csv('./data/test_data.csv', header=None)
    print(f'test_data: {test_data}')

    model = PyTorchModel(role=role,
                         model_data=model_dir,
                         framework_version='1.7.1',
                         source_dir='code',
                         py_version='py3',
                         entry_point='inference.py')

    print('Deploying endpoint in local mode')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='local',
    )

    try:
        predictor.serializer = sagemaker.serializers.CSVSerializer()
        predictor.deserializer = sagemaker.deserializers.CSVDeserializer()

        predictions = predictor.predict(test_data.to_csv(header=False, index=False))
        print(f'predictions: {predictions}')
    finally:
        # The first parameter of delete_endpoint is the boolean
        # ``delete_endpoint_config`` (default True), not an endpoint name, so
        # the call takes no argument here.
        predictor.delete_endpoint()
# - In inference.py, specify how the SageMaker model server should load and serve the model: # MUST implement 3 functions: input_fn, predict_fn, output_fn # # 4. # - create Jupyter notebook in same directory as inference.py # EXAMPLE deploy.ipynb: #filename deploy.ipynb from sagemaker.pytorch import PyTorchModel from sagemaker import get_execution_role role = get_execution_role() # You can also configure a sagemaker role and reference it by its name. # role = "CustomSageMakerRoleName" # pytorch_model = PyTorchModel(model_data='s3://pytorch-sagemaker-example/model.tar.gz', role=role, entry_point='inference.py', framework_version='1.3.1') pytorch_model = PyTorchModel(model_data='s3://sjcobb_bucket/3DPhoto/model.tar.gz', role=role, entry_point='inference.py', framework_version='1.3.1') predictor = pytorch_model.deploy(instance_type='ml.t2.medium', initial_instance_count=1) # # # 5. # - In SageMaker, open Jupyter notebook instance, there should be two files (inference.py & deploy.ipynb). # - Open and execute deploy.ipynb by choosing 'Run All' from the cell menu. This will deploy the model, as well as the endpoint. # - On successful deployment, you can make real-time predictions using InvokeEndpoint by sending a JSON object with a url of image to predict. For example: {"url":"https://example.com/predict.png"} # - https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html # # # 6 - a. # - Create new application in github.com/sjcobb with call to InvokeEndpoint # - requests are authenticated using AWS Signature Version 4: https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html
import sys

from sagemaker.pytorch import PyTorch
from sagemaker.pytorch import PyTorchModel

from comment import Comment

# Deploy the most recently recorded model artifact and report progress through
# the Comment helper.

# NOTE(review): this session is created but not passed to the model below --
# confirm whether that is intentional.
sagemaker_session = sagemaker.Session(boto3.session.Session())

# Put the right role and input data
role = "arn:aws:iam::294038372338:role/hunkimSagemaker"

comment = Comment()
values = comment.get_comment('model_data=')
if values is None or len(values) == 0:
    comment.add_comment('Deploy Fail: no model data. Did you train?')
    # sys.exit instead of the site-provided exit(), which is meant for the
    # interactive shell; the exit status is unchanged.
    sys.exit(-1)

# The last recorded entry is the one that gets deployed.
print("Data:", values[-1])

model = PyTorchModel(model_data=values[-1],
                     role=role,
                     framework_version='1.5.0',
                     entry_point='mnist.py',
                     source_dir='code')

comment.add_comment('Deploying with data ' + values[-1])

try:
    predictor = model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')
    comment.add_comment('end_point=' + predictor.endpoint)
except Exception as e:
    # Best-effort reporting: any failure is recorded as a comment, not raised.
    comment.add_comment('Deploy Fail:' + str(e))
from sagemaker.pytorch import PyTorchModel

# Wrap the existing model artifact so that it can be served with JSONPredictor.
model = PyTorchModel(
    model_data=model_data,
    role=role,
    framework_version='1.5.0',
    entry_point='que_gen.py',
    source_dir='code',
    predictor_cls=JSONPredictor,
)

# Deploy to a single ml.m4.xlarge instance.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)
print("okay!!!")
import sagemaker
from sagemaker.pytorch import PyTorchModel

# Role used to create the model.
role = sagemaker.get_execution_role()

# Location of the packaged model artifact.
model_data = 's3://dataset-retinopathy/deployments/model.tar.gz'

model = PyTorchModel(
    model_data=model_data,
    role=role,
    entry_point='inference.py',
    framework_version='1.6.0',
    py_version='py3',
    source_dir='code',
)
# Upload every file found under <group>/source to the input prefix on S3.
local_input = Path('data/test_AM_image/source_tiles')
for group_path in local_input.iterdir():
    source_dir = group_path / 'source'
    for file_path in source_dir.iterdir():
        s3_file_path = input_prefix / group_path.name / file_path.name
        print(f'Uploading {file_path} to {s3_file_path}')
        s3.upload_file(str(file_path), bucket, str(s3_file_path))

session = Session()
s3_input = f's3://{bucket}/{input_prefix}'
s3_output = f's3://{bucket}/{output_prefix}'

pytorch_model = PyTorchModel(
    model_data='s3://am-segm/unet.tar.gz',
    image='236062312728.dkr.ecr.eu-west-1.amazonaws.com/intsco/am-segm',
    role='AM-SegmSageMakerRole',
    entry_point='sagemaker/main.py',
    sagemaker_session=session,
)

transformer = pytorch_model.transformer(
    instance_count=3,
    instance_type='ml.c4.xlarge',
    # instance_type='ml.p2.xlarge',
    output_path=s3_output,
    accept='application/x-image',
    strategy='SingleRecord',
    env={'MODEL_SERVER_TIMEOUT': '180'},
)

# Time the whole batch transform job and report it as minutes and seconds.
start = time()
transformer.transform(
    data=s3_input,
    data_type='S3Prefix',
    content_type='application/x-image',
)
transformer.wait()

elapsed_minutes, elapsed_seconds = divmod(int(time() - start), 60)
print('{} min {} sec'.format(elapsed_minutes, elapsed_seconds))
def main():
    """Prepare the IMDB data, train a sentiment model and deploy it on SageMaker.

    Steps, in order: read and preprocess the reviews, build and pickle the word
    dictionary, write the padded training data to ``data/pytorch/train.csv``,
    upload the data directory to the session's default bucket, run a short
    local training pass on the first 250 rows, fit a SageMaker PyTorch
    estimator, and deploy the resulting model behind a plain-text predictor.
    The endpoint is printed at the end.
    """
    data, labels = read_imdb_data()
    train_X, test_X, train_y, test_y = prepare_imdb_data(data, labels)

    # Cache directory for the preprocessed data.
    cache_dir = os.path.join("cache", "sentiment_analysis")  # where to store cache files
    os.makedirs(cache_dir, exist_ok=True)  # ensure cache directory exists

    # Preprocess data
    train_X, test_X, train_y, test_y = preprocess_data(train_X, test_X, train_y, test_y, cache_dir)

    # Build the word dictionary from the training reviews.
    word_dict = build_dict(train_X)

    # Store the word dictionary for future reference.
    data_dir = 'data/pytorch'  # The folder we will use for storing data
    # exist_ok avoids the check-then-create race and matches the cache
    # directory handling above.
    os.makedirs(data_dir, exist_ok=True)
    with open(os.path.join(data_dir, 'word_dict.pkl'), "wb") as f:
        pickle.dump(word_dict, f)

    train_X, train_X_len = convert_and_pad_data(word_dict, train_X)
    test_X, test_X_len = convert_and_pad_data(word_dict, test_X)

    # Store the processed training data: label, length, then the padded review.
    pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X_len), pd.DataFrame(train_X)], axis=1) \
        .to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)

    loadEnv()

    # Accessing variables.
    access_key_id = os.getenv('ACCESS_KEY_ID')
    secret_key = os.getenv('SECRET_KEY')
    region = os.getenv('AWS_REGION')
    execution_role = os.getenv('EXEC_ROLE')

    # create sagemaker session
    session = boto3.Session(aws_access_key_id=access_key_id,
                            aws_secret_access_key=secret_key,
                            region_name=region)
    sagemaker_session = sagemaker.Session(boto_session=session)

    # Upload the data directory to the S3 bucket.
    bucket = sagemaker_session.default_bucket()
    prefix = 'sagemaker/sentiment_rnn'
    role = execution_role
    input_data = sagemaker_session.upload_data(path=data_dir, bucket=bucket, key_prefix=prefix)

    # Read in only the first 250 rows
    train_sample = pd.read_csv(os.path.join(data_dir, 'train.csv'),
                               header=None,
                               names=None,
                               nrows=250)

    # Turn the input pandas dataframe into tensors
    train_sample_y = torch.from_numpy(train_sample[[0]].values).float().squeeze()
    train_sample_X = torch.from_numpy(train_sample.drop([0], axis=1).values).long()

    # Build the dataset
    train_sample_ds = torch.utils.data.TensorDataset(train_sample_X, train_sample_y)
    # Build the dataloader
    train_sample_dl = torch.utils.data.DataLoader(train_sample_ds, batch_size=50)

    # Short local training pass on the sample.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    lstm_model = LSTMClassifier(32, 100, 5000).to(device)
    optimizer = optim.Adam(lstm_model.parameters())
    loss_fn = torch.nn.BCELoss()
    train(lstm_model, train_sample_dl, 5, optimizer, loss_fn, device)

    # NOTE(review): neither the estimator nor the model below receives
    # ``sagemaker_session`` -- confirm whether they are meant to use the
    # explicitly configured session.
    estimator = PyTorch(entry_point="train.py",
                        source_dir="train",
                        role=role,
                        framework_version='0.4.0',
                        train_instance_count=1,
                        train_instance_type='ml.m4.xlarge',
                        hyperparameters={
                            'epochs': 10,
                            'hidden_dim': 200,
                        })
    estimator.fit({'training': input_data})

    # Deploy the trained model
    class StringPredictor(RealTimePredictor):
        """Predictor that is constructed with content type ``text/plain``."""

        def __init__(self, endpoint_name, sagemaker_session):
            super(StringPredictor, self).__init__(endpoint_name,
                                                  sagemaker_session,
                                                  content_type='text/plain')

    py_model = PyTorchModel(model_data=estimator.model_data,
                            role=role,
                            framework_version='0.4.0',
                            entry_point='predict.py',
                            source_dir='serve',
                            predictor_cls=StringPredictor)
    pytorch_predictor = py_model.deploy(initial_instance_count=1,
                                        instance_type='ml.m4.xlarge')
    print(pytorch_predictor.endpoint)
# predictor.content_type = "application/json"
# predictor.accept = "application/json"
# predictor.serializer = sagemaker.serializers.JSONSerializer()
# predictor.deserializer = sagemaker.deserializers.JSONDeserializer()
# result = predictor.predict("Somebody just left - guess who.")
# print(np.argmax(result, axis=1))
# predictor.delete_endpoint()

# Pretrained model
pytorch_model = PyTorchModel(
    model_data="s3://sagemaker-us-east-1-111652037296/pytorch-training-2020-12-29-19-35-32-544/output/model.tar.gz",
    role=role,
    framework_version="1.3.1",
    source_dir="code",
    py_version="py3",
    entry_point="train_deploy.py")

##predictor = pytorch_model.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")
# print(estimator.model_data)

# Load the model from the local "model/" directory with torchscript=True.
model_torchScript = BertForSequenceClassification.from_pretrained(
    "model/", torchscript=True)
device = "cpu"

# Dummy inputs: 64 zeros each for the input ids and the attention masks.
for_jit_trace_input_ids = [0] * 64
for_jit_trace_attention_masks = [0] * 64
for_jit_trace_input = torch.tensor([for_jit_trace_input_ids])
# Build the mask tensor from the attention masks, not from the input ids.
for_jit_trace_masks = torch.tensor([for_jit_trace_attention_masks])
# # **NOTE**: The default behaviour for a deployed PyTorch model is to assume that any input passed to the predictor is a `numpy` array. In our case we want to send a string so we need to construct a simple wrapper around the `RealTimePredictor` class to accomodate simple strings. In a more complicated situation you may want to provide a serialization object, for example if you wanted to sent image data. # In[38]: from sagemaker.predictor import RealTimePredictor from sagemaker.pytorch import PyTorchModel class StringPredictor(RealTimePredictor): def __init__(self, endpoint_name, sagemaker_session): super(StringPredictor, self).__init__(endpoint_name, sagemaker_session, content_type='text/plain') model = PyTorchModel(model_data=estimator.model_data, role = role, framework_version='0.4.0', entry_point='predict.py', source_dir='serve', predictor_cls=StringPredictor) predictor = model.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge') # ### Testing the model # # Now that we have deployed our model with the custom inference code, we should test to see if everything is working. Here we test our model by loading the first `250` positive and negative reviews and send them to the endpoint, then collect the results. The reason for only sending some of the data is that the amount of time it takes for our model to process the input and then perform inference is quite long and so testing the entire data set would be prohibitive. # In[39]: import glob def test_reviews(data_dir='../data/aclImdb', stop=250):
def output_fn(prediction_output, accept='application/json'):
    """Serialize the top-3 predictions of the first row as JSON.

    Args:
        prediction_output: Object exposing ``topk(3, dim=1)``; the two results
            are moved to the host with ``.cpu().numpy()`` (presumably a torch
            tensor of per-class scores -- confirm against the caller).
        accept: Requested response content type. Only ``'application/json'``
            is supported.

    Returns:
        A ``(body, accept)`` tuple where ``body`` is a JSON list of three
        ``{'prediction': <class name>, 'score': '<value>%'}`` objects.

    Raises:
        Exception: If ``accept`` is anything other than ``'application/json'``.
    """
    logger.info('Serializing the generated output.')
    classes = {0: 'Leopards', 1: 'airplanes', 2: 'butterfly', 3: 'camera', 4: 'elephant', 5: 'lamp', 6: 'rhino',
               7: 'umbrella', 8: 'watch'}

    topk, topclass = prediction_output.topk(3, dim=1)
    # Convert once up front instead of on every loop iteration; only the first
    # row is reported.
    scores = topk.cpu().numpy()[0]
    class_ids = topclass.cpu().numpy()[0]

    result = []
    for i in range(3):
        pred = {'prediction': classes[class_ids[i]], 'score': f'{scores[i] * 100}%'}
        logger.info(f'Adding pediction: {pred}')
        result.append(pred)

    if accept == 'application/json':
        return json.dumps(result), accept
    raise Exception(f'Requested unsupported ContentType in Accept:{accept}')


from sagemaker import get_execution_role
# filename deploy.ipynb
from sagemaker.pytorch import PyTorchModel

role = get_execution_role()

# You can also configure a sagemaker role and reference it by its name.
# role = "CustomSageMakerRoleName"
pytorch_model = PyTorchModel(model_data='s3://pytorch-sagemaker-example/model.tar.gz', role=role,
                             entry_point='inference.py', framework_version='1.3.1')
predictor = pytorch_model.deploy(instance_type='ml.t2.medium', initial_instance_count=1)