def run_as_local_main():
    args = parse_infer_args()
    sm_boto3 = boto3.client('sagemaker')
    sess = sagemaker.Session()
    region = sess.boto_session.region_name
    model_url = args.model_file
    model = PyTorchModel(model_data=model_url,
                         source_dir=os.path.abspath(os.path.dirname(__file__)),
                         role=get_sm_execution_role(ON_SAGEMAKER_NOTEBOOK, region),
                         framework_version='1.0.0',
                         entry_point='inference.py')
    infer_mode = args.infer_mode
    if 'bt' == infer_mode:
        # Batch transform: offline inference over a CSV input in S3.
        env = {'MODEL_SERVER_TIMEOUT': '120'}
        transformer = model.transformer(
            instance_count=1,
            instance_type='ml.c5.xlarge',
            output_path=args.output_dir,
            max_payload=99,
            env=env,
            max_concurrent_transforms=1,
            tags=[{"Key": "Project", "Value": "SM Example"}],
        )
        transformer.transform(args.input_file, content_type="text/csv")
        transformer.wait()
    elif 'ep' == infer_mode:
        # Real-time inference: host the model behind an HTTPS endpoint.
        model.deploy(instance_type='ml.c5.xlarge', initial_instance_count=1)
    else:
        raise ValueError(f'Unknown inference mode {infer_mode}')
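# parse_infer_args() is defined elsewhere in the original source. Below is a
# minimal sketch of what it presumably provides, inferred from the attributes
# used above (model_file, infer_mode, output_dir, input_file); the flag names
# and help strings are assumptions, not the original implementation.
def parse_infer_args():
    import argparse
    parser = argparse.ArgumentParser(description='Run SageMaker inference')
    parser.add_argument('--model-file', dest='model_file',
                        help='S3 URI of the model.tar.gz artifact')
    parser.add_argument('--infer-mode', dest='infer_mode', choices=['bt', 'ep'],
                        help="'bt' for batch transform, 'ep' for a real-time endpoint")
    parser.add_argument('--input-file', dest='input_file',
                        help='S3 URI of the CSV input for batch transform')
    parser.add_argument('--output-dir', dest='output_dir',
                        help='S3 URI for batch transform output')
    return parser.parse_args()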
def main():
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local training a dummy role will be sufficient
    role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'

    print('Deploying local mode endpoint')
    print('Note: if launching for the first time in local mode, container image '
          'download might take a few minutes to complete.')

    pytorch_model = PyTorchModel(model_data='./model/model.tar.gz',
                                 role=role,
                                 framework_version='1.7.1',
                                 source_dir='code',
                                 py_version='py3',
                                 entry_point='inference.py')

    predictor = pytorch_model.deploy(initial_instance_count=1, instance_type='local')
    predictor.serializer = sagemaker.serializers.JSONSerializer()
    predictor.deserializer = sagemaker.deserializers.JSONDeserializer()

    # Hebrew test input: "I love working at Amazon"
    result = predictor.predict("אני אוהב לעבוד באמזון")
    print('result: {}'.format(result))

    # Predictor.delete_endpoint() takes no endpoint argument in SDK v2.
    predictor.delete_endpoint()
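# Imports assumed by main() above (standard SageMaker Python SDK v2
# locations), plus the usual entry-point guard; a sketch of the file header,
# not the original source:
#
#   import sagemaker
#   from sagemaker.local import LocalSession
#   from sagemaker.pytorch import PyTorchModel
#
if __name__ == '__main__':
    main()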
def test_deploy_model_with_accelerator(
    sagemaker_session,
    cpu_instance_type,
    pytorch_eia_latest_version,
    pytorch_eia_latest_py_version,
):
    endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp())
    model_data = sagemaker_session.upload_data(path=EIA_MODEL)
    pytorch = PyTorchModel(
        model_data,
        "SageMakerRole",
        entry_point=EIA_SCRIPT,
        framework_version=pytorch_eia_latest_version,
        py_version=pytorch_eia_latest_py_version,
        sagemaker_session=sagemaker_session,
    )

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = pytorch.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            accelerator_type="ml.eia1.medium",
            endpoint_name=endpoint_name,
        )

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)
        assert output.shape == (batch_size, 10)
def test_deploy_packed_model_with_entry_point_name(
    sagemaker_session,
    cpu_instance_type,
    pytorch_inference_latest_version,
    pytorch_inference_latest_py_version,
):
    endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model_data = sagemaker_session.upload_data(path=PACKED_MODEL)
        model = PyTorchModel(
            model_data,
            "SageMakerRole",
            entry_point="mnist.py",
            framework_version=pytorch_inference_latest_version,
            py_version=pytorch_inference_latest_py_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)
        assert output.shape == (batch_size, 10)
def test_deploy_model(
    pytorch_training_job,
    sagemaker_session,
    cpu_instance_type,
    pytorch_inference_latest_version,
    pytorch_inference_latest_py_version,
):
    endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        desc = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=pytorch_training_job
        )
        model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
        model = PyTorchModel(
            model_data,
            "SageMakerRole",
            entry_point=MNIST_SCRIPT,
            framework_version=pytorch_inference_latest_version,
            py_version=pytorch_inference_latest_py_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)
        assert output.shape == (batch_size, 10)
def deploy_endpoint(session, client, endpoint_name, setting, pytorch):
    sagemaker_session = sagemaker.Session(
        boto_session=session, sagemaker_client=client)

    # yaml.load() without an explicit Loader is deprecated and unsafe;
    # use safe_load and close the file deterministically.
    with open(setting) as f:
        conf = yaml.safe_load(f)

    model_args = conf['model']
    model_args['sagemaker_session'] = sagemaker_session
    model_args['name'] = endpoint_name + '-model-' + dt.now().strftime('%y%m%d%H%M')
    if pytorch:
        model = PyTorchModel(**model_args)
    else:
        model = ChainerModel(**model_args)

    deploy_args = conf['deploy']
    deploy_args['endpoint_name'] = endpoint_name
    model.deploy(**deploy_args)
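# The settings-file layout deploy_endpoint() expects is not shown in the
# original. Based on how conf['model'] is splatted into PyTorchModel(...) and
# conf['deploy'] into model.deploy(...), a plausible YAML file might look
# like this (all keys and values are illustrative assumptions):
#
#   model:
#     model_data: s3://my-bucket/model/model.tar.gz
#     role: arn:aws:iam::111111111111:role/SageMakerRole
#     entry_point: inference.py
#     framework_version: '1.7.1'
#   deploy:
#     initial_instance_count: 1
#     instance_type: ml.m4.xlarge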
def test_deploy_model(pytorch_training_job, sagemaker_session):
    endpoint_name = 'test-pytorch-deploy-model-{}'.format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        desc = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=pytorch_training_job)
        model_data = desc['ModelArtifacts']['S3ModelArtifacts']
        model = PyTorchModel(model_data, 'SageMakerRole', entry_point=MNIST_SCRIPT,
                             sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)
        assert output.shape == (batch_size, 10)
def deploy_model_into_sagemaker(estimator, role, server_source_dir: str,
                                endpoint_name: str, aws_account_id: str,
                                aws_region: str, device: str, debug: bool):
    instance_type, image_version = __get_instance_info(
        device=device, debug=debug, mode="inference")
    image_url_inference = "{}.dkr.ecr.{}.amazonaws.com/youyakuman:{}".format(
        aws_account_id, aws_region, image_version)
    # Note: `image=` is the SageMaker Python SDK v1 parameter name;
    # SDK v2 renamed it to `image_uri=`.
    p_model = PyTorchModel(model_data=estimator.model_data,
                           image=image_url_inference,
                           role=role,
                           framework_version=estimator.framework_version,
                           entry_point=estimator.entry_point,
                           source_dir=server_source_dir)
    predictor = p_model.deploy(initial_instance_count=1,
                               instance_type=instance_type,
                               endpoint_name=endpoint_name)
    return predictor
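# A hedged usage sketch: wiring deploy_model_into_sagemaker() to a fitted
# PyTorch estimator. The estimator construction, account ID, region, and
# endpoint name below are illustrative assumptions, not from the original:
#
#   from sagemaker.pytorch import PyTorch
#
#   estimator = PyTorch(entry_point='train.py', role=role,
#                       framework_version='1.4.0',
#                       train_instance_count=1,
#                       train_instance_type='ml.p2.xlarge')
#   estimator.fit({'training': inputs})
#   predictor = deploy_model_into_sagemaker(
#       estimator, role, server_source_dir='src/server',
#       endpoint_name='youyakuman-endpoint',
#       aws_account_id='111111111111', aws_region='us-east-1',
#       device='gpu', debug=False)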