import sagemaker
from sagemaker.local import LocalSession
from sagemaker.pytorch import PyTorchModel


def main():
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    # For local mode, a dummy role is sufficient
    role = 'arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001'

    print('Deploying local mode endpoint')
    print('Note: if launching for the first time in local mode, container image '
          'download might take a few minutes to complete.')

    pytorch_model = PyTorchModel(model_data='./model/model.tar.gz',
                                 role=role,
                                 framework_version='1.7.1',
                                 source_dir='code',
                                 py_version='py3',
                                 entry_point='inference.py')

    predictor = pytorch_model.deploy(initial_instance_count=1, instance_type='local')
    predictor.serializer = sagemaker.serializers.JSONSerializer()
    predictor.deserializer = sagemaker.deserializers.JSONDeserializer()

    # Hebrew: "I love working at Amazon"
    result = predictor.predict("אני אוהב לעבוד באמזון")
    print('result: {}'.format(result))

    predictor.delete_endpoint()


if __name__ == '__main__':
    main()
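# The entry point 'code/inference.py' used above is not shown in the source.
# A minimal sketch of the handler contract the SageMaker PyTorch serving
# container expects (model_fn / input_fn / predict_fn / output_fn); the model
# file name and the echo-style predict_fn are placeholders, not the original
# script:
import json
import os

import torch


def model_fn(model_dir):
    # Load the model artifact unpacked from model.tar.gz; 'model.pth' is an
    # assumed file name.
    model = torch.jit.load(os.path.join(model_dir, 'model.pth'), map_location='cpu')
    model.eval()
    return model


def input_fn(request_body, request_content_type):
    if request_content_type == 'application/json':
        return json.loads(request_body)
    raise ValueError('Unsupported content type: {}'.format(request_content_type))


def predict_fn(input_data, model):
    # Placeholder: a real script would tokenize/encode the text and run the
    # model here; this sketch just echoes the input.
    return {'input': input_data}


def output_fn(prediction, response_content_type):
    return json.dumps(prediction)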
def test_deploy_packed_model_with_entry_point_name(
    sagemaker_session,
    cpu_instance_type,
    pytorch_inference_latest_version,
    pytorch_inference_latest_py_version,
):
    endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model_data = sagemaker_session.upload_data(path=PACKED_MODEL)
        model = PyTorchModel(
            model_data,
            "SageMakerRole",
            entry_point="mnist.py",
            framework_version=pytorch_inference_latest_version,
            py_version=pytorch_inference_latest_py_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
def test_deploy_model_with_accelerator(
    sagemaker_session,
    cpu_instance_type,
    pytorch_eia_latest_version,
    pytorch_eia_latest_py_version,
):
    endpoint_name = "test-pytorch-deploy-eia-{}".format(sagemaker_timestamp())
    model_data = sagemaker_session.upload_data(path=EIA_MODEL)
    pytorch = PyTorchModel(
        model_data,
        "SageMakerRole",
        entry_point=EIA_SCRIPT,
        framework_version=pytorch_eia_latest_version,
        py_version=pytorch_eia_latest_py_version,
        sagemaker_session=sagemaker_session,
    )

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = pytorch.deploy(
            initial_instance_count=1,
            instance_type=cpu_instance_type,
            accelerator_type="ml.eia1.medium",
            endpoint_name=endpoint_name,
        )

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
def run_as_local_main():
    args = parse_infer_args()
    sm_boto3 = boto3.client('sagemaker')
    sess = sagemaker.Session()
    region = sess.boto_session.region_name
    model_url = args.model_file
    model = PyTorchModel(model_data=model_url,
                         source_dir=os.path.abspath(os.path.dirname(__file__)),
                         role=get_sm_execution_role(ON_SAGEMAKER_NOTEBOOK, region),
                         framework_version='1.0.0',
                         entry_point='inference.py')

    infer_mode = args.infer_mode
    if 'bt' == infer_mode:
        env = {'MODEL_SERVER_TIMEOUT': '120'}
        transformer = model.transformer(
            instance_count=1,
            instance_type='ml.c5.xlarge',
            output_path=args.output_dir,
            max_payload=99,
            env=env,
            max_concurrent_transforms=1,
            tags=[{"Key": "Project", "Value": "SM Example"}],
        )
        transformer.transform(args.input_file, content_type="text/csv")
        transformer.wait()
    elif 'ep' == infer_mode:
        model.deploy(instance_type='ml.c5.xlarge', initial_instance_count=1)
    else:
        raise Exception(f'Unknown inference mode {infer_mode}')
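# The helper parse_infer_args() referenced above is not shown in the source.
# A minimal sketch of what it might look like, assuming argparse flags that
# match the attributes used above (model_file, infer_mode, output_dir,
# input_file); flag names are hypothetical:
def parse_infer_args():
    import argparse

    parser = argparse.ArgumentParser(
        description='Run batch transform or deploy a real-time endpoint')
    parser.add_argument('--model-file', dest='model_file', required=True,
                        help='S3 URI of the model.tar.gz artifact')
    parser.add_argument('--infer-mode', dest='infer_mode',
                        choices=['bt', 'ep'], default='bt',
                        help="'bt' for batch transform, 'ep' for an endpoint")
    parser.add_argument('--output-dir', dest='output_dir',
                        help='S3 URI for batch transform output')
    parser.add_argument('--input-file', dest='input_file',
                        help='S3 URI of the CSV input for batch transform')
    return parser.parse_args()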
def test_deploy_model(
    pytorch_training_job,
    sagemaker_session,
    cpu_instance_type,
    pytorch_inference_latest_version,
    pytorch_inference_latest_py_version,
):
    endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        desc = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=pytorch_training_job
        )
        model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
        model = PyTorchModel(
            model_data,
            "SageMakerRole",
            entry_point=MNIST_SCRIPT,
            framework_version=pytorch_inference_latest_version,
            py_version=pytorch_inference_latest_py_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
def batch_inference(session, client, model_name, setting, pytorch):
    sagemaker_session = sagemaker.Session(boto_session=session, sagemaker_client=client)
    with open(setting) as f:
        conf = yaml.safe_load(f)

    # Check whether the target model already exists.
    if _model_exists(client, model_name):
        logger.info('use the registered model.')
        deploy_args = conf['deploy']
        deploy_args['model_name'] = model_name
        deploy_args['base_transform_job_name'] = model_name
        deploy_args['sagemaker_session'] = sagemaker_session
        transformer = Transformer(**deploy_args)
    else:
        # [TODO] updating case (delete and create).
        # Models typically have dependencies on multiple endpoints and
        # inference jobs, so deleting one is not straightforward.
        logger.info('register the new model.')
        model_args = conf['model']
        model_args['sagemaker_session'] = sagemaker_session
        model_args['name'] = model_name
        if pytorch:
            model = PyTorchModel(**model_args)
        else:
            model = ChainerModel(**model_args)
        deploy_args = conf['deploy']
        transformer = model.transformer(**deploy_args)  # register model

    transform_args = conf['transform']
    # use default job_name (model_name + datetime.now())
    transformer.transform(**transform_args)
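# A hypothetical settings YAML for batch_inference() above. The top-level keys
# mirror the sections unpacked via conf['model'], conf['deploy'], and
# conf['transform']; the nested keys and all values are illustrative guesses,
# not from the source:
#
# model:
#   model_data: s3://my-bucket/model/model.tar.gz
#   role: SageMakerRole
#   entry_point: inference.py
#   framework_version: '1.7.1'
#   py_version: py3
# deploy:
#   instance_count: 1
#   instance_type: ml.c5.xlarge
#   output_path: s3://my-bucket/batch-output/
# transform:
#   data: s3://my-bucket/batch-input/data.csv
#   content_type: text/csv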
def test_compile_and_deploy_model_with_neo(
    sagemaker_session,
    neo_pytorch_cpu_instance_type,
    neo_pytorch_latest_version,
    neo_pytorch_latest_py_version,
    neo_pytorch_target_device,
    neo_pytorch_compilation_job_name,
):
    endpoint_name = "test-neo-pytorch-deploy-model-{}".format(sagemaker_timestamp())
    model_data = sagemaker_session.upload_data(path=NEO_MODEL)
    bucket = sagemaker_session.default_bucket()

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = PyTorchModel(
            model_data=model_data,
            predictor_cls=Predictor,
            role="SageMakerRole",
            entry_point=NEO_SCRIPT,
            source_dir=NEO_CODE_DIR,
            framework_version=neo_pytorch_latest_version,
            py_version=neo_pytorch_latest_py_version,
            sagemaker_session=sagemaker_session,
            env={"MMS_DEFAULT_RESPONSE_TIMEOUT": "500"},
        )
        data_shape = '{"input0":[1,3,224,224]}'
        compiled_model_path = "s3://{}/{}/output".format(
            bucket, neo_pytorch_compilation_job_name)
        compiled_model = model.compile(
            target_instance_family=neo_pytorch_target_device,
            input_shape=data_shape,
            job_name=neo_pytorch_compilation_job_name,
            role="SageMakerRole",
            framework="pytorch",
            framework_version=neo_pytorch_latest_version,
            output_path=compiled_model_path,
        )

        # Load names for ImageNet classes
        object_categories = {}
        with open(NEO_IMAGENET_CLASSES, "r") as f:
            for line in f:
                if line.strip():
                    key, val = line.strip().split(":")
                    object_categories[key] = val

        with open(NEO_INFERENCE_IMAGE, "rb") as f:
            payload = f.read()
            payload = bytearray(payload)

        predictor = compiled_model.deploy(
            1, neo_pytorch_cpu_instance_type, endpoint_name=endpoint_name
        )
        response = predictor.predict(payload)
        result = json.loads(response.decode())

        assert "tiger cat" in object_categories[str(np.argmax(result))]
        assert compiled_model.framework_version == neo_pytorch_latest_version
def test_deploy_model(pytorch_training_job, sagemaker_session):
    endpoint_name = 'test-pytorch-deploy-model-{}'.format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        desc = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=pytorch_training_job)
        model_data = desc['ModelArtifacts']['S3ModelArtifacts']
        model = PyTorchModel(model_data, 'SageMakerRole', entry_point=MNIST_SCRIPT,
                             sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
def create_model(
    self,
    model_server_workers=None,
    role=None,
    vpc_config_override=VPC_CONFIG_DEFAULT,
    entry_point=None,
    source_dir=None,
    dependencies=None,
    **kwargs
):
    """Create a SageMaker ``PyTorchModel`` object that can be deployed to an ``Endpoint``.

    Args:
        model_server_workers (int): Optional. The number of worker processes
            used by the inference server. If None, server will use one
            worker per vCPU.
        role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
            which is also used during transform jobs. If not specified, the
            role from the Estimator will be used.
        vpc_config_override (dict[str, list[str]]): Optional override for
            VpcConfig set on the model. Default: use subnets and security
            groups from this Estimator.

            * 'Subnets' (list[str]): List of subnet ids.
            * 'SecurityGroupIds' (list[str]): List of security group ids.

        entry_point (str): Path (absolute or relative) to the local Python
            source file which should be executed as the entry point for
            inference. If not specified, the training entry point is used.
        source_dir (str): Path (absolute or relative) to a directory with
            any other serving source code dependencies aside from the entry
            point file. If not specified, the model source directory from
            training is used.
        dependencies (list[str]): A list of paths to directories (absolute
            or relative) with any additional libraries that will be exported
            to the container. If not specified, the dependencies from
            training are used.
        **kwargs: Additional kwargs passed to the
            :class:`~sagemaker.pytorch.model.PyTorchModel` constructor.

    Returns:
        sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel``
        object. See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details.
    """
    if "image" not in kwargs:
        kwargs["image"] = self.image_name

    return PyTorchModel(
        self.model_data,
        role or self.role,
        entry_point or self.entry_point,
        source_dir=(source_dir or self._model_source_dir()),
        enable_cloudwatch_metrics=self.enable_cloudwatch_metrics,
        name=self._current_job_name,
        container_log_level=self.container_log_level,
        code_location=self.code_location,
        py_version=self.py_version,
        framework_version=self.framework_version,
        model_server_workers=model_server_workers,
        sagemaker_session=self.sagemaker_session,
        vpc_config=self.get_vpc_config(vpc_config_override),
        dependencies=(dependencies or self.dependencies),
        **kwargs
    )
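# A minimal usage sketch for the create_model() override above (not from the
# source). It assumes a fitted PyTorch estimator; the entry point name and
# worker count are illustrative:
def example_create_and_deploy(estimator):
    # Swap in a dedicated serving script and deploy the resulting model.
    model = estimator.create_model(entry_point='inference.py',
                                   model_server_workers=2)
    predictor = model.deploy(initial_instance_count=1,
                             instance_type='ml.m5.xlarge')
    return predictor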
def create_model(self, model_server_workers=None):
    """Create a SageMaker ``PyTorchModel`` object that can be deployed to an ``Endpoint``.

    Args:
        model_server_workers (int): Optional. The number of worker processes
            used by the inference server. If None, server will use one
            worker per vCPU.

    Returns:
        sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel``
        object. See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details.
    """
    return PyTorchModel(self.model_data, self.role, self.entry_point,
                        source_dir=self._model_source_dir(),
                        enable_cloudwatch_metrics=self.enable_cloudwatch_metrics,
                        name=self._current_job_name,
                        container_log_level=self.container_log_level,
                        code_location=self.code_location,
                        py_version=self.py_version,
                        framework_version=self.framework_version,
                        image=self.image_name,
                        model_server_workers=model_server_workers,
                        sagemaker_session=self.sagemaker_session)
def deploy_model_into_sagemaker(estimator, role, server_source_dir: str,
                                endpoint_name: str, aws_account_id: str,
                                aws_region: str, device: str, debug: bool):
    instance_type, image_version = __get_instance_info(device=device, debug=debug,
                                                       mode="inference")
    image_url_inference = "{}.dkr.ecr.{}.amazonaws.com/youyakuman:{}".format(
        aws_account_id, aws_region, image_version)
    p_model = PyTorchModel(model_data=estimator.model_data,
                           image=image_url_inference,
                           role=role,
                           framework_version=estimator.framework_version,
                           entry_point=estimator.entry_point,
                           source_dir=server_source_dir)
    predictor = p_model.deploy(initial_instance_count=1,
                               instance_type=instance_type,
                               endpoint_name=endpoint_name)
    return predictor
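# A hypothetical invocation of deploy_model_into_sagemaker() above, assuming a
# fitted PyTorch estimator; the account id, region, source directory, and
# endpoint name are placeholders, not from the source:
def example_deploy(estimator):
    return deploy_model_into_sagemaker(
        estimator=estimator,
        role='SageMakerRole',
        server_source_dir='server',
        endpoint_name='youyakuman-endpoint',
        aws_account_id='111111111111',
        aws_region='us-east-1',
        device='cpu',
        debug=False,
    )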
def deploy_endpoint(session, client, endpoint_name, setting, pytorch):
    sagemaker_session = sagemaker.Session(
        boto_session=session, sagemaker_client=client)
    with open(setting) as f:
        conf = yaml.safe_load(f)

    model_args = conf['model']
    model_args['sagemaker_session'] = sagemaker_session
    model_args['name'] = endpoint_name + '-model-' + dt.now().strftime('%y%m%d%H%M')
    if pytorch:
        model = PyTorchModel(**model_args)
    else:
        model = ChainerModel(**model_args)

    deploy_args = conf['deploy']
    deploy_args['endpoint_name'] = endpoint_name
    model.deploy(**deploy_args)
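# A hypothetical call to deploy_endpoint() above; the profile name, endpoint
# name, and settings path are placeholders, and the YAML layout is assumed to
# follow the same model/deploy sections as the batch_inference() config sketch
# earlier:
def example_deploy_endpoint():
    session = boto3.Session(profile_name='default')
    client = session.client('sagemaker')
    deploy_endpoint(session, client, endpoint_name='demo-endpoint',
                    setting='settings.yml', pytorch=True)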
def test_jumpstart_catboost_image_uri(patched_get_model_specs, session):
    patched_get_model_specs.side_effect = get_prototype_model_spec

    model_id, model_version = "catboost-classification-model", "*"
    instance_type = "ml.p2.xlarge"
    region = "us-west-2"

    model_specs = accessors.JumpStartModelsAccessor.get_model_specs(
        region, model_id, model_version)

    # inference
    uri = image_uris.retrieve(
        framework=None,
        region=region,
        image_scope="inference",
        model_id=model_id,
        model_version=model_version,
        instance_type=instance_type,
    )
    framework_class_uri = PyTorchModel(
        role="mock_role",
        model_data="mock_data",
        entry_point="mock_entry_point",
        framework_version=model_specs.hosting_ecr_specs.framework_version,
        py_version=model_specs.hosting_ecr_specs.py_version,
        sagemaker_session=session,
    ).serving_image_uri(region, instance_type)

    assert uri == framework_class_uri
    assert uri == "763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:1.9.0-gpu-py38"

    # training
    uri = image_uris.retrieve(
        framework=None,
        region=region,
        image_scope="training",
        model_id=model_id,
        model_version=model_version,
        instance_type=instance_type,
    )
    framework_class_uri = PyTorch(
        role="mock_role",
        entry_point="mock_entry_point",
        framework_version=model_specs.training_ecr_specs.framework_version,
        py_version=model_specs.training_ecr_specs.py_version,
        instance_type=instance_type,
        instance_count=1,
        sagemaker_session=session,
    ).training_image_uri(region=region)

    assert uri == framework_class_uri
    assert uri == "763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.9.0-gpu-py38"
def create_model(self, model_server_workers=None, role=None):
    """Create a SageMaker ``PyTorchModel`` object that can be deployed to an ``Endpoint``.

    Args:
        role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
            which is also used during transform jobs. If not specified, the
            role from the Estimator will be used.
        model_server_workers (int): Optional. The number of worker processes
            used by the inference server. If None, server will use one
            worker per vCPU.

    Returns:
        sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel``
        object. See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details.
    """
    role = role or self.role
    return PyTorchModel(self.model_data, role, self.entry_point,
                        source_dir=self._model_source_dir(),
                        enable_cloudwatch_metrics=self.enable_cloudwatch_metrics,
                        name=self._current_job_name,
                        container_log_level=self.container_log_level,
                        code_location=self.code_location,
                        py_version=self.py_version,
                        framework_version=self.framework_version,
                        image=self.image_name,
                        model_server_workers=model_server_workers,
                        sagemaker_session=self.sagemaker_session)
def create_model(self, model_server_workers=None, role=None,
                 vpc_config_override=VPC_CONFIG_DEFAULT):
    """Create a SageMaker ``PyTorchModel`` object that can be deployed to an ``Endpoint``.

    Args:
        role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
            which is also used during transform jobs. If not specified, the
            role from the Estimator will be used.
        model_server_workers (int): Optional. The number of worker processes
            used by the inference server. If None, server will use one
            worker per vCPU.
        vpc_config_override (dict[str, list[str]]): Optional override for
            VpcConfig set on the model. Default: use subnets and security
            groups from this Estimator.

            * 'Subnets' (list[str]): List of subnet ids.
            * 'SecurityGroupIds' (list[str]): List of security group ids.

    Returns:
        sagemaker.pytorch.model.PyTorchModel: A SageMaker ``PyTorchModel``
        object. See :func:`~sagemaker.pytorch.model.PyTorchModel` for full details.
    """
    role = role or self.role
    return PyTorchModel(
        self.model_data,
        role,
        self.entry_point,
        source_dir=self._model_source_dir(),
        enable_cloudwatch_metrics=self.enable_cloudwatch_metrics,
        name=self._current_job_name,
        container_log_level=self.container_log_level,
        code_location=self.code_location,
        py_version=self.py_version,
        framework_version=self.framework_version,
        image=self.image_name,
        model_server_workers=model_server_workers,
        sagemaker_session=self.sagemaker_session,
        vpc_config=self.get_vpc_config(vpc_config_override),
        dependencies=self.dependencies,
    )
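# A minimal usage sketch for the VPC-aware create_model() above (not from the
# source); the subnet and security-group ids are placeholders showing the
# documented shape of vpc_config_override:
def example_create_model_in_vpc(estimator):
    return estimator.create_model(
        model_server_workers=2,
        vpc_config_override={
            'Subnets': ['subnet-0123456789abcdef0'],
            'SecurityGroupIds': ['sg-0123456789abcdef0'],
        },
    )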