def test_endpoint_from_production_variants(sagemaker_session):
    ims = sagemaker_session
    ims.sagemaker_client.describe_endpoint = Mock(
        return_value={'EndpointStatus': 'InService'})
    pvs = [
        sagemaker.production_variant('A', 'ml.p2.xlarge'),
        sagemaker.production_variant('B', 'p299.4096xlarge')
    ]
    ex = ClientError(
        {
            'Error': {
                'Code': 'ValidationException',
                'Message': 'Could not find your thing'
            }
        }, 'b')
    ims.sagemaker_client.describe_endpoint_config = Mock(side_effect=ex)
    sagemaker_session.endpoint_from_production_variants('some-endpoint', pvs)
    sagemaker_session.sagemaker_client.create_endpoint.assert_called_with(
        EndpointConfigName='some-endpoint', EndpointName='some-endpoint')
    sagemaker_session.sagemaker_client.create_endpoint_config.assert_called_with(
        EndpointConfigName='some-endpoint',
        ProductionVariants=[{
            'InstanceType': 'ml.p2.xlarge',
            'ModelName': 'A',
            'InitialVariantWeight': 1,
            'InitialInstanceCount': 1,
            'VariantName': 'AllTraffic'
        }, {
            'InstanceType': 'p299.4096xlarge',
            'ModelName': 'B',
            'InitialVariantWeight': 1,
            'InitialInstanceCount': 1,
            'VariantName': 'AllTraffic'
        }])

def test_endpoint_from_production_variants_with_tags(sagemaker_session):
    ims = sagemaker_session
    ims.sagemaker_client.describe_endpoint = Mock(return_value={'EndpointStatus': 'InService'})
    pvs = [sagemaker.production_variant('A', 'ml.p2.xlarge'),
           sagemaker.production_variant('B', 'p299.4096xlarge')]
    ex = ClientError({'Error': {'Code': 'ValidationException',
                                'Message': 'Could not find your thing'}}, 'b')
    ims.sagemaker_client.describe_endpoint_config = Mock(side_effect=ex)
    tags = [{'ModelName': 'TestModel'}]
    sagemaker_session.endpoint_from_production_variants('some-endpoint', pvs, tags)
    sagemaker_session.sagemaker_client.create_endpoint.assert_called_with(
        EndpointConfigName='some-endpoint', EndpointName='some-endpoint')
    sagemaker_session.sagemaker_client.create_endpoint_config.assert_called_with(
        EndpointConfigName='some-endpoint',
        ProductionVariants=[
            {
                'InstanceType': 'ml.p2.xlarge',
                'ModelName': 'A',
                'InitialVariantWeight': 1,
                'InitialInstanceCount': 1,
                'VariantName': 'AllTraffic'
            },
            {
                'InstanceType': 'p299.4096xlarge',
                'ModelName': 'B',
                'InitialVariantWeight': 1,
                'InitialInstanceCount': 1,
                'VariantName': 'AllTraffic'
            }],
        Tags=tags)

def test_endpoint_from_production_variants_with_accelerator_type(sagemaker_session):
    ims = sagemaker_session
    ims.sagemaker_client.describe_endpoint = Mock(
        return_value={"EndpointStatus": "InService"})
    pvs = [
        sagemaker.production_variant("A", "ml.p2.xlarge", accelerator_type=ACCELERATOR_TYPE),
        sagemaker.production_variant("B", "p299.4096xlarge", accelerator_type=ACCELERATOR_TYPE),
    ]
    ex = ClientError(
        {
            "Error": {
                "Code": "ValidationException",
                "Message": "Could not find your thing"
            }
        }, "b")
    ims.sagemaker_client.describe_endpoint_config = Mock(side_effect=ex)
    tags = [{"ModelName": "TestModel"}]
    sagemaker_session.endpoint_from_production_variants("some-endpoint", pvs, tags)
    sagemaker_session.sagemaker_client.create_endpoint.assert_called_with(
        EndpointConfigName="some-endpoint", EndpointName="some-endpoint", Tags=tags)
    sagemaker_session.sagemaker_client.create_endpoint_config.assert_called_with(
        EndpointConfigName="some-endpoint", ProductionVariants=pvs, Tags=tags)

def test_endpoint_from_production_variants_with_accelerator_type(sagemaker_session):
    ims = sagemaker_session
    ims.sagemaker_client.describe_endpoint = Mock(
        return_value={'EndpointStatus': 'InService'})
    pvs = [
        sagemaker.production_variant('A', 'ml.p2.xlarge', accelerator_type=ACCELERATOR_TYPE),
        sagemaker.production_variant('B', 'p299.4096xlarge', accelerator_type=ACCELERATOR_TYPE)
    ]
    ex = ClientError(
        {
            'Error': {
                'Code': 'ValidationException',
                'Message': 'Could not find your thing'
            }
        }, 'b')
    ims.sagemaker_client.describe_endpoint_config = Mock(side_effect=ex)
    tags = [{'ModelName': 'TestModel'}]
    sagemaker_session.endpoint_from_production_variants('some-endpoint', pvs, tags)
    sagemaker_session.sagemaker_client.create_endpoint.assert_called_with(
        EndpointConfigName='some-endpoint', EndpointName='some-endpoint')
    sagemaker_session.sagemaker_client.create_endpoint_config.assert_called_with(
        EndpointConfigName='some-endpoint', ProductionVariants=pvs, Tags=tags)

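# For reference, the assertions above depend on the request dict that
# sagemaker.production_variant builds when only a model name and instance type
# are given. A minimal sketch of that shape, taken from the expected values in
# the first test (the remaining fields are the function's defaults):
#
#   sagemaker.production_variant('A', 'ml.p2.xlarge')
#   # => {'ModelName': 'A',
#   #     'InstanceType': 'ml.p2.xlarge',
#   #     'InitialInstanceCount': 1,
#   #     'InitialVariantWeight': 1,
#   #     'VariantName': 'AllTraffic'}
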
def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None, wait=True):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this
    ``Model``. If ``self.predictor_cls`` is not None, this method returns the result of
    invoking ``self.predictor_cls`` on the created endpoint name.

    The name of the created model is accessible in the ``name`` field of this ``Model`` after
    deploy returns. The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        initial_instance_count (int): The initial number of instances to run in the
            ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to. For example,
            'ml.p2.xlarge'.
        endpoint_name (str): The name of the endpoint to create (default: None). If not
            specified, a unique endpoint name will be created.
        tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint.
        wait (bool): Whether the call should wait until the deployment of model completes
            (default: True).

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
    """
    if not self.sagemaker_session:
        self.sagemaker_session = Session()

    containers = self.pipeline_container_def(instance_type)

    self.name = self.name or name_from_image(containers[0]["Image"])
    self.sagemaker_session.create_model(self.name, self.role, containers,
                                        vpc_config=self.vpc_config)

    production_variant = sagemaker.production_variant(self.name, instance_type,
                                                      initial_instance_count)
    self.endpoint_name = endpoint_name or self.name
    self.sagemaker_session.endpoint_from_production_variants(
        self.endpoint_name, [production_variant], tags, wait=wait)

    if self.predictor_cls:
        return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
    return None

def deploy_config(model, initial_instance_count, instance_type, endpoint_name=None, tags=None):
    """Export Airflow deploy config from a SageMaker model

    Args:
        model (sagemaker.model.Model): The SageMaker model to export the Airflow config from.
        initial_instance_count (int): The initial number of instances to run in the
            ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to. For example,
            'ml.p2.xlarge'.
        endpoint_name (str): The name of the endpoint to create (default: None). If not
            specified, a unique endpoint name will be created.
        tags (list[dict]): List of tags for labeling a training job. For more, see
            https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.

    Returns:
        dict: Deploy config that can be directly used by SageMakerEndpointOperator in Airflow.
    """
    model_base_config = model_config(instance_type, model)

    production_variant = sagemaker.production_variant(model.name, instance_type,
                                                      initial_instance_count)
    name = model.name
    config_options = {"EndpointConfigName": name, "ProductionVariants": [production_variant]}
    if tags is not None:
        config_options["Tags"] = tags

    endpoint_name = endpoint_name or name
    endpoint_base_config = {"EndpointName": endpoint_name, "EndpointConfigName": name}

    config = {
        "Model": model_base_config,
        "EndpointConfig": config_options,
        "Endpoint": endpoint_base_config,
    }

    # If S3 operations are needed for the model, move them to the root level of the config.
    s3_operations = model_base_config.pop("S3Operations", None)
    if s3_operations is not None:
        config["S3Operations"] = s3_operations

    return config

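# A minimal usage sketch for deploy_config. Assumptions (illustrative, not taken
# from this module): ``my_model`` is an already constructed sagemaker.model.Model,
# and the SageMakerEndpointOperator import path matches the installed Airflow
# version; names such as "my-endpoint" are hypothetical.
def example_airflow_deploy(my_model):
    """Illustrative only: export a deploy config and hand it to an Airflow operator."""
    from airflow.contrib.operators.sagemaker_endpoint_operator import SageMakerEndpointOperator

    config = deploy_config(
        model=my_model,
        initial_instance_count=1,
        instance_type="ml.m5.xlarge",
        endpoint_name="my-endpoint",  # hypothetical endpoint name
    )
    # The returned dict carries the Model, EndpointConfig, and Endpoint sections
    # the operator expects, with any S3Operations lifted to the top level.
    return SageMakerEndpointOperator(task_id="deploy_sagemaker_endpoint", config=config)
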
def deploy(self, initial_instance_count, instance_type, accelerator_type=None,
           endpoint_name=None, tags=None):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this
    ``Model``. If ``self.predictor_cls`` is not None, this method returns the result of
    invoking ``self.predictor_cls`` on the created endpoint name.

    The name of the created model is accessible in the ``name`` field of this ``Model`` after
    deploy returns. The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        instance_type (str): The EC2 instance type to deploy this Model to. For example,
            'ml.p2.xlarge'.
        initial_instance_count (int): The initial number of instances to run in the
            ``Endpoint`` created from this ``Model``.
        accelerator_type (str): Type of Elastic Inference accelerator to deploy this model
            for model loading and inference, for example, 'ml.eia1.medium'. If not specified,
            no Elastic Inference accelerator will be attached to the endpoint. For more
            information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
        endpoint_name (str): The name of the endpoint to create (default: None). If not
            specified, a unique endpoint name will be created.
        tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint.

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
    """
    if not self.sagemaker_session:
        if instance_type in ('local', 'local_gpu'):
            self.sagemaker_session = local.LocalSession()
        else:
            self.sagemaker_session = session.Session()

    if self.role is None:
        raise ValueError("Role can not be null for deploying a model")

    compiled_model_suffix = '-'.join(instance_type.split('.')[:-1])
    if self._is_compiled_model:
        self.name += compiled_model_suffix

    self._create_sagemaker_model(instance_type, accelerator_type)
    production_variant = sagemaker.production_variant(self.name, instance_type,
                                                      initial_instance_count,
                                                      accelerator_type=accelerator_type)
    if endpoint_name:
        self.endpoint_name = endpoint_name
    else:
        self.endpoint_name = self.name

    if self._is_compiled_model and not self.endpoint_name.endswith(compiled_model_suffix):
        self.endpoint_name += compiled_model_suffix

    self.sagemaker_session.endpoint_from_production_variants(self.endpoint_name,
                                                             [production_variant], tags)

    if self.predictor_cls:
        return self.predictor_cls(self.endpoint_name, self.sagemaker_session)

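# A minimal usage sketch for the deploy method above with an Elastic Inference
# accelerator attached, per the accelerator_type parameter. Assumptions
# (illustrative, not from this module): ``model`` is a constructed Model and the
# chosen accelerator type is available in the target region.
def example_deploy_with_elastic_inference(model):
    """Illustrative only: attach an 'ml.eia1.medium' accelerator to the endpoint."""
    return model.deploy(
        initial_instance_count=1,
        instance_type="ml.m5.xlarge",       # hypothetical instance choice
        accelerator_type="ml.eia1.medium",   # example value from the docstring above
    )
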
def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this
    ``Model``. If ``self.predictor_cls`` is not None, this method returns the result of
    invoking ``self.predictor_cls`` on the created endpoint name.

    The name of the created model is accessible in the ``name`` field of this ``Model`` after
    deploy returns. The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        instance_type (str): The EC2 instance type to deploy this Model to. For example,
            'ml.p2.xlarge'.
        initial_instance_count (int): The initial number of instances to run in the
            ``Endpoint`` created from this ``Model``.
        endpoint_name (str): The name of the endpoint to create (default: None). If not
            specified, a unique endpoint name will be created.
        tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint.

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
    """
    if not self.sagemaker_session:
        if instance_type in ('local', 'local_gpu'):
            self.sagemaker_session = LocalSession()
        else:
            self.sagemaker_session = Session()

    container_def = self.prepare_container_def(instance_type)
    self.name = self.name or name_from_image(container_def['Image'])
    self.sagemaker_session.create_model(self.name, self.role, container_def,
                                        vpc_config=self.vpc_config)
    production_variant = sagemaker.production_variant(self.name, instance_type,
                                                      initial_instance_count)
    self.endpoint_name = endpoint_name or self.name
    self.sagemaker_session.endpoint_from_production_variants(self.endpoint_name,
                                                             [production_variant], tags)
    if self.predictor_cls:
        return self.predictor_cls(self.endpoint_name, self.sagemaker_session)

def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this
    ``Model``. If ``self.predictor_cls`` is not None, this method returns the result of
    invoking ``self.predictor_cls`` on the created endpoint name.

    The name of the created endpoint is accessible in the ``endpoint_name`` field of this
    ``Model`` after deploy returns.

    Args:
        instance_type (str): The EC2 instance type to deploy this Model to. For example,
            'ml.p2.xlarge'.
        initial_instance_count (int): The initial number of instances to run in the
            ``Endpoint`` created from this ``Model``.
        endpoint_name (str): The name of the endpoint to create (default: None). If not
            specified, a unique endpoint name will be created.
        tags (list[dict[str, str]]): A list of key-value pairs for tagging the endpoint
            (default: None).

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
    """
    if not self.sagemaker_session:
        if instance_type in ('local', 'local_gpu'):
            self.sagemaker_session = LocalSession()
        else:
            self.sagemaker_session = Session()

    container_def = self.prepare_container_def(instance_type)
    model_name = self.name or name_from_image(container_def['Image'])
    self.sagemaker_session.create_model(model_name, self.role, container_def)
    production_variant = sagemaker.production_variant(model_name, instance_type,
                                                      initial_instance_count)
    self.endpoint_name = endpoint_name or model_name
    self.sagemaker_session.endpoint_from_production_variants(self.endpoint_name,
                                                             [production_variant], tags)
    if self.predictor_cls:
        return self.predictor_cls(self.endpoint_name, self.sagemaker_session)

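# A minimal usage sketch for the deploy method above. Assumptions (illustrative,
# not from this module): ``model`` is a constructed sagemaker.model.Model and the
# caller has permission to create SageMaker hosting resources; the endpoint name
# and tag values are hypothetical.
def example_basic_deploy(model):
    """Illustrative only: create an endpoint and return the predictor and endpoint name."""
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type="ml.m5.xlarge",
        endpoint_name="my-endpoint",  # omit to derive a name from the model
        tags=[{"Key": "project", "Value": "demo"}],  # AWS Key/Value tag format
    )
    # deploy returns a Predictor only when model.predictor_cls is set; the endpoint
    # name is recorded on the model either way.
    return predictor, model.endpoint_name
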
def deploy(
    self,
    initial_instance_count,
    instance_type,
    accelerator_type=None,
    endpoint_name=None,
    update_endpoint=False,
    tags=None,
    kms_key=None,
    wait=True,
):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this
    ``Model``. If ``self.predictor_cls`` is not None, this method returns the result of
    invoking ``self.predictor_cls`` on the created endpoint name.

    The name of the created model is accessible in the ``name`` field of this ``Model`` after
    deploy returns. The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        instance_type (str): The EC2 instance type to deploy this Model to. For example,
            'ml.p2.xlarge'.
        initial_instance_count (int): The initial number of instances to run in the
            ``Endpoint`` created from this ``Model``.
        accelerator_type (str): Type of Elastic Inference accelerator to deploy this model
            for model loading and inference, for example, 'ml.eia1.medium'. If not specified,
            no Elastic Inference accelerator will be attached to the endpoint. For more
            information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
        endpoint_name (str): The name of the endpoint to create (default: None). If not
            specified, a unique endpoint name will be created.
        update_endpoint (bool): Flag to update the model in an existing Amazon SageMaker
            endpoint. If True, this will deploy a new EndpointConfig to an already existing
            endpoint and delete resources corresponding to the previous EndpointConfig. If
            False, a new endpoint will be created. Default: False
        tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint.
        kms_key (str): The ARN of the KMS key that is used to encrypt the data on the storage
            volume attached to the instance hosting the endpoint.
        wait (bool): Whether the call should wait until the deployment of this model
            completes (default: True).

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
    """
    if not self.sagemaker_session:
        if instance_type in ("local", "local_gpu"):
            self.sagemaker_session = local.LocalSession()
        else:
            self.sagemaker_session = session.Session()

    if self.role is None:
        raise ValueError("Role can not be null for deploying a model")

    compiled_model_suffix = "-".join(instance_type.split(".")[:-1])
    if self._is_compiled_model:
        self.name += compiled_model_suffix

    self._create_sagemaker_model(instance_type, accelerator_type, tags)
    production_variant = sagemaker.production_variant(
        self.name, instance_type, initial_instance_count, accelerator_type=accelerator_type
    )
    if endpoint_name:
        self.endpoint_name = endpoint_name
    else:
        self.endpoint_name = self.name

    if self._is_compiled_model and not self.endpoint_name.endswith(compiled_model_suffix):
        self.endpoint_name += compiled_model_suffix

    if update_endpoint:
        endpoint_config_name = self.sagemaker_session.create_endpoint_config(
            name=self.name,
            model_name=self.name,
            initial_instance_count=initial_instance_count,
            instance_type=instance_type,
            accelerator_type=accelerator_type,
            tags=tags,
            kms_key=kms_key,
        )
        self.sagemaker_session.update_endpoint(self.endpoint_name, endpoint_config_name)
    else:
        self.sagemaker_session.endpoint_from_production_variants(
            self.endpoint_name, [production_variant], tags, kms_key, wait
        )

    if self.predictor_cls:
        return self.predictor_cls(self.endpoint_name, self.sagemaker_session)

def deploy(
    self,
    initial_instance_count,
    instance_type,
    serializer=None,
    deserializer=None,
    endpoint_name=None,
    tags=None,
    wait=True,
    update_endpoint=False,
    data_capture_config=None,
):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this
    ``Model``. If ``self.predictor_cls`` is not None, this method returns the result of
    invoking ``self.predictor_cls`` on the created endpoint name.

    The name of the created model is accessible in the ``name`` field of this ``Model`` after
    deploy returns. The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        initial_instance_count (int): The initial number of instances to run in the
            ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to. For example,
            'ml.p2.xlarge'.
        serializer (:class:`~sagemaker.serializers.BaseSerializer`): A serializer object,
            used to encode data for an inference endpoint (default: None). If ``serializer``
            is not None, then ``serializer`` will override the default serializer. The
            default serializer is set by the ``predictor_cls``.
        deserializer (:class:`~sagemaker.deserializers.BaseDeserializer`): A deserializer
            object, used to decode data from an inference endpoint (default: None). If
            ``deserializer`` is not None, then ``deserializer`` will override the default
            deserializer. The default deserializer is set by the ``predictor_cls``.
        endpoint_name (str): The name of the endpoint to create (default: None). If not
            specified, a unique endpoint name will be created.
        tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint.
        wait (bool): Whether the call should wait until the deployment of model completes
            (default: True).
        update_endpoint (bool): Flag to update the model in an existing Amazon SageMaker
            endpoint. If True, this will deploy a new EndpointConfig to an already existing
            endpoint and delete resources corresponding to the previous EndpointConfig. If
            False, a new endpoint will be created. Default: False
        data_capture_config (sagemaker.model_monitor.DataCaptureConfig): Specifies
            configuration related to Endpoint data capture for use with Amazon SageMaker
            Model Monitoring. Default: None.

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
    """
    if not self.sagemaker_session:
        self.sagemaker_session = Session()

    containers = self.pipeline_container_def(instance_type)

    self.name = self.name or name_from_image(containers[0]["Image"])
    self.sagemaker_session.create_model(
        self.name, self.role, containers, vpc_config=self.vpc_config
    )

    production_variant = sagemaker.production_variant(
        self.name, instance_type, initial_instance_count
    )
    self.endpoint_name = endpoint_name or self.name

    data_capture_config_dict = None
    if data_capture_config is not None:
        data_capture_config_dict = data_capture_config._to_request_dict()

    if update_endpoint:
        endpoint_config_name = self.sagemaker_session.create_endpoint_config(
            name=self.name,
            model_name=self.name,
            initial_instance_count=initial_instance_count,
            instance_type=instance_type,
            tags=tags,
            data_capture_config_dict=data_capture_config_dict,
        )
        self.sagemaker_session.update_endpoint(
            self.endpoint_name, endpoint_config_name, wait=wait
        )
    else:
        self.sagemaker_session.endpoint_from_production_variants(
            name=self.endpoint_name,
            production_variants=[production_variant],
            tags=tags,
            wait=wait,
            data_capture_config_dict=data_capture_config_dict,
        )

    if self.predictor_cls:
        predictor = self.predictor_cls(self.endpoint_name, self.sagemaker_session)
        if serializer:
            predictor.serializer = serializer
        if deserializer:
            predictor.deserializer = deserializer
        return predictor
    return None

def deploy(
    self,
    initial_instance_count,
    instance_type,
    accelerator_type=None,
    endpoint_name=None,
    update_endpoint=False,
    tags=None,
    kms_key=None,
    wait=True,
    data_capture_config=None,
):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this
    ``Model``. If ``self.predictor_cls`` is not None, this method returns the result of
    invoking ``self.predictor_cls`` on the created endpoint name.

    The name of the created model is accessible in the ``name`` field of this ``Model`` after
    deploy returns. The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        initial_instance_count (int): The initial number of instances to run in the
            ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to. For example,
            'ml.p2.xlarge', or 'local' for local mode.
        accelerator_type (str): Type of Elastic Inference accelerator to deploy this model
            for model loading and inference, for example, 'ml.eia1.medium'. If not specified,
            no Elastic Inference accelerator will be attached to the endpoint. For more
            information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
        endpoint_name (str): The name of the endpoint to create (default: None). If not
            specified, a unique endpoint name will be created.
        update_endpoint (bool): Flag to update the model in an existing Amazon SageMaker
            endpoint. If True, this will deploy a new EndpointConfig to an already existing
            endpoint and delete resources corresponding to the previous EndpointConfig. If
            False, a new endpoint will be created. Default: False
        tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint.
        kms_key (str): The ARN of the KMS key that is used to encrypt the data on the storage
            volume attached to the instance hosting the endpoint.
        wait (bool): Whether the call should wait until the deployment of this model
            completes (default: True).
        data_capture_config (sagemaker.model_monitor.DataCaptureConfig): Specifies
            configuration related to Endpoint data capture for use with Amazon SageMaker
            Model Monitoring. Default: None.

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
    """
    self._init_sagemaker_session_if_does_not_exist(instance_type)

    if self.role is None:
        raise ValueError("Role can not be null for deploying a model")

    if instance_type.startswith("ml.inf") and not self._is_compiled_model:
        LOGGER.warning(
            "Your model is not compiled. Please compile your model before using Inferentia."
        )

    compiled_model_suffix = "-".join(instance_type.split(".")[:-1])
    if self._is_compiled_model:
        name_prefix = self.name or utils.name_from_image(self.image)
        self.name = "{}{}".format(name_prefix, compiled_model_suffix)

    self._create_sagemaker_model(instance_type, accelerator_type, tags)
    production_variant = sagemaker.production_variant(
        self.name, instance_type, initial_instance_count, accelerator_type=accelerator_type
    )
    if endpoint_name:
        self.endpoint_name = endpoint_name
    else:
        self.endpoint_name = self.name

    if self._is_compiled_model and not self.endpoint_name.endswith(compiled_model_suffix):
        self.endpoint_name += compiled_model_suffix

    data_capture_config_dict = None
    if data_capture_config is not None:
        data_capture_config_dict = data_capture_config._to_request_dict()

    if update_endpoint:
        endpoint_config_name = self.sagemaker_session.create_endpoint_config(
            name=self.name,
            model_name=self.name,
            initial_instance_count=initial_instance_count,
            instance_type=instance_type,
            accelerator_type=accelerator_type,
            tags=tags,
            kms_key=kms_key,
            data_capture_config_dict=data_capture_config_dict,
        )
        self.sagemaker_session.update_endpoint(
            self.endpoint_name, endpoint_config_name, wait=wait
        )
    else:
        self.sagemaker_session.endpoint_from_production_variants(
            name=self.endpoint_name,
            production_variants=[production_variant],
            tags=tags,
            kms_key=kms_key,
            wait=wait,
            data_capture_config_dict=data_capture_config_dict,
        )

    if self.predictor_cls:
        return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
    return None

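# A minimal sketch of the two paths above: creating a new endpoint versus rolling
# a new EndpointConfig onto an existing one via update_endpoint=True. Assumptions
# (illustrative, not from this module): ``model`` is a constructed Model and the
# endpoint name used here is hypothetical.
def example_create_then_update(model):
    """Illustrative only: first deployment creates; a later call updates in place."""
    # First deployment: creates the model, endpoint config, and endpoint.
    model.deploy(
        initial_instance_count=1,
        instance_type="ml.c5.xlarge",
        endpoint_name="my-existing-endpoint",
    )
    # Later deployment to the same endpoint: per the docstring, a new EndpointConfig
    # is deployed to the existing endpoint and the previous config's resources are
    # deleted, instead of creating a second endpoint.
    model.deploy(
        initial_instance_count=2,
        instance_type="ml.c5.xlarge",
        endpoint_name="my-existing-endpoint",
        update_endpoint=True,
    )
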
def deploy(
    self,
    initial_instance_count,
    instance_type,
    serializer=None,
    deserializer=None,
    accelerator_type=None,
    endpoint_name=None,
    tags=None,
    kms_key=None,
    wait=True,
    data_capture_config=None,
):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this
    ``Model``. If self.model is not None, then the ``Endpoint`` will be deployed with
    parameters in self.model (like vpc_config, enable_network_isolation, etc). If self.model
    is None, then the parameters in the ``MultiDataModel`` constructor will be used.

    If ``self.predictor_cls`` is not None, this method returns the result of invoking
    ``self.predictor_cls`` on the created endpoint name.

    The name of the created model is accessible in the ``name`` field of this ``Model`` after
    deploy returns. The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        initial_instance_count (int): The initial number of instances to run in the
            ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to. For example,
            'ml.p2.xlarge', or 'local' for local mode.
        serializer (:class:`~sagemaker.serializers.BaseSerializer`): A serializer object,
            used to encode data for an inference endpoint (default: None). If ``serializer``
            is not None, then ``serializer`` will override the default serializer. The
            default serializer is set by the ``predictor_cls``.
        deserializer (:class:`~sagemaker.deserializers.BaseDeserializer`): A deserializer
            object, used to decode data from an inference endpoint (default: None). If
            ``deserializer`` is not None, then ``deserializer`` will override the default
            deserializer. The default deserializer is set by the ``predictor_cls``.
        accelerator_type (str): Type of Elastic Inference accelerator to deploy this model
            for model loading and inference, for example, 'ml.eia1.medium'. If not specified,
            no Elastic Inference accelerator will be attached to the endpoint. For more
            information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
        endpoint_name (str): The name of the endpoint to create (default: None). If not
            specified, a unique endpoint name will be created.
        tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint.
        kms_key (str): The ARN of the KMS key that is used to encrypt the data on the storage
            volume attached to the instance hosting the endpoint.
        wait (bool): Whether the call should wait until the deployment of this model
            completes (default: True).
        data_capture_config (sagemaker.model_monitor.DataCaptureConfig): Specifies
            configuration related to Endpoint data capture for use with Amazon SageMaker
            Model Monitoring. Default: None.

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
    """
    # Set model specific parameters
    if self.model:
        enable_network_isolation = self.model.enable_network_isolation()
        role = self.model.role
        vpc_config = self.model.vpc_config
        predictor_cls = self.model.predictor_cls
    else:
        enable_network_isolation = self.enable_network_isolation()
        role = self.role
        vpc_config = self.vpc_config
        predictor_cls = self.predictor_cls

    if role is None:
        raise ValueError("Role can not be null for deploying a model")

    if instance_type == "local" and not isinstance(self.sagemaker_session, local.LocalSession):
        self.sagemaker_session = local.LocalSession()

    container_def = self.prepare_container_def(instance_type, accelerator_type=accelerator_type)
    self.sagemaker_session.create_model(
        self.name,
        role,
        container_def,
        vpc_config=vpc_config,
        enable_network_isolation=enable_network_isolation,
        tags=tags,
    )

    production_variant = sagemaker.production_variant(
        self.name, instance_type, initial_instance_count, accelerator_type=accelerator_type
    )
    if endpoint_name:
        self.endpoint_name = endpoint_name
    else:
        self.endpoint_name = self.name

    data_capture_config_dict = None
    if data_capture_config is not None:
        data_capture_config_dict = data_capture_config._to_request_dict()

    self.sagemaker_session.endpoint_from_production_variants(
        name=self.endpoint_name,
        production_variants=[production_variant],
        tags=tags,
        kms_key=kms_key,
        wait=wait,
        data_capture_config_dict=data_capture_config_dict,
    )

    if predictor_cls:
        predictor = predictor_cls(self.endpoint_name, self.sagemaker_session)
        if serializer:
            predictor.serializer = serializer
        if deserializer:
            predictor.deserializer = deserializer
        return predictor
    return None

def deploy(
    self,
    initial_instance_count,
    instance_type,
    serializer=None,
    deserializer=None,
    accelerator_type=None,
    endpoint_name=None,
    tags=None,
    kms_key=None,
    wait=True,
    data_capture_config=None,
    **kwargs,
):
    """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

    Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this
    ``Model``. If ``self.predictor_cls`` is not None, this method returns the result of
    invoking ``self.predictor_cls`` on the created endpoint name.

    The name of the created model is accessible in the ``name`` field of this ``Model`` after
    deploy returns. The name of the created endpoint is accessible in the ``endpoint_name``
    field of this ``Model`` after deploy returns.

    Args:
        initial_instance_count (int): The initial number of instances to run in the
            ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to. For example,
            'ml.p2.xlarge', or 'local' for local mode.
        serializer (:class:`~sagemaker.serializers.BaseSerializer`): A serializer object,
            used to encode data for an inference endpoint (default: None). If ``serializer``
            is not None, then ``serializer`` will override the default serializer. The
            default serializer is set by the ``predictor_cls``.
        deserializer (:class:`~sagemaker.deserializers.BaseDeserializer`): A deserializer
            object, used to decode data from an inference endpoint (default: None). If
            ``deserializer`` is not None, then ``deserializer`` will override the default
            deserializer. The default deserializer is set by the ``predictor_cls``.
        accelerator_type (str): Type of Elastic Inference accelerator to deploy this model
            for model loading and inference, for example, 'ml.eia1.medium'. If not specified,
            no Elastic Inference accelerator will be attached to the endpoint. For more
            information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
        endpoint_name (str): The name of the endpoint to create (default: None). If not
            specified, a unique endpoint name will be created.
        tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint.
        kms_key (str): The ARN of the KMS key that is used to encrypt the data on the storage
            volume attached to the instance hosting the endpoint.
        wait (bool): Whether the call should wait until the deployment of this model
            completes (default: True).
        data_capture_config (sagemaker.model_monitor.DataCaptureConfig): Specifies
            configuration related to Endpoint data capture for use with Amazon SageMaker
            Model Monitoring. Default: None.

    Returns:
        callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
    """
    removed_kwargs("update_endpoint", kwargs)
    self._init_sagemaker_session_if_does_not_exist(instance_type)

    if self.role is None:
        raise ValueError("Role can not be null for deploying a model")

    if instance_type.startswith("ml.inf") and not self._is_compiled_model:
        LOGGER.warning(
            "Your model is not compiled. Please compile your model before using Inferentia."
        )

    compiled_model_suffix = "-".join(instance_type.split(".")[:-1])
    if self._is_compiled_model:
        self._ensure_base_name_if_needed(self.image_uri)
        if self._base_name is not None:
            self._base_name = "-".join((self._base_name, compiled_model_suffix))

    self._create_sagemaker_model(instance_type, accelerator_type, tags)
    production_variant = sagemaker.production_variant(
        self.name, instance_type, initial_instance_count, accelerator_type=accelerator_type
    )
    if endpoint_name:
        self.endpoint_name = endpoint_name
    else:
        base_endpoint_name = self._base_name or utils.base_from_name(self.name)
        if self._is_compiled_model and not base_endpoint_name.endswith(compiled_model_suffix):
            base_endpoint_name = "-".join((base_endpoint_name, compiled_model_suffix))
        self.endpoint_name = utils.name_from_base(base_endpoint_name)

    data_capture_config_dict = None
    if data_capture_config is not None:
        data_capture_config_dict = data_capture_config._to_request_dict()

    self.sagemaker_session.endpoint_from_production_variants(
        name=self.endpoint_name,
        production_variants=[production_variant],
        tags=tags,
        kms_key=kms_key,
        wait=wait,
        data_capture_config_dict=data_capture_config_dict,
    )

    if self.predictor_cls:
        predictor = self.predictor_cls(self.endpoint_name, self.sagemaker_session)
        if serializer:
            predictor.serializer = serializer
        if deserializer:
            predictor.deserializer = deserializer
        return predictor
    return None

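# A minimal usage sketch for the deploy method above, combining the serializer/
# deserializer overrides with endpoint data capture. Assumptions (illustrative,
# not from this module): ``model`` has predictor_cls set, the JSON serializer/
# deserializer classes and the DataCaptureConfig arguments shown exist at this
# SDK version, and the S3 destination is a writable, hypothetical bucket.
def example_deploy_with_json_io_and_capture(model):
    """Illustrative only: JSON request/response handling plus payload capture to S3."""
    from sagemaker.serializers import JSONSerializer
    from sagemaker.deserializers import JSONDeserializer
    from sagemaker.model_monitor import DataCaptureConfig

    capture = DataCaptureConfig(
        enable_capture=True,
        sampling_percentage=100,
        destination_s3_uri="s3://my-bucket/endpoint-capture/",  # hypothetical bucket
    )
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type="ml.m5.xlarge",
        serializer=JSONSerializer(),      # overrides the predictor_cls default
        deserializer=JSONDeserializer(),  # overrides the predictor_cls default
        data_capture_config=capture,
        wait=True,
    )
    return predictor
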