Example #1
def test_endpoint_from_production_variants(sagemaker_session):
    ims = sagemaker_session
    ims.sagemaker_client.describe_endpoint = Mock(
        return_value={'EndpointStatus': 'InService'})
    pvs = [
        sagemaker.production_variant('A', 'ml.p2.xlarge'),
        sagemaker.production_variant('B', 'p299.4096xlarge')
    ]
    ex = ClientError(
        {
            'Error': {
                'Code': 'ValidationException',
                'Message': 'Could not find your thing'
            }
        }, 'b')
    ims.sagemaker_client.describe_endpoint_config = Mock(side_effect=ex)
    sagemaker_session.endpoint_from_production_variants('some-endpoint', pvs)
    sagemaker_session.sagemaker_client.create_endpoint.assert_called_with(
        EndpointConfigName='some-endpoint', EndpointName='some-endpoint')
    sagemaker_session.sagemaker_client.create_endpoint_config.assert_called_with(
        EndpointConfigName='some-endpoint',
        ProductionVariants=[{
            'InstanceType': 'ml.p2.xlarge',
            'ModelName': 'A',
            'InitialVariantWeight': 1,
            'InitialInstanceCount': 1,
            'VariantName': 'AllTraffic'
        }, {
            'InstanceType': 'p299.4096xlarge',
            'ModelName': 'B',
            'InitialVariantWeight': 1,
            'InitialInstanceCount': 1,
            'VariantName': 'AllTraffic'
        }])

def test_endpoint_from_production_variants_with_tags(sagemaker_session):
    ims = sagemaker_session
    ims.sagemaker_client.describe_endpoint = Mock(return_value={'EndpointStatus': 'InService'})
    pvs = [sagemaker.production_variant('A', 'ml.p2.xlarge'), sagemaker.production_variant('B', 'p299.4096xlarge')]
    ex = ClientError({'Error': {'Code': 'ValidationException', 'Message': 'Could not find your thing'}}, 'b')
    ims.sagemaker_client.describe_endpoint_config = Mock(side_effect=ex)
    tags = [{'ModelName': 'TestModel'}]
    sagemaker_session.endpoint_from_production_variants('some-endpoint', pvs, tags)
    sagemaker_session.sagemaker_client.create_endpoint.assert_called_with(EndpointConfigName='some-endpoint',
                                                                          EndpointName='some-endpoint')
    sagemaker_session.sagemaker_client.create_endpoint_config.assert_called_with(
        EndpointConfigName='some-endpoint',
        ProductionVariants=[
            {
                'InstanceType': 'ml.p2.xlarge',
                'ModelName': 'A',
                'InitialVariantWeight': 1,
                'InitialInstanceCount': 1,
                'VariantName': 'AllTraffic'
            },
            {
                'InstanceType': 'p299.4096xlarge',
                'ModelName': 'B',
                'InitialVariantWeight': 1,
                'InitialInstanceCount': 1,
                'VariantName': 'AllTraffic'}],
        Tags=tags)
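
The assertions in Example #1 imply the shape of the dict that sagemaker.production_variant returns for a model name and instance type. A minimal sketch, reconstructed from the expected create_endpoint_config call above rather than from the SDK source:

def production_variant_sketch(model_name, instance_type):
    # Hypothetical re-implementation mirroring the structure asserted above; the real
    # sagemaker.production_variant accepts more options (counts, weights, accelerator_type)
    # and uses these values as defaults.
    return {
        'ModelName': model_name,
        'InstanceType': instance_type,
        'InitialInstanceCount': 1,
        'InitialVariantWeight': 1,
        'VariantName': 'AllTraffic',
    }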
Example #3
def test_endpoint_from_production_variants_with_accelerator_type(
        sagemaker_session):
    ims = sagemaker_session
    ims.sagemaker_client.describe_endpoint = Mock(
        return_value={"EndpointStatus": "InService"})
    pvs = [
        sagemaker.production_variant("A",
                                     "ml.p2.xlarge",
                                     accelerator_type=ACCELERATOR_TYPE),
        sagemaker.production_variant("B",
                                     "p299.4096xlarge",
                                     accelerator_type=ACCELERATOR_TYPE),
    ]
    ex = ClientError(
        {
            "Error": {
                "Code": "ValidationException",
                "Message": "Could not find your thing"
            }
        }, "b")
    ims.sagemaker_client.describe_endpoint_config = Mock(side_effect=ex)
    tags = [{"ModelName": "TestModel"}]
    sagemaker_session.endpoint_from_production_variants(
        "some-endpoint", pvs, tags)
    sagemaker_session.sagemaker_client.create_endpoint.assert_called_with(
        EndpointConfigName="some-endpoint",
        EndpointName="some-endpoint",
        Tags=tags)
    sagemaker_session.sagemaker_client.create_endpoint_config.assert_called_with(
        EndpointConfigName="some-endpoint", ProductionVariants=pvs, Tags=tags)

def test_endpoint_from_production_variants_with_accelerator_type(
        sagemaker_session):
    ims = sagemaker_session
    ims.sagemaker_client.describe_endpoint = Mock(
        return_value={'EndpointStatus': 'InService'})
    pvs = [
        sagemaker.production_variant('A',
                                     'ml.p2.xlarge',
                                     accelerator_type=ACCELERATOR_TYPE),
        sagemaker.production_variant('B',
                                     'p299.4096xlarge',
                                     accelerator_type=ACCELERATOR_TYPE)
    ]
    ex = ClientError(
        {
            'Error': {
                'Code': 'ValidationException',
                'Message': 'Could not find your thing'
            }
        }, 'b')
    ims.sagemaker_client.describe_endpoint_config = Mock(side_effect=ex)
    tags = [{'ModelName': 'TestModel'}]
    sagemaker_session.endpoint_from_production_variants(
        'some-endpoint', pvs, tags)
    sagemaker_session.sagemaker_client.create_endpoint.assert_called_with(
        EndpointConfigName='some-endpoint', EndpointName='some-endpoint')
    sagemaker_session.sagemaker_client.create_endpoint_config.assert_called_with(
        EndpointConfigName='some-endpoint', ProductionVariants=pvs, Tags=tags)
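
ACCELERATOR_TYPE is a constant defined elsewhere in the test module. A plausible value, shown here only so the snippets above read as self-contained, would be an Elastic Inference accelerator type:

ACCELERATOR_TYPE = 'ml.eia1.medium'  # assumed value; any valid Elastic Inference type would do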
Example #5
    def deploy(self,
               initial_instance_count,
               instance_type,
               endpoint_name=None,
               tags=None,
               wait=True):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a
        ``Predictor``.

        Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an
        ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not None,
        this method returns the result of invoking ``self.predictor_cls`` on
        the created endpoint name.

        The name of the created model is accessible in the ``name`` field of
        this ``Model`` after deploy returns.

        The name of the created endpoint is accessible in the
        ``endpoint_name`` field of this ``Model`` after deploy returns.

        Args:
            initial_instance_count (int): The initial number of instances to run
                in the ``Endpoint`` created from this ``Model``.
            instance_type (str): The EC2 instance type to deploy this Model to.
                For example, 'ml.p2.xlarge'.
            endpoint_name (str): The name of the endpoint to create (default:
                None). If not specified, a unique endpoint name will be created.
            tags (List[dict[str, str]]): The list of tags to attach to this
                specific endpoint.
            wait (bool): Whether the call should wait until the deployment of
                model completes (default: True).

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
        """
        if not self.sagemaker_session:
            self.sagemaker_session = Session()

        containers = self.pipeline_container_def(instance_type)

        self.name = self.name or name_from_image(containers[0]["Image"])
        self.sagemaker_session.create_model(self.name,
                                            self.role,
                                            containers,
                                            vpc_config=self.vpc_config)

        production_variant = sagemaker.production_variant(
            self.name, instance_type, initial_instance_count)
        self.endpoint_name = endpoint_name or self.name
        self.sagemaker_session.endpoint_from_production_variants(
            self.endpoint_name, [production_variant], tags, wait=wait)
        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name,
                                      self.sagemaker_session)
        return None
def deploy_config(model,
                  initial_instance_count,
                  instance_type,
                  endpoint_name=None,
                  tags=None):
    """Export Airflow deploy config from a SageMaker model

    Args:
        model (sagemaker.model.Model): The SageMaker model to export the Airflow
            config from.
        initial_instance_count (int): The initial number of instances to run in
            the ``Endpoint`` created from this ``Model``.
        instance_type (str): The EC2 instance type to deploy this Model to. For
            example, 'ml.p2.xlarge'.
        endpoint_name (str): The name of the endpoint to create (default: None).
            If not specified, a unique endpoint name will be created.
        tags (list[dict]): List of tags for labeling a training job. For more,
            see https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html.

    Returns:
        dict: Deploy config that can be directly used by
        SageMakerEndpointOperator in Airflow.
    """
    model_base_config = model_config(instance_type, model)

    production_variant = sagemaker.production_variant(model.name,
                                                      instance_type,
                                                      initial_instance_count)
    name = model.name
    config_options = {
        "EndpointConfigName": name,
        "ProductionVariants": [production_variant]
    }
    if tags is not None:
        config_options["Tags"] = tags

    endpoint_name = endpoint_name or name
    endpoint_base_config = {
        "EndpointName": endpoint_name,
        "EndpointConfigName": name
    }

    config = {
        "Model": model_base_config,
        "EndpointConfig": config_options,
        "Endpoint": endpoint_base_config,
    }

    # if there is s3 operations needed for model, move it to root level of config
    s3_operations = model_base_config.pop("S3Operations", None)
    if s3_operations is not None:
        config["S3Operations"] = s3_operations

    return config
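
A hedged sketch of wiring the returned config into Airflow; the operator's import path and the DAG boilerplate are assumptions that depend on the installed Amazon provider package:

# Sketch only: check the import path against your Airflow provider version.
from airflow.providers.amazon.aws.operators.sagemaker import SageMakerEndpointOperator

endpoint_config = deploy_config(model=my_model,               # my_model: a sagemaker.model.Model built earlier
                                initial_instance_count=1,
                                instance_type='ml.m5.large',
                                endpoint_name='my-endpoint')
deploy_task = SageMakerEndpointOperator(task_id='deploy_model',
                                        config=endpoint_config)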
Example #7
    def deploy(self, initial_instance_count, instance_type, accelerator_type=None, endpoint_name=None, tags=None):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

        Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this ``Model``.
        If ``self.predictor_cls`` is not None, this method returns the result of invoking
        ``self.predictor_cls`` on the created endpoint name.

        The name of the created model is accessible in the ``name`` field of this ``Model`` after deploy returns.

        The name of the created endpoint is accessible in the ``endpoint_name``
        field of this ``Model`` after deploy returns.

        Args:
            instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
            initial_instance_count (int): The initial number of instances to run in the
                ``Endpoint`` created from this ``Model``.
            accelerator_type (str): Type of Elastic Inference accelerator to deploy this model for model loading
                and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator
                will be attached to the endpoint.
                For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
            endpoint_name (str): The name of the endpoint to create (default: None).
                If not specified, a unique endpoint name will be created.
            tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint.

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation of ``self.predictor_cls`` on
                the created endpoint name, if ``self.predictor_cls`` is not None. Otherwise, return None.
        """
        if not self.sagemaker_session:
            if instance_type in ('local', 'local_gpu'):
                self.sagemaker_session = local.LocalSession()
            else:
                self.sagemaker_session = session.Session()

        if self.role is None:
            raise ValueError("Role can not be null for deploying a model")

        compiled_model_suffix = '-'.join(instance_type.split('.')[:-1])
        if self._is_compiled_model:
            self.name += compiled_model_suffix

        self._create_sagemaker_model(instance_type, accelerator_type)
        production_variant = sagemaker.production_variant(self.name, instance_type, initial_instance_count,
                                                          accelerator_type=accelerator_type)
        if endpoint_name:
            self.endpoint_name = endpoint_name
        else:
            self.endpoint_name = self.name
            if self._is_compiled_model and not self.endpoint_name.endswith(compiled_model_suffix):
                self.endpoint_name += compiled_model_suffix
        self.sagemaker_session.endpoint_from_production_variants(self.endpoint_name, [production_variant], tags)
        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
Example #8
    def deploy(self,
               initial_instance_count,
               instance_type,
               endpoint_name=None,
               tags=None):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

        Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this ``Model``.
        If ``self.predictor_cls`` is not None, this method returns the result of invoking
        ``self.predictor_cls`` on the created endpoint name.

        The name of the created model is accessible in the ``name`` field of this ``Model`` after deploy returns.

        The name of the created endpoint is accessible in the ``endpoint_name``
        field of this ``Model`` after deploy returns.

        Args:
            instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
            initial_instance_count (int): The initial number of instances to run in the
                ``Endpoint`` created from this ``Model``.
            endpoint_name (str): The name of the endpoint to create (default: None).
                If not specified, a unique endpoint name will be created.
            tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint (default: None).

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation of ``self.predictor_cls`` on
                the created endpoint name, if ``self.predictor_cls`` is not None. Otherwise, return None.
        """
        if not self.sagemaker_session:
            if instance_type in ('local', 'local_gpu'):
                self.sagemaker_session = LocalSession()
            else:
                self.sagemaker_session = Session()

        container_def = self.prepare_container_def(instance_type)
        self.name = self.name or name_from_image(container_def['Image'])
        self.sagemaker_session.create_model(self.name,
                                            self.role,
                                            container_def,
                                            vpc_config=self.vpc_config)
        production_variant = sagemaker.production_variant(
            self.name, instance_type, initial_instance_count)
        self.endpoint_name = endpoint_name or self.name
        self.sagemaker_session.endpoint_from_production_variants(
            self.endpoint_name, [production_variant], tags)
        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name,
                                      self.sagemaker_session)
Example #9
    def deploy(self, initial_instance_count, instance_type, endpoint_name=None, tags=None):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

        Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this ``Model``.
        If ``self.predictor_cls`` is not None, this method returns the result of invoking
        ``self.predictor_cls`` on the created endpoint name.

        The name of the created endpoint is accessible in the ``endpoint_name``
        field of this ``Model`` after deploy returns.

        Args:
            instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
            initial_instance_count (int): The initial number of instances to run in the
                ``Endpoint`` created from this ``Model``.
            endpoint_name (str): The name of the endpoint to create (default: None).
                If not specified, a unique endpoint name will be created.
            tags (list[dict[str, str]]): A list of key-value pairs for tagging the endpoint (default: None).

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation of ``self.predictor_cls`` on
                the created endpoint name, if ``self.predictor_cls`` is not None. Otherwise, return None.
        """
        if not self.sagemaker_session:
            if instance_type in ('local', 'local_gpu'):
                self.sagemaker_session = LocalSession()
            else:
                self.sagemaker_session = Session()

        container_def = self.prepare_container_def(instance_type)
        model_name = self.name or name_from_image(container_def['Image'])
        self.sagemaker_session.create_model(model_name, self.role, container_def)
        production_variant = sagemaker.production_variant(model_name, instance_type, initial_instance_count)
        self.endpoint_name = endpoint_name or model_name
        self.sagemaker_session.endpoint_from_production_variants(self.endpoint_name, [production_variant], tags)
        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
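
Typical usage of this deploy() signature, as a hedged sketch; model is assumed to be an already-constructed sagemaker.model.Model, and the tag keys are placeholders:

predictor = model.deploy(initial_instance_count=1,
                         instance_type='ml.m5.large',
                         endpoint_name='my-endpoint',                 # optional; a unique name is generated if omitted
                         tags=[{'Key': 'project', 'Value': 'demo'}])
# predictor is an instance of model.predictor_cls, or None if predictor_cls was not set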
Example #10
    def deploy(
        self,
        initial_instance_count,
        instance_type,
        accelerator_type=None,
        endpoint_name=None,
        update_endpoint=False,
        tags=None,
        kms_key=None,
        wait=True,
    ):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

        Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an ``Endpoint`` from this ``Model``.
        If ``self.predictor_cls`` is not None, this method returns the result of invoking
        ``self.predictor_cls`` on the created endpoint name.

        The name of the created model is accessible in the ``name`` field of this ``Model`` after deploy returns.

        The name of the created endpoint is accessible in the ``endpoint_name``
        field of this ``Model`` after deploy returns.

        Args:
            instance_type (str): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
            initial_instance_count (int): The initial number of instances to run in the
                ``Endpoint`` created from this ``Model``.
            accelerator_type (str): Type of Elastic Inference accelerator to deploy this model for model loading
                and inference, for example, 'ml.eia1.medium'. If not specified, no Elastic Inference accelerator
                will be attached to the endpoint.
                For more information: https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
            endpoint_name (str): The name of the endpoint to create (default: None).
                If not specified, a unique endpoint name will be created.
            update_endpoint (bool): Flag to update the model in an existing Amazon SageMaker endpoint.
                If True, this will deploy a new EndpointConfig to an already existing endpoint and delete resources
                corresponding to the previous EndpointConfig. If False, a new endpoint will be created. Default: False
            tags (List[dict[str, str]]): The list of tags to attach to this specific endpoint.
            kms_key (str): The ARN of the KMS key that is used to encrypt the data on the
                storage volume attached to the instance hosting the endpoint.
            wait (bool): Whether the call should wait until the deployment of this model completes (default: True).

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation of ``self.predictor_cls`` on
                the created endpoint name, if ``self.predictor_cls`` is not None. Otherwise, return None.
        """
        if not self.sagemaker_session:
            if instance_type in ("local", "local_gpu"):
                self.sagemaker_session = local.LocalSession()
            else:
                self.sagemaker_session = session.Session()

        if self.role is None:
            raise ValueError("Role can not be null for deploying a model")

        compiled_model_suffix = "-".join(instance_type.split(".")[:-1])
        if self._is_compiled_model:
            self.name += compiled_model_suffix

        self._create_sagemaker_model(instance_type, accelerator_type, tags)
        production_variant = sagemaker.production_variant(
            self.name,
            instance_type,
            initial_instance_count,
            accelerator_type=accelerator_type)
        if endpoint_name:
            self.endpoint_name = endpoint_name
        else:
            self.endpoint_name = self.name
            if self._is_compiled_model and not self.endpoint_name.endswith(
                    compiled_model_suffix):
                self.endpoint_name += compiled_model_suffix

        if update_endpoint:
            endpoint_config_name = self.sagemaker_session.create_endpoint_config(
                name=self.name,
                model_name=self.name,
                initial_instance_count=initial_instance_count,
                instance_type=instance_type,
                accelerator_type=accelerator_type,
                tags=tags,
                kms_key=kms_key,
            )
            self.sagemaker_session.update_endpoint(self.endpoint_name,
                                                   endpoint_config_name)
        else:
            self.sagemaker_session.endpoint_from_production_variants(
                self.endpoint_name, [production_variant], tags, kms_key, wait)

        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name,
                                      self.sagemaker_session)

    def deploy(
        self,
        initial_instance_count,
        instance_type,
        serializer=None,
        deserializer=None,
        endpoint_name=None,
        tags=None,
        wait=True,
        update_endpoint=False,
        data_capture_config=None,
    ):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a
        ``Predictor``.

        Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an
        ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not None,
        this method returns the result of invoking ``self.predictor_cls`` on
        the created endpoint name.

        The name of the created model is accessible in the ``name`` field of
        this ``Model`` after deploy returns.

        The name of the created endpoint is accessible in the
        ``endpoint_name`` field of this ``Model`` after deploy returns.

        Args:
            initial_instance_count (int): The initial number of instances to run
                in the ``Endpoint`` created from this ``Model``.
            instance_type (str): The EC2 instance type to deploy this Model to.
                For example, 'ml.p2.xlarge'.
            serializer (:class:`~sagemaker.serializers.BaseSerializer`): A
                serializer object, used to encode data for an inference endpoint
                (default: None). If ``serializer`` is not None, then
                ``serializer`` will override the default serializer. The
                default serializer is set by the ``predictor_cls``.
            deserializer (:class:`~sagemaker.deserializers.BaseDeserializer`): A
                deserializer object, used to decode data from an inference
                endpoint (default: None). If ``deserializer`` is not None, then
                ``deserializer`` will override the default deserializer. The
                default deserializer is set by the ``predictor_cls``.
            endpoint_name (str): The name of the endpoint to create (default:
                None). If not specified, a unique endpoint name will be created.
            tags (List[dict[str, str]]): The list of tags to attach to this
                specific endpoint.
            wait (bool): Whether the call should wait until the deployment of
                model completes (default: True).
            update_endpoint (bool): Flag to update the model in an existing
                Amazon SageMaker endpoint. If True, this will deploy a new
                EndpointConfig to an already existing endpoint and delete
                resources corresponding to the previous EndpointConfig. If
                False, a new endpoint will be created. Default: False
            data_capture_config (sagemaker.model_monitor.DataCaptureConfig): Specifies
                configuration related to Endpoint data capture for use with
                Amazon SageMaker Model Monitoring. Default: None.

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation of
            ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
            is not None. Otherwise, return None.
        """
        if not self.sagemaker_session:
            self.sagemaker_session = Session()

        containers = self.pipeline_container_def(instance_type)

        self.name = self.name or name_from_image(containers[0]["Image"])
        self.sagemaker_session.create_model(self.name,
                                            self.role,
                                            containers,
                                            vpc_config=self.vpc_config)

        production_variant = sagemaker.production_variant(
            self.name, instance_type, initial_instance_count)
        self.endpoint_name = endpoint_name or self.name

        data_capture_config_dict = None
        if data_capture_config is not None:
            data_capture_config_dict = data_capture_config._to_request_dict()

        if update_endpoint:
            endpoint_config_name = self.sagemaker_session.create_endpoint_config(
                name=self.name,
                model_name=self.name,
                initial_instance_count=initial_instance_count,
                instance_type=instance_type,
                tags=tags,
                data_capture_config_dict=data_capture_config_dict,
            )
            self.sagemaker_session.update_endpoint(self.endpoint_name,
                                                   endpoint_config_name,
                                                   wait=wait)
        else:
            self.sagemaker_session.endpoint_from_production_variants(
                name=self.endpoint_name,
                production_variants=[production_variant],
                tags=tags,
                wait=wait,
                data_capture_config_dict=data_capture_config_dict,
            )

        if self.predictor_cls:
            predictor = self.predictor_cls(self.endpoint_name,
                                           self.sagemaker_session)
            if serializer:
                predictor.serializer = serializer
            if deserializer:
                predictor.deserializer = deserializer
            return predictor
        return None
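
The second deploy() above accepts serializer and deserializer overrides. A minimal sketch, assuming pipeline_model is an already-built pipeline model exposing this signature and using the JSON helpers from the SDK:

from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

predictor = pipeline_model.deploy(initial_instance_count=1,
                                  instance_type='ml.m5.large',
                                  serializer=JSONSerializer(),        # overrides the predictor_cls default
                                  deserializer=JSONDeserializer(),
                                  wait=True)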
Example #12
    def deploy(
        self,
        initial_instance_count,
        instance_type,
        accelerator_type=None,
        endpoint_name=None,
        update_endpoint=False,
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config=None,
    ):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a
        ``Predictor``.

        Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an
        ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not None,
        this method returns the result of invoking ``self.predictor_cls`` on
        the created endpoint name.

        The name of the created model is accessible in the ``name`` field of
        this ``Model`` after deploy returns.

        The name of the created endpoint is accessible in the
        ``endpoint_name`` field of this ``Model`` after deploy returns.

        Args:
            initial_instance_count (int): The initial number of instances to run
                in the ``Endpoint`` created from this ``Model``.
            instance_type (str): The EC2 instance type to deploy this Model to.
                For example, 'ml.p2.xlarge', or 'local' for local mode.
            accelerator_type (str): Type of Elastic Inference accelerator to
                deploy this model for model loading and inference, for example,
                'ml.eia1.medium'. If not specified, no Elastic Inference
                accelerator will be attached to the endpoint. For more
                information:
                https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
            endpoint_name (str): The name of the endpoint to create (default:
                None). If not specified, a unique endpoint name will be created.
            update_endpoint (bool): Flag to update the model in an existing
                Amazon SageMaker endpoint. If True, this will deploy a new
                EndpointConfig to an already existing endpoint and delete
                resources corresponding to the previous EndpointConfig. If
                False, a new endpoint will be created. Default: False
            tags (List[dict[str, str]]): The list of tags to attach to this
                specific endpoint.
            kms_key (str): The ARN of the KMS key that is used to encrypt the
                data on the storage volume attached to the instance hosting the
                endpoint.
            wait (bool): Whether the call should wait until the deployment of
                this model completes (default: True).
            data_capture_config (sagemaker.model_monitor.DataCaptureConfig): Specifies
                configuration related to Endpoint data capture for use with
                Amazon SageMaker Model Monitoring. Default: None.

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation of
                ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
                is not None. Otherwise, return None.
        """
        self._init_sagemaker_session_if_does_not_exist(instance_type)

        if self.role is None:
            raise ValueError("Role can not be null for deploying a model")

        if instance_type.startswith("ml.inf") and not self._is_compiled_model:
            LOGGER.warning(
                "Your model is not compiled. Please compile your model before using Inferentia."
            )

        compiled_model_suffix = "-".join(instance_type.split(".")[:-1])
        if self._is_compiled_model:
            name_prefix = self.name or utils.name_from_image(self.image)
            self.name = "{}{}".format(name_prefix, compiled_model_suffix)

        self._create_sagemaker_model(instance_type, accelerator_type, tags)
        production_variant = sagemaker.production_variant(
            self.name, instance_type, initial_instance_count, accelerator_type=accelerator_type
        )
        if endpoint_name:
            self.endpoint_name = endpoint_name
        else:
            self.endpoint_name = self.name
            if self._is_compiled_model and not self.endpoint_name.endswith(compiled_model_suffix):
                self.endpoint_name += compiled_model_suffix

        data_capture_config_dict = None
        if data_capture_config is not None:
            data_capture_config_dict = data_capture_config._to_request_dict()

        if update_endpoint:
            endpoint_config_name = self.sagemaker_session.create_endpoint_config(
                name=self.name,
                model_name=self.name,
                initial_instance_count=initial_instance_count,
                instance_type=instance_type,
                accelerator_type=accelerator_type,
                tags=tags,
                kms_key=kms_key,
                data_capture_config_dict=data_capture_config_dict,
            )
            self.sagemaker_session.update_endpoint(
                self.endpoint_name, endpoint_config_name, wait=wait
            )
        else:
            self.sagemaker_session.endpoint_from_production_variants(
                name=self.endpoint_name,
                production_variants=[production_variant],
                tags=tags,
                kms_key=kms_key,
                wait=wait,
                data_capture_config_dict=data_capture_config_dict,
            )

        if self.predictor_cls:
            return self.predictor_cls(self.endpoint_name, self.sagemaker_session)
        return None

    def deploy(
        self,
        initial_instance_count,
        instance_type,
        serializer=None,
        deserializer=None,
        accelerator_type=None,
        endpoint_name=None,
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config=None,
    ):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a ``Predictor``.

        Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an
        ``Endpoint`` from this ``Model``. If self.model is not None, then the ``Endpoint``
        will be deployed with the parameters from self.model (like vpc_config,
        enable_network_isolation, etc.). If self.model is None, then the parameters
        passed to the ``MultiDataModel`` constructor will be used. If ``self.predictor_cls``
        is not None, this method returns the result of invoking ``self.predictor_cls`` on
        the created endpoint name.

        The name of the created model is accessible in the ``name`` field of
        this ``Model`` after deploy returns.

        The name of the created endpoint is accessible in the
        ``endpoint_name`` field of this ``Model`` after deploy returns.

        Args:
            initial_instance_count (int): The initial number of instances to run
                in the ``Endpoint`` created from this ``Model``.
            instance_type (str): The EC2 instance type to deploy this Model to.
                For example, 'ml.p2.xlarge', or 'local' for local mode.
            serializer (:class:`~sagemaker.serializers.BaseSerializer`): A
                serializer object, used to encode data for an inference endpoint
                (default: None). If ``serializer`` is not None, then
                ``serializer`` will override the default serializer. The
                default serializer is set by the ``predictor_cls``.
            deserializer (:class:`~sagemaker.deserializers.BaseDeserializer`): A
                deserializer object, used to decode data from an inference
                endpoint (default: None). If ``deserializer`` is not None, then
                ``deserializer`` will override the default deserializer. The
                default deserializer is set by the ``predictor_cls``.
            accelerator_type (str): Type of Elastic Inference accelerator to
                deploy this model for model loading and inference, for example,
                'ml.eia1.medium'. If not specified, no Elastic Inference
                accelerator will be attached to the endpoint. For more
                information:
                https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
            endpoint_name (str): The name of the endpoint to create (default:
                None). If not specified, a unique endpoint name will be created.
            tags (List[dict[str, str]]): The list of tags to attach to this
                specific endpoint.
            kms_key (str): The ARN of the KMS key that is used to encrypt the
                data on the storage volume attached to the instance hosting the
                endpoint.
            wait (bool): Whether the call should wait until the deployment of
                this model completes (default: True).
            data_capture_config (sagemaker.model_monitor.DataCaptureConfig): Specifies
                configuration related to Endpoint data capture for use with
                Amazon SageMaker Model Monitoring. Default: None.

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation of
                ``self.predictor_cls`` on the created endpoint name,
                if ``self.predictor_cls``
                is not None. Otherwise, return None.
        """
        # Set model specific parameters
        if self.model:
            enable_network_isolation = self.model.enable_network_isolation()
            role = self.model.role
            vpc_config = self.model.vpc_config
            predictor_cls = self.model.predictor_cls
        else:
            enable_network_isolation = self.enable_network_isolation()
            role = self.role
            vpc_config = self.vpc_config
            predictor_cls = self.predictor_cls

        if role is None:
            raise ValueError("Role can not be null for deploying a model")

        if instance_type == "local" and not isinstance(self.sagemaker_session,
                                                       local.LocalSession):
            self.sagemaker_session = local.LocalSession()

        container_def = self.prepare_container_def(
            instance_type, accelerator_type=accelerator_type)
        self.sagemaker_session.create_model(
            self.name,
            role,
            container_def,
            vpc_config=vpc_config,
            enable_network_isolation=enable_network_isolation,
            tags=tags,
        )

        production_variant = sagemaker.production_variant(
            self.name,
            instance_type,
            initial_instance_count,
            accelerator_type=accelerator_type)
        if endpoint_name:
            self.endpoint_name = endpoint_name
        else:
            self.endpoint_name = self.name

        data_capture_config_dict = None
        if data_capture_config is not None:
            data_capture_config_dict = data_capture_config._to_request_dict()

        self.sagemaker_session.endpoint_from_production_variants(
            name=self.endpoint_name,
            production_variants=[production_variant],
            tags=tags,
            kms_key=kms_key,
            wait=wait,
            data_capture_config_dict=data_capture_config_dict,
        )

        if predictor_cls:
            predictor = predictor_cls(self.endpoint_name,
                                      self.sagemaker_session)
            if serializer:
                predictor.serializer = serializer
            if deserializer:
                predictor.deserializer = deserializer
            return predictor
        return None
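
The data_capture_config argument above comes from sagemaker.model_monitor. A hedged example of constructing one and passing it through deploy(); multi_data_model is assumed to be an instance exposing this method, and the parameter names may differ across SDK versions:

from sagemaker.model_monitor import DataCaptureConfig

capture_config = DataCaptureConfig(enable_capture=True,
                                   sampling_percentage=20,
                                   destination_s3_uri='s3://my-bucket/data-capture')  # placeholder bucket
predictor = multi_data_model.deploy(initial_instance_count=1,
                                    instance_type='ml.m5.large',
                                    data_capture_config=capture_config)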
Example #14
    def deploy(
        self,
        initial_instance_count,
        instance_type,
        serializer=None,
        deserializer=None,
        accelerator_type=None,
        endpoint_name=None,
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config=None,
        **kwargs,
    ):
        """Deploy this ``Model`` to an ``Endpoint`` and optionally return a
        ``Predictor``.

        Create a SageMaker ``Model`` and ``EndpointConfig``, and deploy an
        ``Endpoint`` from this ``Model``. If ``self.predictor_cls`` is not None,
        this method returns the result of invoking ``self.predictor_cls`` on
        the created endpoint name.

        The name of the created model is accessible in the ``name`` field of
        this ``Model`` after deploy returns.

        The name of the created endpoint is accessible in the
        ``endpoint_name`` field of this ``Model`` after deploy returns.

        Args:
            initial_instance_count (int): The initial number of instances to run
                in the ``Endpoint`` created from this ``Model``.
            instance_type (str): The EC2 instance type to deploy this Model to.
                For example, 'ml.p2.xlarge', or 'local' for local mode.
            serializer (:class:`~sagemaker.serializers.BaseSerializer`): A
                serializer object, used to encode data for an inference endpoint
                (default: None). If ``serializer`` is not None, then
                ``serializer`` will override the default serializer. The
                default serializer is set by the ``predictor_cls``.
            deserializer (:class:`~sagemaker.deserializers.BaseDeserializer`): A
                deserializer object, used to decode data from an inference
                endpoint (default: None). If ``deserializer`` is not None, then
                ``deserializer`` will override the default deserializer. The
                default deserializer is set by the ``predictor_cls``.
            accelerator_type (str): Type of Elastic Inference accelerator to
                deploy this model for model loading and inference, for example,
                'ml.eia1.medium'. If not specified, no Elastic Inference
                accelerator will be attached to the endpoint. For more
                information:
                https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html
            endpoint_name (str): The name of the endpoint to create (default:
                None). If not specified, a unique endpoint name will be created.
            tags (List[dict[str, str]]): The list of tags to attach to this
                specific endpoint.
            kms_key (str): The ARN of the KMS key that is used to encrypt the
                data on the storage volume attached to the instance hosting the
                endpoint.
            wait (bool): Whether the call should wait until the deployment of
                this model completes (default: True).
            data_capture_config (sagemaker.model_monitor.DataCaptureConfig): Specifies
                configuration related to Endpoint data capture for use with
                Amazon SageMaker Model Monitoring. Default: None.

        Returns:
            callable[string, sagemaker.session.Session] or None: Invocation of
                ``self.predictor_cls`` on the created endpoint name, if ``self.predictor_cls``
                is not None. Otherwise, return None.
        """
        removed_kwargs("update_endpoint", kwargs)
        self._init_sagemaker_session_if_does_not_exist(instance_type)

        if self.role is None:
            raise ValueError("Role can not be null for deploying a model")

        if instance_type.startswith("ml.inf") and not self._is_compiled_model:
            LOGGER.warning(
                "Your model is not compiled. Please compile your model before using Inferentia."
            )

        compiled_model_suffix = "-".join(instance_type.split(".")[:-1])
        if self._is_compiled_model:
            self._ensure_base_name_if_needed(self.image_uri)
            if self._base_name is not None:
                self._base_name = "-".join(
                    (self._base_name, compiled_model_suffix))

        self._create_sagemaker_model(instance_type, accelerator_type, tags)
        production_variant = sagemaker.production_variant(
            self.name,
            instance_type,
            initial_instance_count,
            accelerator_type=accelerator_type)
        if endpoint_name:
            self.endpoint_name = endpoint_name
        else:
            base_endpoint_name = self._base_name or utils.base_from_name(
                self.name)
            if self._is_compiled_model and not base_endpoint_name.endswith(
                    compiled_model_suffix):
                base_endpoint_name = "-".join(
                    (base_endpoint_name, compiled_model_suffix))
            self.endpoint_name = utils.name_from_base(base_endpoint_name)

        data_capture_config_dict = None
        if data_capture_config is not None:
            data_capture_config_dict = data_capture_config._to_request_dict()

        self.sagemaker_session.endpoint_from_production_variants(
            name=self.endpoint_name,
            production_variants=[production_variant],
            tags=tags,
            kms_key=kms_key,
            wait=wait,
            data_capture_config_dict=data_capture_config_dict,
        )

        if self.predictor_cls:
            predictor = self.predictor_cls(self.endpoint_name,
                                           self.sagemaker_session)
            if serializer:
                predictor.serializer = serializer
            if deserializer:
                predictor.deserializer = deserializer
            return predictor
        return None
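
As a quick worked example of the compiled-model naming above, the suffix keeps only the instance family and drops the size component:

instance_type = 'ml.inf1.xlarge'
compiled_model_suffix = '-'.join(instance_type.split('.')[:-1])
print(compiled_model_suffix)  # 'ml-inf1'; the endpoint base name then becomes e.g. '<base>-ml-inf1'
# utils.name_from_base() then appends a generated suffix so the final endpoint name is unique.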