def test_huggingface_inference(
    sagemaker_session,
    gpu_instance_type,
    huggingface_inference_latest_version,
    huggingface_inference_pytorch_latest_version,
):
    """Deploy a Hugging Face text-classification model and query its endpoint.

    The model is not loaded from a model artifact; it is selected through the
    ``HF_MODEL_ID`` / ``HF_TASK`` environment variables passed to the
    container. The test passes when the first element of the prediction
    contains a ``"score"`` key.

    Args:
        sagemaker_session: Session used to create the model, to invoke the
            endpoint and to clean the endpoint up.
        gpu_instance_type: Instance type the endpoint is deployed on.
        huggingface_inference_latest_version: Value passed as the model's
            ``transformers_version``.
        huggingface_inference_pytorch_latest_version: Value passed as the
            model's ``pytorch_version``.
    """
    env = {
        "HF_MODEL_ID":
        "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
        "HF_TASK": "text-classification",
    }
    endpoint_name = unique_name_from_base("test-hf-inference")

    model = HuggingFaceModel(
        sagemaker_session=sagemaker_session,
        role="SageMakerRole",
        env=env,
        py_version="py36",
        transformers_version=huggingface_inference_latest_version,
        pytorch_version=huggingface_inference_pytorch_latest_version,
    )
    # Presumably enforces a timeout and deletes the endpoint on exit (going by
    # the helper's name) -- its definition is not visible here.
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model.deploy(
            instance_type=gpu_instance_type,
            initial_instance_count=1,
            endpoint_name=endpoint_name,
        )

        # Invoke the endpoint through the same session that deployed it.
        # Without an explicit session the predictor builds a default one,
        # which may point at a different region or set of credentials.
        predictor = HuggingFacePredictor(
            endpoint_name=endpoint_name,
            sagemaker_session=sagemaker_session,
        )
        # NOTE: the two adjacent literals are joined with no space between
        # "Camera!" and "call"; kept as-is so the request payload is unchanged.
        data = {
            "inputs":
            "Camera - You are awarded a SiPix Digital Camera!"
            "call 09061221066 fromm landline. Delivery within 28 days."
        }
        output = predictor.predict(data)
        assert "score" in output[0]
    def create_model(self,
                     model_server_workers=None,
                     role=None,
                     vpc_config_override=VPC_CONFIG_DEFAULT,
                     entry_point=None,
                     source_dir=None,
                     dependencies=None,
                     **kwargs):
        """Build a deployable ``HuggingFaceModel`` from this estimator's settings.

        Framework versions, model data, logging level, code location and
        session are taken from the estimator; the arguments below override or
        complement them.

        Args:
            model_server_workers (int): Optional. Forwarded unchanged to the
                model as ``model_server_workers``.
            role (str): Optional. IAM role for the model. When falsy, the
                estimator's own ``role`` is used.
            vpc_config_override (dict[str, list[str]]): Optional. Passed to
                ``get_vpc_config`` to resolve the model's VPC configuration.
                Expected keys:
                * 'Subnets' (list[str]): List of subnet ids.
                * 'SecurityGroupIds' (list[str]): List of security group ids.
            entry_point (str): Optional. Path to the Python source file handed
                to the model as its entry point. It is forwarded as given;
                there is no fallback to the estimator's entry point.
                Defaults to `None`.
            source_dir (str): Optional. Directory with additional source code
                for the model. When falsy, the result of
                ``_model_source_dir()`` is used.
            dependencies (list[str]): Optional. Paths to extra libraries for
                the model. When falsy, the estimator's ``dependencies`` are
                used.
            **kwargs: Extra keyword arguments for the
                :class:`~sagemaker.huggingface.model.HuggingFaceModel`
                constructor. ``image_uri`` defaults to the estimator's
                ``image_uri`` when absent, and ``name`` is always replaced by
                the result of ``_get_or_create_name``.

        Returns:
            sagemaker.huggingface.model.HuggingFaceModel: The configured model
            object.
        """
        # A caller-supplied image wins; otherwise reuse the estimator's image.
        if "image_uri" not in kwargs:
            kwargs["image_uri"] = self.image_uri

        # Resolve the model name from whatever the caller passed (possibly None).
        requested_name = kwargs.get("name")
        kwargs["name"] = self._get_or_create_name(requested_name)

        model_role = role or self.role

        # Settings derived from the estimator, collected in one place before
        # they are merged with the caller's extra keyword arguments.
        estimator_settings = {
            "model_data": self.model_data,
            "entry_point": entry_point,
            "transformers_version": self.framework_version,
            "tensorflow_version": self.tensorflow_version,
            "pytorch_version": self.pytorch_version,
            "py_version": self.py_version,
            "source_dir": source_dir or self._model_source_dir(),
            "container_log_level": self.container_log_level,
            "code_location": self.code_location,
            "model_server_workers": model_server_workers,
            "sagemaker_session": self.sagemaker_session,
            "vpc_config": self.get_vpc_config(vpc_config_override),
            "dependencies": dependencies or self.dependencies,
        }

        return HuggingFaceModel(model_role, **estimator_settings, **kwargs)
# Example no. 3
0
def test_jumpstart_huggingface_image_uri(patched_get_model_specs, session):
    """Compare JumpStart image URIs with those of the Hugging Face classes.

    For both the inference and the training scope, the URI returned by
    ``image_uris.retrieve`` for the JumpStart model must equal the URI
    produced by the corresponding framework class (``HuggingFaceModel`` /
    ``HuggingFace``) configured from the model's ECR specs, and must match a
    pinned expected value.

    Args:
        patched_get_model_specs: Mock whose ``side_effect`` is set to
            ``get_prototype_model_spec``.
        session: Session handed to the framework classes.
    """
    patched_get_model_specs.side_effect = get_prototype_model_spec

    model_id = "huggingface-spc-bert-base-cased"
    model_version = "*"
    instance_type = "ml.p2.xlarge"
    region = "us-west-2"

    model_specs = accessors.JumpStartModelsAccessor.get_model_specs(
        region, model_id, model_version)

    def retrieve_uri(image_scope):
        """Look up the JumpStart image URI for the given scope."""
        return image_uris.retrieve(
            framework=None,
            region=region,
            image_scope=image_scope,
            model_id=model_id,
            model_version=model_version,
            instance_type=instance_type,
        )

    # --- inference ---
    inference_uri = retrieve_uri("inference")

    hosting_specs = model_specs.hosting_ecr_specs
    hosting_model = HuggingFaceModel(
        role="mock_role",
        transformers_version=hosting_specs.huggingface_transformers_version,
        pytorch_version=hosting_specs.framework_version,
        py_version=hosting_specs.py_version,
        sagemaker_session=session,
    )
    expected_inference_uri = hosting_model.serving_image_uri(
        region, instance_type)

    assert inference_uri == expected_inference_uri
    assert inference_uri == (
        "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:"
        "1.7.1-transformers4.6.1-gpu-py36-cu110-ubuntu18.04")

    # --- training ---
    training_uri = retrieve_uri("training")

    training_specs = model_specs.training_ecr_specs
    estimator = HuggingFace(
        role="mock_role",
        region=region,
        py_version=training_specs.py_version,
        entry_point="some_entry_point",
        transformers_version=training_specs.huggingface_transformers_version,
        pytorch_version=training_specs.framework_version,
        instance_type=instance_type,
        instance_count=1,
        sagemaker_session=session,
    )
    expected_training_uri = estimator.training_image_uri(region=region)

    assert training_uri == (
        "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:"
        "1.6.0-transformers4.4.2-gpu-py36-cu110-ubuntu18.04")
    assert training_uri == expected_training_uri