def test_huggingface_inference(
    sagemaker_session,
    gpu_instance_type,
    huggingface_inference_latest_version,
    huggingface_inference_pytorch_latest_version,
):
    """Deploy a Hugging Face Hub model to an endpoint and run one prediction.

    The model is configured purely through environment variables
    (``HF_MODEL_ID`` / ``HF_TASK``); no model artifact or entry point is
    passed to ``HuggingFaceModel``.

    Args:
        sagemaker_session: Session fixture used for the model, the endpoint
            cleanup context and the predictor.
        gpu_instance_type: Instance type fixture the endpoint is deployed on.
        huggingface_inference_latest_version: Fixture supplying the
            ``transformers_version`` for the model.
        huggingface_inference_pytorch_latest_version: Fixture supplying the
            ``pytorch_version`` for the model.
    """
    env = {
        "HF_MODEL_ID": "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
        "HF_TASK": "text-classification",
    }
    endpoint_name = unique_name_from_base("test-hf-inference")

    model = HuggingFaceModel(
        sagemaker_session=sagemaker_session,
        role="SageMakerRole",
        env=env,
        py_version="py36",
        transformers_version=huggingface_inference_latest_version,
        pytorch_version=huggingface_inference_pytorch_latest_version,
    )

    # Deployment and prediction both happen inside the context manager so the
    # endpoint is still available while it is being invoked.
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model.deploy(
            instance_type=gpu_instance_type,
            initial_instance_count=1,
            endpoint_name=endpoint_name,
        )

        # Use the same session that created the endpoint. Without it the
        # predictor builds its own default session, which may point at a
        # different region/credentials than the fixture's.
        predictor = HuggingFacePredictor(
            endpoint_name=endpoint_name,
            sagemaker_session=sagemaker_session,
        )
        data = {
            "inputs": "Camera - You are awarded a SiPix Digital Camera!"
            "call 09061221066 fromm landline. Delivery within 28 days."
        }
        output = predictor.predict(data)
        assert "score" in output[0]
def create_model(
    self,
    model_server_workers=None,
    role=None,
    vpc_config_override=VPC_CONFIG_DEFAULT,
    entry_point=None,
    source_dir=None,
    dependencies=None,
    **kwargs
):
    """Build a ``HuggingFaceModel`` from this estimator, ready to deploy to an ``Endpoint``.

    Args:
        model_server_workers (int): Optional. Number of worker processes used
            by the inference server. If None, server will use one worker per
            vCPU.
        role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
            which is also used during transform jobs. Falls back to the
            estimator's role when not given.
        vpc_config_override (dict[str, list[str]]): Optional override for the
            VpcConfig set on the model. Default: use subnets and security
            groups from this estimator.

            * 'Subnets' (list[str]): List of subnet ids.
            * 'SecurityGroupIds' (list[str]): List of security group ids.
        entry_point (str): Path (absolute or relative) to the local Python
            source file that is forwarded to the model as its entry point.
            If ``source_dir`` is specified, ``entry_point`` must point to a
            file located at the root of ``source_dir``. Defaults to `None`;
            unlike ``source_dir`` and ``dependencies`` there is no fallback
            to an estimator value.
        source_dir (str): Path (absolute or relative) to a directory with any
            other serving source code dependencies aside from the entry point
            file. If not specified, the model source directory from training
            is used.
        dependencies (list[str]): Paths to directories (absolute or relative)
            with additional libraries that will be exported to the container.
            If not specified, the dependencies from training are used. This
            is not supported with "local code" in Local Mode.
        **kwargs: Additional kwargs passed to the
            :class:`~sagemaker.huggingface.model.HuggingFaceModel`
            constructor. ``image_uri`` defaults to the estimator's image, and
            ``name`` is always routed through ``_get_or_create_name``.

    Returns:
        sagemaker.huggingface.model.HuggingFaceModel: A SageMaker
        ``HuggingFaceModel`` object. See
        :func:`~sagemaker.huggingface.model.HuggingFaceModel` for full details.
    """
    # A caller-supplied image wins; otherwise reuse the estimator's own.
    if "image_uri" not in kwargs:
        kwargs["image_uri"] = self.image_uri
    kwargs["name"] = self._get_or_create_name(kwargs.get("name"))

    resolved_role = role or self.role

    # Settings derived from the estimator, listed in the order they are
    # evaluated and handed to the model constructor.
    estimator_settings = {
        "model_data": self.model_data,
        "entry_point": entry_point,
        "transformers_version": self.framework_version,
        "tensorflow_version": self.tensorflow_version,
        "pytorch_version": self.pytorch_version,
        "py_version": self.py_version,
        "source_dir": source_dir or self._model_source_dir(),
        "container_log_level": self.container_log_level,
        "code_location": self.code_location,
        "model_server_workers": model_server_workers,
        "sagemaker_session": self.sagemaker_session,
        "vpc_config": self.get_vpc_config(vpc_config_override),
        "dependencies": dependencies or self.dependencies,
    }

    return HuggingFaceModel(resolved_role, **estimator_settings, **kwargs)
def test_jumpstart_huggingface_image_uri(patched_get_model_specs, session):
    """Check JumpStart image URI retrieval against the Hugging Face framework classes.

    For both the inference and the training scope, the URI returned by
    ``image_uris.retrieve`` for the JumpStart model must equal the URI the
    corresponding framework class computes from the model's ECR specs, and
    must equal a pinned expected value.

    Args:
        patched_get_model_specs: Mock whose ``side_effect`` is pointed at
            ``get_prototype_model_spec`` (presumably injected by a patch
            decorator outside this view -- confirm).
        session: SageMaker session fixture handed to the framework classes.
    """
    patched_get_model_specs.side_effect = get_prototype_model_spec

    model_id = "huggingface-spc-bert-base-cased"
    model_version = "*"
    instance_type = "ml.p2.xlarge"
    region = "us-west-2"

    model_specs = accessors.JumpStartModelsAccessor.get_model_specs(
        region, model_id, model_version
    )

    # Arguments shared by both retrieve() calls; only the image scope differs.
    lookup = dict(
        framework=None,
        region=region,
        model_id=model_id,
        model_version=model_version,
        instance_type=instance_type,
    )

    # inference
    uri = image_uris.retrieve(image_scope="inference", **lookup)

    hosting_specs = model_specs.hosting_ecr_specs
    hosting_model = HuggingFaceModel(
        role="mock_role",
        transformers_version=hosting_specs.huggingface_transformers_version,
        pytorch_version=hosting_specs.framework_version,
        py_version=hosting_specs.py_version,
        sagemaker_session=session,
    )
    framework_class_uri = hosting_model.serving_image_uri(region, instance_type)

    expected_inference_uri = (
        "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:"
        "1.7.1-transformers4.6.1-gpu-py36-cu110-ubuntu18.04"
    )
    assert uri == framework_class_uri
    assert uri == expected_inference_uri

    # training
    uri = image_uris.retrieve(image_scope="training", **lookup)

    training_specs = model_specs.training_ecr_specs
    estimator = HuggingFace(
        role="mock_role",
        region=region,
        py_version=training_specs.py_version,
        entry_point="some_entry_point",
        transformers_version=training_specs.huggingface_transformers_version,
        pytorch_version=training_specs.framework_version,
        instance_type=instance_type,
        instance_count=1,
        sagemaker_session=session,
    )
    framework_class_uri = estimator.training_image_uri(region=region)

    expected_training_uri = (
        "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-training:"
        "1.6.0-transformers4.4.2-gpu-py36-cu110-ubuntu18.04"
    )
    assert uri == expected_training_uri
    assert uri == framework_class_uri