コード例 #1
0
def _ecr_image_uri(sagemaker_session, algorithm_name):
    """Return the ``:latest`` ECR image URI for *algorithm_name* in the caller's account.

    The account ID is discovered via STS and the ECR hostname via the botocore
    endpoint resolver, both scoped to the session's region.
    """
    aws_region = sagemaker_session.boto_region_name

    sts = sagemaker_session.boto_session.client(
        "sts",
        region_name=aws_region,
        endpoint_url=utils.sts_regional_endpoint(aws_region),
    )
    caller_account = sts.get_caller_identity()["Account"]

    ecr_host = utils._botocore_resolver().construct_endpoint("ecr", aws_region)["hostname"]
    return f"{caller_account}.dkr.{ecr_host}/{algorithm_name}:latest"
コード例 #2
0
def retrieve(
    framework,
    region,
    version=None,
    py_version=None,
    instance_type=None,
    accelerator_type=None,
    image_scope=None,
    container_version=None,
    distribution=None,
):
    """Retrieves the ECR URI for the Docker image matching the given arguments.

    Args:
        framework (str): The name of the framework or algorithm.
        region (str): The AWS region.
        version (str): The framework or algorithm version. This is required if there is
            more than one supported version for the given framework or algorithm.
        py_version (str): The Python version. This is required if there is
            more than one supported Python version for the given framework version.
        instance_type (str): The SageMaker instance type. For supported types, see
            https://aws.amazon.com/sagemaker/pricing/instance-types. This is required if
            there are different images for different processor types.
        accelerator_type (str): Elastic Inference accelerator type. For more, see
            https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html.
        image_scope (str): The image type, i.e. what it is used for.
            Valid values: "training", "inference", "eia". If ``accelerator_type`` is set,
            ``image_scope`` is ignored.
        container_version (str): the version of docker image
        distribution (dict): A dictionary with information on how to run distributed training
            (default: None).

    Returns:
        str: the ECR URI for the corresponding SageMaker Docker image.

    Raises:
        ValueError: If the combination of arguments specified is not supported.
    """
    config = _config_for_framework_and_scope(framework, image_scope, accelerator_type)

    version = _validate_version_and_set_if_needed(version, config, framework)
    ver_cfg = config["versions"][_version_for_config(version, config)]

    py_version = _validate_py_version_and_set_if_needed(py_version, ver_cfg, framework)
    # Narrow to the per-Python-version sub-config when one exists.
    ver_cfg = ver_cfg.get(py_version) or ver_cfg

    registry = _registry_from_region(region, ver_cfg["registries"])
    hostname = utils._botocore_resolver().construct_endpoint("ecr", region)["hostname"]
    repo = ver_cfg["repository"]

    processor = _processor(instance_type, config.get("processors") or ver_cfg.get("processors"))
    tag = _format_tag(ver_cfg.get("tag_prefix", version), processor, py_version, container_version)

    if _should_auto_select_container_version(instance_type, distribution):
        # Framework/tag combinations that need a specific CUDA/Ubuntu container build.
        auto_selected = {
            "tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3",
            "tensorflow-2.3.1-gpu-py37": "cu110-ubuntu18.04",
            "tensorflow-1.15-gpu-py37": "cu110-ubuntu18.04-v8",
            "tensorflow-1.15.4-gpu-py37": "cu110-ubuntu18.04",
            "mxnet-1.8-gpu-py37": "cu110-ubuntu16.04-v1",
            "mxnet-1.8.0-gpu-py37": "cu110-ubuntu16.04",
            "pytorch-1.6-gpu-py36": "cu110-ubuntu18.04-v3",
            "pytorch-1.6.0-gpu-py36": "cu110-ubuntu18.04",
            "pytorch-1.6-gpu-py3": "cu110-ubuntu18.04-v3",
            "pytorch-1.6.0-gpu-py3": "cu110-ubuntu18.04",
        }
        lookup_key = "-".join([framework, tag])
        if lookup_key in auto_selected:
            tag = "-".join([tag, auto_selected[lookup_key]])

    if tag:
        repo += ":{}".format(tag)

    return ECR_URI_TEMPLATE.format(registry=registry, hostname=hostname, repository=repo)
コード例 #3
0
def retrieve(
    framework,
    region,
    version=None,
    py_version=None,
    instance_type=None,
    accelerator_type=None,
    image_scope=None,
    container_version=None,
    distribution=None,
    base_framework_version=None,
    training_compiler_config=None,
):
    """Retrieves the ECR URI for the Docker image matching the given arguments.

    Ideally this function should not be called directly, rather it should be called from the
    fit() function inside framework estimator.

    Args:
        framework (str): The name of the framework or algorithm.
        region (str): The AWS region.
        version (str): The framework or algorithm version. This is required if there is
            more than one supported version for the given framework or algorithm.
        py_version (str): The Python version. This is required if there is
            more than one supported Python version for the given framework version.
        instance_type (str): The SageMaker instance type. For supported types, see
            https://aws.amazon.com/sagemaker/pricing/instance-types. This is required if
            there are different images for different processor types.
        accelerator_type (str): Elastic Inference accelerator type. For more, see
            https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html.
        image_scope (str): The image type, i.e. what it is used for.
            Valid values: "training", "inference", "eia". If ``accelerator_type`` is set,
            ``image_scope`` is ignored.
        container_version (str): the version of docker image.
            Ideally the value of parameter should be created inside the framework.
            For custom use, see the list of supported container versions:
            https://github.com/aws/deep-learning-containers/blob/master/available_images.md
            (default: None).
        distribution (dict): A dictionary with information on how to run distributed training
            (default: None).
        base_framework_version (str): The underlying framework version for HuggingFace
            images, e.g. "pytorch1.9" (default: None).
        training_compiler_config (:class:`~sagemaker.training_compiler.TrainingCompilerConfig`):
            A configuration class for the SageMaker Training Compiler
            (default: None).

    Returns:
        str: the ECR URI for the corresponding SageMaker Docker image.

    Raises:
        ValueError: If the combination of arguments specified is not supported.
    """
    if training_compiler_config is None:
        config = _config_for_framework_and_scope(framework, image_scope, accelerator_type)
    elif framework == HUGGING_FACE_FRAMEWORK:
        # Training Compiler images live under a dedicated framework entry.
        config = _config_for_framework_and_scope(
            framework + "-training-compiler", image_scope, accelerator_type
        )
    else:
        raise ValueError(
            "Unsupported Configuration: Training Compiler is only supported with HuggingFace"
        )

    # Keep the caller-supplied version; HuggingFace tag construction needs it below.
    original_version = version
    version = _validate_version_and_set_if_needed(version, config, framework)
    version_config = config["versions"][_version_for_config(version, config)]

    if framework == HUGGING_FACE_FRAMEWORK:
        # Resolve an aliased base framework version (e.g. "pytorch1.9" -> a concrete
        # config key), defaulting to the caller-supplied value. The default keeps the
        # name bound even when this version entry has no "version_aliases" table --
        # previously that case raised NameError instead of a helpful validation error.
        full_base_framework_version = version_config.get("version_aliases", {}).get(
            base_framework_version, base_framework_version
        )
        _validate_arg(full_base_framework_version, list(version_config.keys()), "base framework")
        version_config = version_config.get(full_base_framework_version)

    py_version = _validate_py_version_and_set_if_needed(py_version, version_config, framework)
    # Narrow to the per-Python-version sub-config when one exists.
    version_config = version_config.get(py_version) or version_config
    registry = _registry_from_region(region, version_config["registries"])
    hostname = utils._botocore_resolver().construct_endpoint("ecr", region)["hostname"]

    repo = version_config["repository"]

    processor = _processor(
        instance_type, config.get("processors") or version_config.get("processors")
    )

    # if container version is available in .json file, utilize that
    if version_config.get("container_version"):
        container_version = version_config["container_version"][processor]

    if framework == HUGGING_FACE_FRAMEWORK:
        # Strip the "pytorch"/"tensorflow" prefix, keeping only the numeric version.
        # NOTE(review): assumes base_framework_version is provided for HuggingFace;
        # a None value raises AttributeError here -- confirm callers always set it.
        pt_or_tf_version = (
            re.compile("^(pytorch|tensorflow)(.*)$").match(base_framework_version).group(2)
        )

        _version = original_version
        if repo in [
            "huggingface-pytorch-trcomp-training",
            "huggingface-tensorflow-trcomp-training",
        ]:
            # Training Compiler repos are tagged with the resolved (full) version.
            _version = version

        tag_prefix = f"{pt_or_tf_version}-transformers{_version}"

    else:
        tag_prefix = version_config.get("tag_prefix", version)

    tag = _format_tag(
        tag_prefix,
        processor,
        py_version,
        container_version,
    )

    if _should_auto_select_container_version(instance_type, distribution):
        # Framework/tag combinations that need a specific CUDA/Ubuntu container build.
        container_versions = {
            "tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3",
            "tensorflow-2.3.1-gpu-py37": "cu110-ubuntu18.04",
            "tensorflow-2.3.2-gpu-py37": "cu110-ubuntu18.04",
            "tensorflow-1.15-gpu-py37": "cu110-ubuntu18.04-v8",
            "tensorflow-1.15.4-gpu-py37": "cu110-ubuntu18.04",
            "tensorflow-1.15.5-gpu-py37": "cu110-ubuntu18.04",
            "mxnet-1.8-gpu-py37": "cu110-ubuntu16.04-v1",
            "mxnet-1.8.0-gpu-py37": "cu110-ubuntu16.04",
            "pytorch-1.6-gpu-py36": "cu110-ubuntu18.04-v3",
            "pytorch-1.6.0-gpu-py36": "cu110-ubuntu18.04",
            "pytorch-1.6-gpu-py3": "cu110-ubuntu18.04-v3",
            "pytorch-1.6.0-gpu-py3": "cu110-ubuntu18.04",
        }
        key = "-".join([framework, tag])
        if key in container_versions:
            tag = "-".join([tag, container_versions[key]])

    if tag:
        repo += ":{}".format(tag)

    return ECR_URI_TEMPLATE.format(registry=registry, hostname=hostname, repository=repo)
コード例 #4
0
def retrieve(
    framework,
    region,
    version=None,
    py_version=None,
    instance_type=None,
    accelerator_type=None,
    image_scope=None,
    container_version=None,
):
    """Retrieves the ECR URI for the Docker image matching the given arguments.

    Args:
        framework (str): The name of the framework or algorithm.
        region (str): The AWS region.
        version (str): The framework or algorithm version. This is required if there is
            more than one supported version for the given framework or algorithm.
        py_version (str): The Python version. This is required if there is
            more than one supported Python version for the given framework version.
        instance_type (str): The SageMaker instance type. For supported types, see
            https://aws.amazon.com/sagemaker/pricing/instance-types. This is required if
            there are different images for different processor types.
        accelerator_type (str): Elastic Inference accelerator type. For more, see
            https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html.
        image_scope (str): The image type, i.e. what it is used for.
            Valid values: "training", "inference", "eia". If ``accelerator_type`` is set,
            ``image_scope`` is ignored.
        container_version (str): the version of docker image

    Returns:
        str: the ECR URI for the corresponding SageMaker Docker image.

    Raises:
        ValueError: If the combination of arguments specified is not supported.
    """
    config = _config_for_framework_and_scope(framework, image_scope, accelerator_type)

    version = _validate_version_and_set_if_needed(version, config, framework)
    ver_cfg = config["versions"][_version_for_config(version, config)]

    py_version = _validate_py_version_and_set_if_needed(py_version, ver_cfg, framework)
    # Narrow to the per-Python-version sub-config when one exists.
    ver_cfg = ver_cfg.get(py_version) or ver_cfg

    registry = _registry_from_region(region, ver_cfg["registries"])
    hostname = utils._botocore_resolver().construct_endpoint("ecr", region)["hostname"]
    repo = ver_cfg["repository"]

    processor = _processor(instance_type, config.get("processors") or ver_cfg.get("processors"))
    tag = _format_tag(ver_cfg.get("tag_prefix", version), processor, py_version, container_version)
    if tag:
        repo += ":{}".format(tag)

    return ECR_URI_TEMPLATE.format(registry=registry, hostname=hostname, repository=repo)
コード例 #5
0
def retrieve(
    framework,
    region,
    version=None,
    py_version=None,
    instance_type=None,
    accelerator_type=None,
    image_scope=None,
    container_version=None,
    distribution=None,
    base_framework_version=None,
    training_compiler_config=None,
    model_id=None,
    model_version=None,
    tolerate_vulnerable_model=False,
    tolerate_deprecated_model=False,
    sdk_version=None,
    inference_tool=None,
    serverless_inference_config=None,
) -> str:
    """Retrieves the ECR URI for the Docker image matching the given arguments.

    Ideally this function should not be called directly, rather it should be called from the
    fit() function inside framework estimator.

    Args:
        framework (str): The name of the framework or algorithm.
        region (str): The AWS region.
        version (str): The framework or algorithm version. This is required if there is
            more than one supported version for the given framework or algorithm.
        py_version (str): The Python version. This is required if there is
            more than one supported Python version for the given framework version.
        instance_type (str): The SageMaker instance type. For supported types, see
            https://aws.amazon.com/sagemaker/pricing. This is required if
            there are different images for different processor types.
        accelerator_type (str): Elastic Inference accelerator type. For more, see
            https://docs.aws.amazon.com/sagemaker/latest/dg/ei.html.
        image_scope (str): The image type, i.e. what it is used for.
            Valid values: "training", "inference", "eia". If ``accelerator_type`` is set,
            ``image_scope`` is ignored.
        container_version (str): the version of docker image.
            Ideally the value of parameter should be created inside the framework.
            For custom use, see the list of supported container versions:
            https://github.com/aws/deep-learning-containers/blob/master/available_images.md
            (default: None).
        distribution (dict): A dictionary with information on how to run distributed training
            (default: None).
        base_framework_version (str): The underlying framework version for HuggingFace
            images, e.g. "pytorch1.9" (default: None).
        training_compiler_config (:class:`~sagemaker.training_compiler.TrainingCompilerConfig`):
            A configuration class for the SageMaker Training Compiler
            (default: None).
        model_id (str): The JumpStart model ID for which to retrieve the image URI
            (default: None).
        model_version (str): The version of the JumpStart model for which to retrieve the
            image URI (default: None).
        tolerate_vulnerable_model (bool): ``True`` if vulnerable versions of model specifications
            should be tolerated without an exception raised. If ``False``, raises an exception if
            the script used by this version of the model has dependencies with known security
            vulnerabilities. (Default: False).
        tolerate_deprecated_model (bool): True if deprecated versions of model specifications
            should be tolerated without an exception raised. If False, raises an exception
            if the version of the model is deprecated. (Default: False).
        sdk_version (str): the version of python-sdk that will be used in the image retrieval.
            (default: None).
        inference_tool (str): the tool that will be used to aid in the inference.
            Valid values: "neuron, None"
            (default: None).
        serverless_inference_config (sagemaker.serverless.ServerlessInferenceConfig):
            Specifies configuration related to serverless endpoint. Instance type is
            not provided in serverless inference. So this is used to determine processor type.

    Returns:
        str: The ECR URI for the corresponding SageMaker Docker image.

    Raises:
        NotImplementedError: If the scope is not supported.
        ValueError: If the combination of arguments specified is not supported.
        VulnerableJumpStartModelError: If any of the dependencies required by the script have
            known security vulnerabilities.
        DeprecatedJumpStartModelError: If the version of the model is deprecated.
    """
    # JumpStart models are resolved through a dedicated lookup path.
    if is_jumpstart_model_input(model_id, model_version):
        return artifacts._retrieve_image_uri(
            model_id,
            model_version,
            image_scope,
            framework,
            region,
            version,
            py_version,
            instance_type,
            accelerator_type,
            container_version,
            distribution,
            base_framework_version,
            training_compiler_config,
            tolerate_vulnerable_model,
            tolerate_deprecated_model,
        )
    if training_compiler_config is None:
        _framework = framework
        if framework == HUGGING_FACE_FRAMEWORK:
            # Neuron (Inferentia) instances use a separate framework entry.
            inference_tool = _get_inference_tool(inference_tool, instance_type)
            if inference_tool == "neuron":
                _framework = f"{framework}-{inference_tool}"
        config = _config_for_framework_and_scope(_framework, image_scope, accelerator_type)
    elif framework == HUGGING_FACE_FRAMEWORK:
        # Training Compiler images live under a dedicated framework entry.
        config = _config_for_framework_and_scope(
            framework + "-training-compiler", image_scope, accelerator_type
        )
    else:
        raise ValueError(
            "Unsupported Configuration: Training Compiler is only supported with HuggingFace"
        )

    # Keep the caller-supplied version; HuggingFace tag construction needs it below.
    original_version = version
    version = _validate_version_and_set_if_needed(version, config, framework)
    version_config = config["versions"][_version_for_config(version, config)]

    if framework == HUGGING_FACE_FRAMEWORK:
        # Resolve an aliased base framework version (e.g. "pytorch1.9" -> a concrete
        # config key), defaulting to the caller-supplied value. The default keeps the
        # name bound even when this version entry has no "version_aliases" table --
        # previously that case raised NameError instead of a helpful validation error.
        full_base_framework_version = version_config.get("version_aliases", {}).get(
            base_framework_version, base_framework_version
        )
        _validate_arg(full_base_framework_version, list(version_config.keys()), "base framework")
        version_config = version_config.get(full_base_framework_version)

    py_version = _validate_py_version_and_set_if_needed(py_version, version_config, framework)
    # Narrow to the per-Python-version sub-config when one exists.
    version_config = version_config.get(py_version) or version_config
    registry = _registry_from_region(region, version_config["registries"])
    hostname = utils._botocore_resolver().construct_endpoint("ecr", region)["hostname"]

    repo = version_config["repository"]

    processor = _processor(
        instance_type,
        config.get("processors") or version_config.get("processors"),
        serverless_inference_config,
    )

    # if container version is available in .json file, utilize that
    if version_config.get("container_version"):
        container_version = version_config["container_version"][processor]

    if framework == HUGGING_FACE_FRAMEWORK:
        # Strip the "pytorch"/"tensorflow" prefix, keeping only the numeric version.
        # NOTE(review): assumes base_framework_version is provided for HuggingFace;
        # a None value raises AttributeError here -- confirm callers always set it.
        pt_or_tf_version = (
            re.compile("^(pytorch|tensorflow)(.*)$").match(base_framework_version).group(2)
        )
        _version = original_version

        if repo in [
            "huggingface-pytorch-trcomp-training",
            "huggingface-tensorflow-trcomp-training",
        ]:
            # Training Compiler repos are tagged with the resolved (full) version.
            _version = version
        if repo in ["huggingface-pytorch-inference-neuron"]:
            if not sdk_version:
                sdk_version = _get_latest_versions(version_config["sdk_versions"])
            # NOTE(review): assumes container_version is set (from the argument or the
            # config's "container_version" table) -- None would raise TypeError here.
            container_version = sdk_version + "-" + container_version
            # Use .get with a default so a missing top-level "version_aliases" table
            # does not raise AttributeError on the chained .get().
            if config.get("version_aliases", {}).get(original_version):
                _version = config.get("version_aliases")[original_version]
            if (
                config.get("versions", {})
                .get(_version, {})
                .get("version_aliases", {})
                .get(base_framework_version, {})
            ):
                _base_framework_version = config.get("versions")[_version]["version_aliases"][
                    base_framework_version
                ]
                pt_or_tf_version = (
                    re.compile("^(pytorch|tensorflow)(.*)$")
                    .match(_base_framework_version)
                    .group(2)
                )

        tag_prefix = f"{pt_or_tf_version}-transformers{_version}"
    else:
        tag_prefix = version_config.get("tag_prefix", version)

    tag = _format_tag(tag_prefix, processor, py_version, container_version, inference_tool)

    if instance_type is not None and _should_auto_select_container_version(
        instance_type, distribution
    ):
        # Framework/tag combinations that need a specific CUDA/Ubuntu container build.
        container_versions = {
            "tensorflow-2.3-gpu-py37": "cu110-ubuntu18.04-v3",
            "tensorflow-2.3.1-gpu-py37": "cu110-ubuntu18.04",
            "tensorflow-2.3.2-gpu-py37": "cu110-ubuntu18.04",
            "tensorflow-1.15-gpu-py37": "cu110-ubuntu18.04-v8",
            "tensorflow-1.15.4-gpu-py37": "cu110-ubuntu18.04",
            "tensorflow-1.15.5-gpu-py37": "cu110-ubuntu18.04",
            "mxnet-1.8-gpu-py37": "cu110-ubuntu16.04-v1",
            "mxnet-1.8.0-gpu-py37": "cu110-ubuntu16.04",
            "pytorch-1.6-gpu-py36": "cu110-ubuntu18.04-v3",
            "pytorch-1.6.0-gpu-py36": "cu110-ubuntu18.04",
            "pytorch-1.6-gpu-py3": "cu110-ubuntu18.04-v3",
            "pytorch-1.6.0-gpu-py3": "cu110-ubuntu18.04",
        }
        key = "-".join([framework, tag])
        if key in container_versions:
            tag = "-".join([tag, container_versions[key]])

    if tag:
        repo += ":{}".format(tag)

    return ECR_URI_TEMPLATE.format(registry=registry, hostname=hostname, repository=repo)