def __init__(
        self,
        model_data,
        image,
        role=None,
        predictor_cls=None,
        env=None,
        name=None,
        vpc_config=None,
        sagemaker_session=None,
        enable_network_isolation=False,
        model_kms_key=None,
    ):
        """Set up a SageMaker ``Model`` from model artifacts and a container image.

        Every construction logs a warning that the ``image`` parameter is
        renamed to ``image_uri`` in version 2 of the SDK.

        Args:
            model_data (str): S3 location of the SageMaker model data
                ``.tar.gz`` file.
            image (str): URI of the Docker image.
            role (str): AWS IAM role, given either as a name or as a full ARN
                (default: None). Amazon SageMaker training jobs and the APIs
                that create Amazon SageMaker endpoints use this role to access
                training data and model artifacts. Once the endpoint exists,
                the inference code might also use the role if it needs to
                reach other AWS resources. May be left as None when this
                Model is going to be passed to a ``PipelineModel``, which has
                its own Role field.
            predictor_cls (callable[string, sagemaker.session.Session]):
                Factory used to build a predictor (default: None). When it is
                not None, ``deploy`` returns whatever this callable produces
                for the created endpoint name.
            env (dict[str, str]): Environment variables to run with ``image``
                when hosted in SageMaker (default: None). A falsy value is
                stored as an empty dict.
            name (str): Name of the model. When None, a default model name is
                selected on each ``deploy``.
            vpc_config (dict[str, list[str]]): The VpcConfig set on the model
                (default: None)
                * 'Subnets' (list[str]): List of subnet ids.
                * 'SecurityGroupIds' (list[str]): List of security group ids.
            sagemaker_session (sagemaker.session.Session): Session object used
                for SageMaker interactions (default: None). If omitted, one is
                created using the default AWS configuration chain.
            enable_network_isolation (Boolean): Defaults to False. When True,
                network isolation is enabled in the endpoint, isolating the
                model container so that no inbound or outbound network calls
                can be made to or from it.
            model_kms_key (str): ARN of the KMS key used to encrypt the
                repacked model archive file, if the model is repacked.
        """
        rename_notice = fw_utils.parameter_v2_rename_warning("image", "image_uri")
        LOGGER.warning(rename_notice)

        # Values supplied by the caller.
        self.model_data = model_data
        self.image = image
        self.role = role
        self.predictor_cls = predictor_cls
        self.env = env if env else {}
        self.name = name
        self.vpc_config = vpc_config
        self.sagemaker_session = sagemaker_session

        # Internal state that starts out unset.
        self._model_name = None
        self.endpoint_name = None
        self._is_compiled_model = False

        # Remaining caller-supplied options.
        self._enable_network_isolation = enable_network_isolation
        self.model_kms_key = model_kms_key
Example #2
0
    def __init__(self,
                 training_steps=None,
                 evaluation_steps=None,
                 checkpoint_path=None,
                 py_version=None,
                 framework_version=None,
                 model_dir=None,
                 requirements_file="",
                 image_name=None,
                 script_mode=False,
                 distributions=None,
                 **kwargs):
        """Initialize a ``TensorFlow`` estimator.

        Args:
            training_steps (int): Perform this many steps of training. `None`, the default means
                train forever.
            evaluation_steps (int): Perform this many steps of evaluation. `None`, the default
                means that evaluation runs until input from eval_input_fn is exhausted (or another
                exception is raised).
            checkpoint_path (str): Identifies S3 location where checkpoint data during model
                training can be saved (default: None). For distributed model training, this
                parameter is required.
            py_version (str): Python version you want to use for executing your model training
                code (default: None). If not specified, 'py3' is used when
                ``_only_python_3_supported()`` returns True and 'py2' otherwise. Selecting 'py2'
                logs a Python 2 deprecation warning.
            framework_version (str): TensorFlow version you want to use for executing your model
                training code. If not specified, a warning is logged and
                ``defaults.TF_VERSION`` is used (documented as 1.11).
            model_dir (str): S3 location where the checkpoint data and models can be exported to
                during training (default: None). It will be passed in the training script as one of
                the command line arguments. If not specified, one is provided based on
                your training configuration:

                * *distributed training with MPI* - ``/opt/ml/model``
                * *single-machine training or distributed training without MPI* -
                  ``s3://{output_path}/model``
                * *Local Mode with local sources (file:// instead of s3://)* -
                  ``/opt/ml/shared/model``

            requirements_file (str): Path to a ``requirements.txt`` file (default: ''). The path
                should be within and relative to ``source_dir``. Details on the format can be
                found in the Pip User Guide:
                <https://pip.pypa.io/en/stable/reference/pip_install/#requirements-file-format>
            image_name (str): If specified, the estimator will use this image for training and
                hosting, instead of selecting the appropriate SageMaker official image based on
                framework_version and py_version. It can be an ECR url or dockerhub image and tag.

                Examples:
                    123.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0
                    custom-image:latest.
            script_mode (bool): If set to True, the estimator will use the Script Mode
                containers (default: False). This will be ignored if py_version is set to 'py3'.
            distributions (dict): A dictionary with information on how to run distributed training
                (default: None). Currently we support distributed training with parameter servers
                and MPI. Passing a value logs a warning that this parameter is renamed to
                ``distribution`` in version 2 of the SDK.
                To enable parameter server use the following setup:

                .. code:: python

                    {
                        'parameter_server':
                        {
                            'enabled': True
                        }
                    }

                To enable MPI:

                .. code:: python

                    {
                        'mpi':
                        {
                            'enabled': True
                        }
                    }

            **kwargs: Additional kwargs passed to the Framework constructor.
                ``enable_sagemaker_metrics`` is set to True here when the caller did not
                provide it and the framework version is 1.15 or higher.

        .. tip::

            You can find additional parameters for initializing this class at
            :class:`~sagemaker.estimator.Framework` and
            :class:`~sagemaker.estimator.EstimatorBase`.
        """
        if framework_version is None:
            logger.warning(
                fw.empty_framework_version_warning(defaults.TF_VERSION,
                                                   self.LATEST_VERSION))
        # Must be set before _only_python_3_supported() is consulted below.
        self.framework_version = framework_version or defaults.TF_VERSION

        if not py_version:
            py_version = "py3" if self._only_python_3_supported() else "py2"
        if py_version == "py2":
            logger.warning(
                fw.python_deprecation_warning(self.__framework_name__,
                                              defaults.LATEST_PY2_VERSION))

        if distributions is not None:
            # Arguments are (v1 name, v2 name): the old parameter is
            # "distributions" and its v2 replacement is "distribution". The
            # user's dict must not be passed here as the new name.
            logger.warning(
                fw.parameter_v2_rename_warning("distributions", "distribution"))
            train_instance_type = kwargs.get("train_instance_type")
            fw.warn_if_parameter_server_with_multi_gpu(
                training_instance_type=train_instance_type,
                distributions=distributions)

        if "enable_sagemaker_metrics" not in kwargs:
            # enable sagemaker metrics for TF v1.15 or greater:
            if fw.is_version_equal_or_higher([1, 15], self.framework_version):
                kwargs["enable_sagemaker_metrics"] = True

        super(TensorFlow, self).__init__(image_name=image_name, **kwargs)
        self.checkpoint_path = checkpoint_path

        self.py_version = py_version
        self.training_steps = training_steps
        self.evaluation_steps = evaluation_steps
        self.model_dir = model_dir
        self.script_mode = script_mode
        self.distributions = distributions or {}

        self._validate_args(
            py_version=py_version,
            script_mode=script_mode,
            framework_version=self.framework_version,
            training_steps=training_steps,
            evaluation_steps=evaluation_steps,
            requirements_file=requirements_file,
            checkpoint_path=checkpoint_path,
        )
        self._validate_requirements_file(requirements_file)
        self.requirements_file = requirements_file
    def __init__(self,
                 entry_point,
                 source_dir=None,
                 hyperparameters=None,
                 py_version="py2",
                 framework_version=None,
                 image_name=None,
                 distributions=None,
                 **kwargs):
        """Initialize an ``MXNet`` estimator.

        This ``Estimator`` runs an MXNet script inside a managed MXNet
        execution environment as part of a SageMaker Training Job. That
        environment is an Amazon-built Docker container which executes the
        functions defined in the supplied ``entry_point`` Python script.

        Call :meth:`~sagemaker.amazon.estimator.Framework.fit` on this
        Estimator to start training. Once training has finished,
        :meth:`~sagemaker.amazon.estimator.Framework.deploy` creates a hosted
        SageMaker endpoint and returns an
        :class:`~sagemaker.amazon.mxnet.model.MXNetPredictor` instance for
        running inference against the hosted model.

        Technical documentation on preparing MXNet scripts for SageMaker
        training and on using the MXNet Estimator is available on the project
        home-page: https://github.com/aws/sagemaker-python-sdk

        Args:
            entry_point (str): Absolute or relative path to the Python source
                file that is executed as the entry point to training. When
                ``source_dir`` is given, ``entry_point`` must point to a file
                located at the root of ``source_dir``.
            source_dir (str): Absolute path, relative path or S3 URI of a
                directory holding any other training source code dependencies
                besides the entry point file (default: None). An S3 URI must
                point to a tar.gz file. The structure within this directory is
                preserved when training on Amazon SageMaker.
            hyperparameters (dict): Hyperparameters used for training
                (default: None). They are made accessible to the training code
                on SageMaker as a dict[str, str]. For convenience, keys and
                values of other types are accepted, but ``str()`` is called to
                convert them before training.
            py_version (str): Python version used to execute the model
                training code (default: 'py2'). One of 'py2' or 'py3'.
                Choosing 'py2' logs a deprecation warning.
            framework_version (str): MXNet version used to execute the model
                training code. List of supported versions
                https://github.com/aws/sagemaker-python-sdk#mxnet-sagemaker-estimators.
                If not specified, this will default to 1.2.1.
            image_name (str): If specified, the estimator uses this image for
                training and hosting instead of selecting the appropriate
                SageMaker official image based on framework_version and
                py_version. It can be an ECR url or dockerhub image and tag.

                Examples:
                    * ``123412341234.dkr.ecr.us-west-2.amazonaws.com/my-custom-image:1.0``
                    * ``custom-image:latest``

            distributions (dict): Information on how to run distributed
                training (default: None). To have parameter servers launched
                for training, set this value to
                ``{'parameter_server': {'enabled': True}}``.
            **kwargs: Additional kwargs passed to the
                :class:`~sagemaker.estimator.Framework` constructor.

        .. tip::

            You can find additional parameters for initializing this class at
            :class:`~sagemaker.estimator.Framework` and
            :class:`~sagemaker.estimator.EstimatorBase`.
        """
        if framework_version is None:
            version_notice = empty_framework_version_warning(
                defaults.MXNET_VERSION, self.LATEST_VERSION)
            logger.warning(version_notice)
        self.framework_version = framework_version or defaults.MXNET_VERSION

        # Turn SageMaker metrics on for MXNet v1.6 or greater, but only when
        # the caller has not expressed a preference.
        metrics_unspecified = "enable_sagemaker_metrics" not in kwargs
        if metrics_unspecified and is_version_equal_or_higher(
                [1, 6], self.framework_version):
            kwargs["enable_sagemaker_metrics"] = True

        super(MXNet, self).__init__(
            entry_point,
            source_dir,
            hyperparameters,
            image_name=image_name,
            **kwargs
        )

        if py_version == "py2":
            py2_notice = python_deprecation_warning(
                self.__framework_name__, defaults.LATEST_PY2_VERSION)
            logger.warning(py2_notice)

        if distributions is not None:
            rename_notice = parameter_v2_rename_warning(
                "distributions", "distribution")
            logger.warning(rename_notice)
            warn_if_parameter_server_with_multi_gpu(
                training_instance_type=kwargs.get("train_instance_type"),
                distributions=distributions,
            )

        self.py_version = py_version
        self._configure_distribution(distributions)