コード例 #1
0
    def __init__(
        self,
        name: str,
        sagemaker_session,
        role,
        model_data: str,
        entry_point: str,
        source_dir: str = None,
        dependencies: List = None,
        depends_on: List[str] = None,
        **kwargs,
    ):
        """Build the step that runs the model repacking script.

        An `SKLearn` estimator is configured to execute `REPACK_SCRIPT`, and the
        parent initializer is then called with that estimator and a
        `TrainingInput` pointing at the model location.

        Args:
            name (str): The name of the step.
            sagemaker_session: Session object handed to the `SKLearn` estimator.
            role: Role handed to the `SKLearn` estimator.
            model_data (str): Location of the model archive. A `Properties`
                instance is used unchanged as the input location and the archive
                name is taken to be ``model.tar.gz``; any other value is split at
                its last ``/`` into an input prefix and an archive file name.
            entry_point (str): Path to the inference script. Only its base name
                is forwarded to the job, as the ``inference_script``
                hyperparameter.
            source_dir (str): Forwarded to the `SKLearn` estimator (default: None).
            dependencies (List): Forwarded to the `SKLearn` estimator
                (default: None).
            depends_on (List[str]): Forwarded to the parent initializer
                (default: None).
            **kwargs: Additional keyword arguments forwarded to the `SKLearn`
                estimator.
        """
        # Keep the caller-supplied values around on the instance.
        self._model_data = model_data
        self.sagemaker_session = sagemaker_session
        self.role = role

        # Work out where the archive lives and what it is called.
        if isinstance(model_data, Properties):
            prefix, archive = model_data, "model.tar.gz"
        else:
            pieces = model_data.split("/")
            prefix, archive = "/".join(pieces[:-1]), pieces[-1]
        self._model_prefix = prefix
        self._model_archive = archive

        self._entry_point = entry_point
        self._entry_point_basename = os.path.basename(entry_point)
        self._source_dir = source_dir
        self._dependencies = dependencies

        # The estimator that actually performs the repacking.
        repack_hyperparameters = {
            "inference_script": self._entry_point_basename,
            "model_archive": archive,
        }
        repacker = SKLearn(
            framework_version=FRAMEWORK_VERSION,
            instance_type=INSTANCE_TYPE,
            entry_point=REPACK_SCRIPT,
            source_dir=source_dir,
            dependencies=dependencies,
            sagemaker_session=sagemaker_session,
            role=role,
            hyperparameters=repack_hyperparameters,
            **kwargs,
        )
        repacker.disable_profiler = True

        # Hand everything over to the parent step initializer.
        super(_RepackModelStep, self).__init__(
            name=name,
            depends_on=depends_on,
            estimator=repacker,
            inputs=TrainingInput(prefix),
        )
コード例 #2
0
    def __init__(
        self,
        name: str,
        sagemaker_session,
        role,
        model_data: str,
        entry_point: str,
        display_name: str = None,
        description: str = None,
        source_dir: str = None,
        dependencies: List = None,
        depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
        retry_policies: List[RetryPolicy] = None,
        subnets=None,
        security_group_ids=None,
        **kwargs,
    ):
        """Initialize the model re-packing step.

        Configures an `SKLearn` estimator that runs `REPACK_SCRIPT` and passes
        it, together with a `TrainingInput` for ``model_data``, to the parent
        initializer.

        Args:
            name (str): The name of the training step.
            sagemaker_session (sagemaker.session.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. If not specified, the estimator creates one
                using the default AWS configuration chain.
            role (str): An AWS IAM role (either name or full ARN). The Amazon
                SageMaker training jobs and APIs that create Amazon SageMaker
                endpoints use this role to access training data and model
                artifacts. After the endpoint is created, the inference code
                might use the IAM role, if it needs to access an AWS resource.
            model_data (str): The S3 location of a SageMaker model data
                ``.tar.gz`` file.
            entry_point (str): Path (absolute or relative) to the local Python
                source file which should be executed as the entry point to
                inference. If ``source_dir`` is specified, then ``entry_point``
                must point to a file located at the root of ``source_dir``.
                If 'git_config' is provided, 'entry_point' should be a relative
                location to the Python source file in the Git repo.

                Example:
                    With the following GitHub repo directory structure:

                    >>> |----- README.md
                    >>> |----- src
                    >>>         |----- train.py
                    >>>         |----- test.py

                    You can assign entry_point='src/train.py'.
            display_name (str): The display name of this `_RepackModelStep`
                step (default: None).
            description (str): The description of this `_RepackModelStep`
                (default: None).
            source_dir (str): A relative location to a directory with other
                training or model hosting source code dependencies aside from
                the entry point file in the Git repo (default: None). Structure
                within this directory is preserved when training on Amazon
                SageMaker.
            dependencies (list[str]): A list of paths to directories (absolute
                or relative) with any additional libraries that will be
                exported to the container (default: []). The library folders
                will be copied to SageMaker in the same folder where the
                entrypoint is copied. If 'git_config' is provided,
                'dependencies' should be a list of relative locations to
                directories with any additional libraries needed in the Git
                repo.

                .. admonition:: Example

                    The following call

                    >>> Estimator(entry_point='train.py',
                    ...           dependencies=['my/libs/common', 'virtual-env'])

                    results in the following inside the container:

                    >>> $ ls

                    >>> opt/ml/code
                    >>>     |------ train.py
                    >>>     |------ common
                    >>>     |------ virtual-env

                This is not supported with "local code" in Local Mode.
            depends_on (List[Union[str, Step, StepCollection]]): The list of
                `Step`/`StepCollection` names or `Step` instances or
                `StepCollection` instances that the current `Step` depends on
                (default: None).
            retry_policies (List[RetryPolicy]): The list of retry policies for
                the current step (default: None).
            subnets (list[str]): List of subnet ids. If not specified, the
                re-packing job will be created without VPC config
                (default: None).
            security_group_ids (list[str]): List of security group ids. If not
                specified, the re-packing job will be created without VPC
                config (default: None).
            **kwargs: additional arguments for the repacking job.
        """
        # Keep the caller-supplied values around on the instance.
        self._model_data = model_data
        self.sagemaker_session = sagemaker_session
        self.role = role
        self._entry_point = entry_point
        self._entry_point_basename = os.path.basename(entry_point)
        self._source_dir = source_dir
        self._dependencies = dependencies

        # Hyperparameters carry the dependency list as one space-delimited
        # string; an empty or missing list is passed through as None.
        joined_dependencies = " ".join(dependencies) if dependencies else None
        repack_hyperparameters = {
            "inference_script": self._entry_point_basename,
            "model_archive": model_data,
            "dependencies": joined_dependencies,
            "source_dir": source_dir,
        }

        # The estimator that actually performs the repacking.
        repacker = SKLearn(
            framework_version=FRAMEWORK_VERSION,
            instance_type=INSTANCE_TYPE,
            entry_point=REPACK_SCRIPT,
            source_dir=source_dir,
            dependencies=dependencies,
            sagemaker_session=sagemaker_session,
            role=role,
            hyperparameters=repack_hyperparameters,
            subnets=subnets,
            security_group_ids=security_group_ids,
            **kwargs,
        )
        repacker.disable_profiler = True

        # Hand everything over to the parent step initializer.
        super(_RepackModelStep, self).__init__(
            name=name,
            display_name=display_name,
            description=description,
            depends_on=depends_on,
            retry_policies=retry_policies,
            estimator=repacker,
            inputs=TrainingInput(model_data),
        )