def __init__(
    self,
    name: str,
    sagemaker_session,
    role,
    model_data: str,
    entry_point: str,
    source_dir: str = None,
    dependencies: List = None,
    depends_on: List[str] = None,
    **kwargs,
):
    """Set up a step that runs the model re-packing script via an SKLearn estimator.

    The step wraps an `SKLearn` estimator whose entry point is
    `REPACK_SCRIPT`; the location of the model archive is supplied to it
    as a `TrainingInput`.

    Args:
        name (str): The name of the step.
        sagemaker_session: Session object handed to the `SKLearn` estimator.
        role: Role handed to the `SKLearn` estimator.
        model_data (str): Location of the model archive. A plain string is
            split at its last "/" into a prefix (used as the training
            input) and an archive file name. A `Properties` object is used
            as the prefix unchanged, and the archive name is taken to be
            "model.tar.gz".
        entry_point (str): Path to the inference script; only its base
            name is passed to the repack script.
        source_dir (str): Forwarded to the estimator (default: None).
        dependencies (List): Forwarded to the estimator (default: None).
        depends_on (List[str]): Forwarded to the parent step initializer
            (default: None).
        **kwargs: Extra keyword arguments forwarded to the `SKLearn`
            estimator.
    """
    # Keep the caller-supplied values around on the instance.
    self._model_data = model_data
    self.sagemaker_session = sagemaker_session
    self.role = role

    # Work out where the archive lives and what it is called.
    if not isinstance(model_data, Properties):
        path_parts = model_data.split("/")
        self._model_prefix = "/".join(path_parts[:-1])
        self._model_archive = path_parts[-1]
    else:
        # The value is not a concrete string here, so use it as the prefix
        # and fall back to the fixed archive name.
        self._model_prefix = model_data
        self._model_archive = "model.tar.gz"

    self._entry_point = entry_point
    self._entry_point_basename = os.path.basename(entry_point)
    self._source_dir = source_dir
    self._dependencies = dependencies

    # Values the repack script reads as hyperparameters.
    repack_hyperparameters = {
        "inference_script": self._entry_point_basename,
        "model_archive": self._model_archive,
    }

    # The estimator that actually performs the re-packing.
    repack_estimator = SKLearn(
        framework_version=FRAMEWORK_VERSION,
        instance_type=INSTANCE_TYPE,
        entry_point=REPACK_SCRIPT,
        source_dir=source_dir,
        dependencies=dependencies,
        sagemaker_session=sagemaker_session,
        role=role,
        hyperparameters=repack_hyperparameters,
        **kwargs,
    )
    repack_estimator.disable_profiler = True

    super(_RepackModelStep, self).__init__(
        name=name,
        depends_on=depends_on,
        estimator=repack_estimator,
        inputs=TrainingInput(self._model_prefix),
    )
def __init__(
    self,
    name: str,
    sagemaker_session,
    role,
    model_data: str,
    entry_point: str,
    display_name: str = None,
    description: str = None,
    source_dir: str = None,
    dependencies: List = None,
    depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
    retry_policies: List[RetryPolicy] = None,
    subnets=None,
    security_group_ids=None,
    **kwargs,
):
    """Initialize the re-packing step.

    Builds an `SKLearn` estimator that runs `REPACK_SCRIPT` and registers
    it, together with a `TrainingInput` pointing at ``model_data``, with
    the parent step initializer.

    Args:
        name (str): The name of the step.
        sagemaker_session (sagemaker.session.Session): Session object
            which manages interactions with Amazon SageMaker APIs and any
            other AWS services needed. If not specified, the estimator
            creates one using the default AWS configuration chain.
        role (str): An AWS IAM role (either name or full ARN). The Amazon
            SageMaker training jobs and APIs that create Amazon SageMaker
            endpoints use this role to access training data and model
            artifacts. After the endpoint is created, the inference code
            might use the IAM role, if it needs to access an AWS resource.
        model_data (str): The S3 location of a SageMaker model data
            `.tar.gz` file.
        entry_point (str): Path (absolute or relative) to the local Python
            source file which should be executed as the entry point to
            inference. If ``source_dir`` is specified, then
            ``entry_point`` must point to a file located at the root of
            ``source_dir``. If 'git_config' is provided, 'entry_point'
            should be a relative location to the Python source file in
            the Git repo.

            Example:
                With the following GitHub repo directory structure:

                >>> |----- README.md
                >>> |----- src
                >>>         |----- train.py
                >>>         |----- test.py

                You can assign entry_point='src/train.py'.
        display_name (str): The display name of this `_RepackModelStep`
            step (default: None).
        description (str): The description of this `_RepackModelStep`
            (default: None).
        source_dir (str): A relative location to a directory with other
            training or model hosting source code dependencies aside from
            the entry point file in the Git repo (default: None). The
            structure within this directory is preserved when training on
            Amazon SageMaker.
        dependencies (list[str]): A list of paths to directories (absolute
            or relative) with any additional libraries that will be
            exported to the container (default: []). The library folders
            will be copied to SageMaker in the same folder where the
            entrypoint is copied. If 'git_config' is provided,
            'dependencies' should be a list of relative locations to
            directories with any additional libraries needed in the Git
            repo.

            .. admonition:: Example

                The following call

                >>> Estimator(entry_point='train.py',
                ...           dependencies=['my/libs/common', 'virtual-env'])

                results in the following inside the container:

                >>> $ ls

                >>> opt/ml/code
                >>>     |------ train.py
                >>>     |------ common
                >>>     |------ virtual-env

            This is not supported with "local code" in Local Mode.
        depends_on (List[Union[str, Step, StepCollection]]): The list of
            `Step`/`StepCollection` names or `Step` instances or
            `StepCollection` instances that the current `Step` depends on
            (default: None).
        retry_policies (List[RetryPolicy]): The list of retry policies for
            the current step (default: None).
        subnets (list[str]): List of subnet ids. If not specified, the
            re-packing job will be created without VPC config
            (default: None).
        security_group_ids (list[str]): List of security group ids. If not
            specified, the re-packing job will be created without VPC
            config (default: None).
        **kwargs: Additional arguments for the repacking job.
    """
    # Keep the caller-supplied values around on the instance.
    self._model_data = model_data
    self.sagemaker_session = sagemaker_session
    self.role = role
    self._entry_point = entry_point
    self._entry_point_basename = os.path.basename(entry_point)
    self._source_dir = source_dir
    self._dependencies = dependencies

    # Hyperparameters carry the dependency list as one space-delimited
    # string; an empty or missing list is passed as None.
    joined_dependencies = " ".join(dependencies) if dependencies else None

    repack_hyperparameters = {
        "inference_script": self._entry_point_basename,
        "model_archive": model_data,
        "dependencies": joined_dependencies,
        "source_dir": source_dir,
    }

    # The estimator that actually performs the re-packing.
    repack_estimator = SKLearn(
        framework_version=FRAMEWORK_VERSION,
        instance_type=INSTANCE_TYPE,
        entry_point=REPACK_SCRIPT,
        source_dir=source_dir,
        dependencies=dependencies,
        sagemaker_session=sagemaker_session,
        role=role,
        hyperparameters=repack_hyperparameters,
        subnets=subnets,
        security_group_ids=security_group_ids,
        **kwargs,
    )
    repack_estimator.disable_profiler = True

    super(_RepackModelStep, self).__init__(
        name=name,
        display_name=display_name,
        description=description,
        depends_on=depends_on,
        retry_policies=retry_policies,
        estimator=repack_estimator,
        inputs=TrainingInput(model_data),
    )