def test_properties_describe_processing_job_response():
    """Check attribute exposure and `Get` expressions for a processing-job shape."""
    step_props = Properties("Steps.MyStep", "DescribeProcessingJobResponse")

    # A sample of shape members expected to be present on the instance.
    expected_members = (
        "ProcessingInputs",
        "ProcessingOutputConfig",
        "ProcessingEndTime",
    )
    for member in expected_members:
        assert member in step_props.__dict__

    job_name_expr = step_props.ProcessingJobName.expr
    assert job_name_expr == {"Get": "Steps.MyStep.ProcessingJobName"}

    # Indexing `Outputs` by name must show up as a quoted key in the path.
    s3_uri = step_props.ProcessingOutputConfig.Outputs["MyOutputName"].S3Output.S3Uri
    assert s3_uri.expr == {
        "Get": "Steps.MyStep.ProcessingOutputConfig.Outputs['MyOutputName'].S3Output.S3Uri"
    }
Example #2
0
def test_condition_in_mixed():
    """`ConditionIn` should serialise a mix of literal and pipeline-variable values."""
    query = ParameterString(name="MyStr")
    step_property = Properties("foo")
    start_time = ExecutionVariables.START_DATETIME

    condition = ConditionIn(
        value=query,
        in_values=["abc", step_property, start_time],
    )

    # The plain string stays as-is; the other values are expected as "Get" dicts.
    expected_request = {
        "Type": "In",
        "QueryValue": {"Get": "Parameters.MyStr"},
        "Values": [
            "abc",
            {"Get": "foo"},
            {"Get": "Execution.StartDateTime"},
        ],
    }
    assert condition.to_request() == expected_request
Example #3
0
    def __init__(
        self,
        name: str,
        processor: Processor,
        inputs: List[ProcessingInput] = None,
        outputs: List[ProcessingOutput] = None,
        job_arguments: List[str] = None,
        code: str = None,
        property_files: List[PropertyFile] = None,
        cache_config: CacheConfig = None,
    ):
        """Build a processing step around an existing `Processor`.

        Apart from `name` and `processor`, the parameters mirror those accepted by
        the `process` method of `sagemaker.processing.Processor`.

        Args:
            name (str): The name of the processing step.
            processor (Processor): A `sagemaker.processing.Processor` instance. Its
                `arguments` attribute is overwritten with `job_arguments`.
            inputs (List[ProcessingInput]): `sagemaker.processing.ProcessingInput`
                instances. Defaults to `None`.
            outputs (List[ProcessingOutput]): `sagemaker.processing.ProcessingOutput`
                instances. Defaults to `None`.
            job_arguments (List[str]): Strings to be passed into the processing job.
                Defaults to `None`.
            code (str): An S3 URI or a local path to a file with the framework
                script to run. Defaults to `None`.
            property_files (List[PropertyFile]): Property files that workflow looks
                for and resolves from the configured processing output list.
            cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
                instance.
        """
        super().__init__(name, StepTypeEnum.PROCESSING)

        # Plain bookkeeping: keep every constructor argument on the step.
        self.processor = processor
        self.code = code
        self.inputs = inputs
        self.outputs = outputs
        self.property_files = property_files
        self.cache_config = cache_config
        self.job_arguments = job_arguments

        # Examine why run method in sagemaker.processing.Processor mutates the
        # processor instance by setting the instance's arguments attribute.
        # Refactor Processor.run, if possible.
        self.processor.arguments = job_arguments

        # Step outputs are referenced under "Steps.<name>" using the
        # DescribeProcessingJob response shape.
        self._properties = Properties(
            path=f"Steps.{name}",
            shape_name="DescribeProcessingJobResponse",
        )
def test_implicit_value():
    """Implicit `str`/`int`/`float` conversion of a pipeline variable must raise."""
    prop = Properties("Steps.MyStep", "DescribeTrainingJobResponse")

    # Each builtin conversion is paired with the exact message it must produce.
    conversions = (
        (str, "Pipeline variables do not support __str__ operation."),
        (int, "Pipeline variables do not support __int__ operation."),
        (float, "Pipeline variables do not support __float__ operation."),
    )
    for convert, expected_message in conversions:
        with pytest.raises(TypeError) as error:
            convert(prop.CreationTime)
        assert str(error.value) == expected_message
Example #5
0
    def __init__(
        self,
        name: str,
        estimator: EstimatorBase,
        model: Model,
        inputs: CompilationInput = None,
        job_arguments: List[str] = None,
        depends_on: Union[List[str], List[Step]] = None,
        retry_policies: List[RetryPolicy] = None,
        display_name: str = None,
        description: str = None,
        cache_config: CacheConfig = None,
    ):
        """Build a compilation step from an estimator and a model.

        Apart from the estimator and model, the parameters mirror those accepted by
        `sagemaker.model.Model.compile_model`.

        Args:
            name (str): The name of the compilation step.
            estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
            model (Model): A `sagemaker.model.Model` instance.
            inputs (CompilationInput): A `sagemaker.inputs.CompilationInput` instance.
                Defaults to `None`.
            job_arguments (List[str]): A list of string arguments, stored on the
                step as `job_arguments`. Defaults to `None`.
            depends_on (List[str] or List[Step]): Step names or step instances this
                `sagemaker.workflow.steps.CompilationStep` depends on.
            retry_policies (List[RetryPolicy]): A list of retry policies.
            display_name (str): The display name of the compilation step.
            description (str): The description of the compilation step.
            cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
                instance.
        """
        super().__init__(
            name,
            StepTypeEnum.COMPILATION,
            display_name,
            description,
            depends_on,
            retry_policies,
        )

        # Keep the constructor arguments on the step.
        self.estimator = estimator
        self.model = model
        self.inputs = inputs
        self.job_arguments = job_arguments
        self.cache_config = cache_config

        # Step outputs are referenced under "Steps.<name>" using the
        # DescribeCompilationJob response shape.
        self._properties = Properties(
            path=f"Steps.{name}",
            shape_name="DescribeCompilationJobResponse",
        )
    def __init__(
        self,
        name: str,
        estimator: EstimatorBase,
        inputs: Union[TrainingInput, dict, str, FileSystemInput] = None,
        cache_config: CacheConfig = None,
        depends_on: List[str] = None,
    ):
        """Build a training step around an `EstimatorBase` instance.

        Apart from the estimator, the parameters mirror those accepted by the `fit`
        method of `sagemaker.estimator.Estimator`.

        Args:
            name (str): The name of the training step.
            estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
            inputs (str or dict or sagemaker.inputs.TrainingInput
                or sagemaker.inputs.FileSystemInput): Information about the training
                data, in one of the following forms:

                * (str) the S3 location where training data is saved, or a file://
                    path in local mode.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) a
                    mapping of channel names to strings or
                    :func:`~sagemaker.inputs.TrainingInput` objects, for training
                    data spread over multiple channels.
                * (sagemaker.inputs.TrainingInput) channel configuration for S3 data
                    sources that can provide additional information as well as the
                    path to the training dataset.
                    See :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.inputs.FileSystemInput) channel configuration for a
                    file system data source that can provide additional information
                    as well as the path to the training dataset.

            cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
                instance.
            depends_on (List[str]): Names of the steps this
                `sagemaker.workflow.steps.TrainingStep` depends on.
        """
        super().__init__(name, StepTypeEnum.TRAINING, depends_on)

        self.estimator = estimator
        self.inputs = inputs
        self.cache_config = cache_config

        # Step outputs are referenced under "Steps.<name>" using the
        # DescribeTrainingJob response shape.
        self._properties = Properties(
            path=f"Steps.{name}",
            shape_name="DescribeTrainingJobResponse",
        )
Example #7
0
    def __init__(
        self,
        name: str,
        model: Model,
        inputs: CreateModelInput = None,
        depends_on: List[str] = None,
    ):
        """Construct a CreateModelStep, given an `sagemaker.model.Model` instance.

        In addition to the Model instance, the other arguments are those that are supplied to
        the `_create_sagemaker_model` method of the `sagemaker.model.Model._create_sagemaker_model`.

        Args:
            name (str): The name of the CreateModel step.
            model (Model): A `sagemaker.model.Model` instance.
            inputs (CreateModelInput): A `sagemaker.inputs.CreateModelInput` instance.
                Defaults to `None`, in which case a fresh `CreateModelInput()` is
                used.
            depends_on (List[str]): A list of step names this
                `sagemaker.workflow.steps.CreateModelStep` depends on.
        """
        super(CreateModelStep, self).__init__(name, StepTypeEnum.CREATE_MODEL, depends_on)
        self.model = model
        # `inputs` is optional: the signature now carries the `None` default that
        # the fallback below (and the documented contract) always assumed.
        self.inputs = inputs or CreateModelInput()

        # Step outputs are referenced under "Steps.<name>" using the
        # DescribeModel output shape.
        self._properties = Properties(path=f"Steps.{name}", shape_name="DescribeModelOutput")
Example #8
0
    def __init__(
        self,
        name: str,
        estimator: EstimatorBase,
        inputs: TrainingInput = None,
    ):
        """Build a training step around an `EstimatorBase` instance.

        Apart from the estimator, the parameters mirror those accepted by the `fit`
        method of `sagemaker.estimator.Estimator`.

        Args:
            name (str): The name of the training step.
            estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
            inputs (TrainingInput): A `sagemaker.inputs.TrainingInput` instance.
                Defaults to `None`.
        """
        super().__init__(name, StepTypeEnum.TRAINING)

        self.inputs = inputs
        self.estimator = estimator

        # Step outputs are referenced under "Steps.<name>" using the
        # DescribeTrainingJob response shape.
        self._properties = Properties(
            path=f"Steps.{name}",
            shape_name="DescribeTrainingJobResponse",
        )
Example #9
0
    def __init__(
        self,
        name: str,
        transformer: Transformer,
        inputs: TransformInput,
    ):
        """Build a transform step around a `Transformer` instance.

        Apart from the transformer, the parameters mirror those accepted by the
        `transform` method of `sagemaker.transformer.Transformer`.

        Args:
            name (str): The name of the transform step.
            transformer (Transformer): A `sagemaker.transformer.Transformer` instance.
            inputs (TransformInput): A `sagemaker.inputs.TransformInput` instance.
        """
        super().__init__(name, StepTypeEnum.TRANSFORM)

        self.inputs = inputs
        self.transformer = transformer

        # Step outputs are referenced under "Steps.<name>" using the
        # DescribeTransformJob response shape.
        self._properties = Properties(
            path=f"Steps.{name}",
            shape_name="DescribeTransformJobResponse",
        )
def test_join_expressions():
    """`Join.expr` should render literals, pipeline variables and a nested join."""
    joined = Join(
        values=[
            "foo",
            ParameterFloat(name="MyFloat"),
            ParameterInteger(name="MyInt"),
            ParameterString(name="MyStr"),
            Properties(path="Steps.foo.OutputPath.S3Uri"),
            ExecutionVariables.PIPELINE_EXECUTION_ID,
            Join(on=",", values=[1, "a", False, 1.1]),
        ]
    )

    def get(path):
        # Expected rendering of a single pipeline-variable reference.
        return {"Get": path}

    expected_values = [
        "foo",
        get("Parameters.MyFloat"),
        get("Parameters.MyInt"),
        get("Parameters.MyStr"),
        get("Steps.foo.OutputPath.S3Uri"),
        get("Execution.PipelineExecutionId"),
        {"Std:Join": {"On": ",", "Values": [1, "a", False, 1.1]}},
    ]

    # The outer join passes no `on`; the expected separator is the empty string.
    assert joined.expr == {"Std:Join": {"On": "", "Values": expected_values}}
Example #11
0
    def __init__(
        self,
        name: str,
        tuner: HyperparameterTuner,
        inputs=None,
        job_arguments: List[str] = None,
        cache_config: CacheConfig = None,
        depends_on: List[str] = None,
    ):
        """Build a tuning step around a `HyperparameterTuner` instance.

        Apart from the tuner, the parameters mirror those accepted by the `fit`
        method of `sagemaker.tuner.HyperparameterTuner`.

        Args:
            name (str): The name of the tuning step.
            tuner (HyperparameterTuner): A `sagemaker.tuner.HyperparameterTuner`
                instance.
            inputs: Information about the training data. See the ``fit()`` method
                of the associated estimator; any of these forms is accepted:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) -
                    A mapping of channel names to strings or
                    :func:`~sagemaker.inputs.TrainingInput` objects, for training
                    data spread over multiple channels.
                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3
                    data sources that can provide additional information about the
                    training dataset.
                    See :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.session.FileSystemInput) - Channel configuration for a
                    file system data source that can provide additional information
                    as well as the path to the training dataset.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`~Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (sagemaker.amazon.amazon_estimator.FileSystemRecordSet) -
                    Amazon SageMaker channel configuration for a file system data
                    source for Amazon algorithms.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`~sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
                * (list[sagemaker.amazon.amazon_estimator.FileSystemRecordSet]) - A
                    list of
                    :class:`~sagemaker.amazon.amazon_estimator.FileSystemRecordSet`
                    objects, where each instance is a different channel of training
                    data.
            job_arguments (List[str]): A list of string arguments, stored on the
                step as `job_arguments`. Defaults to `None`.
            cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig`
                instance.
            depends_on (List[str]): Names of the steps this
                `sagemaker.workflow.steps.TuningStep` depends on.
        """
        super().__init__(name, StepTypeEnum.TUNING, depends_on)

        self.tuner = tuner
        self.inputs = inputs
        self.job_arguments = job_arguments
        self.cache_config = cache_config

        # Two response shapes are passed for this step's properties.
        response_shapes = [
            "DescribeHyperParameterTuningJobResponse",
            "ListTrainingJobsForHyperParameterTuningJobResponse",
        ]
        self._properties = Properties(path=f"Steps.{name}", shape_names=response_shapes)
Example #12
0
    def __init__(
        self,
        name: str,
        step_args: Dict = None,
        tuner: HyperparameterTuner = None,
        display_name: str = None,
        description: str = None,
        inputs=None,
        job_arguments: List[str] = None,
        cache_config: CacheConfig = None,
        depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
        retry_policies: List[RetryPolicy] = None,
    ):
        """Construct a `TuningStep` from either `step_args` or a `HyperparameterTuner`.

        Exactly one of `step_args` and `tuner` must be supplied. When a tuner is
        given, the remaining arguments are those that are supplied to the `fit`
        method of the `sagemaker.tuner.HyperparameterTuner`; this form emits a
        `DeprecationWarning`.

        Args:
            name (str): The name of the `TuningStep`.
            step_args: The arguments for the `TuningStep` definition.
            tuner (HyperparameterTuner): A `sagemaker.tuner.HyperparameterTuner` instance.
            display_name (str): The display name of the `TuningStep`.
            description (str): The description of the `TuningStep`.
            inputs: Information about the training data. Please refer to the
                `fit()` method of the associated estimator, as this can take
                any of the following forms:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) -
                    If using multiple channels for training data, you can specify
                    a dictionary mapping channel names to strings or
                    :func:`~sagemaker.inputs.TrainingInput` objects.
                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources
                    that can provide additional information about the training dataset.
                    See :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.session.FileSystemInput) - channel configuration for
                    a file system data source that can provide additional information as well as
                    the path to the training dataset.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`~Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (sagemaker.amazon.amazon_estimator.FileSystemRecordSet) -
                    Amazon SageMaker channel configuration for a file system data source for
                    Amazon algorithms.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`~sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
                * (list[sagemaker.amazon.amazon_estimator.FileSystemRecordSet]) - A list of
                    :class:`~sagemaker.amazon.amazon_estimator.FileSystemRecordSet` objects,
                    where each instance is a different channel of training data.
            job_arguments (List[str]): A list of string arguments, stored on the
                step as `job_arguments`. Defaults to `None`.
            cache_config (CacheConfig):  A `sagemaker.workflow.steps.CacheConfig` instance.
            depends_on (List[Union[str, Step, StepCollection]]): A list of `Step`/`StepCollection`
                names or `Step` instances or `StepCollection` instances that this `TuningStep`
                depends on.
            retry_policies (List[RetryPolicy]):  A list of retry policies.

        Raises:
            ValueError: If both or neither of `step_args` and `tuner` are given.
        """
        super(TuningStep,
              self).__init__(name, StepTypeEnum.TUNING, display_name,
                             description, depends_on, retry_policies)

        # Exactly one of the two construction modes must be chosen; both
        # "neither" and "both" make the two `is None` tests agree.
        if (step_args is None) == (tuner is None):
            raise ValueError(
                "either step_args or tuner need to be given, but not both.")

        self.step_args = step_args
        self.tuner = tuner
        self.inputs = inputs
        self.job_arguments = job_arguments
        self._properties = Properties(
            path=f"Steps.{name}",
            shape_names=[
                "DescribeHyperParameterTuningJobResponse",
                "ListTrainingJobsForHyperParameterTuningJobResponse",
            ],
        )
        self.cache_config = cache_config

        # Decide on the same `None`-ness the validation above used. A truthiness
        # test on `step_args` would wrongly report an empty-but-present value
        # (e.g. `{}`) as tuner-based construction.
        if tuner is not None:
            warnings.warn(
                ('We are deprecating the instantiation of TuningStep using "tuner".'
                 'Instead, simply using "step_args".'),
                DeprecationWarning,
            )
Example #13
0
    def __init__(
        self,
        name: str,
        step_args: Dict = None,
        processor: Processor = None,
        display_name: str = None,
        description: str = None,
        inputs: List[ProcessingInput] = None,
        outputs: List[ProcessingOutput] = None,
        job_arguments: List[str] = None,
        code: str = None,
        property_files: List[PropertyFile] = None,
        cache_config: CacheConfig = None,
        depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
        retry_policies: List[RetryPolicy] = None,
        kms_key=None,
    ):
        """Construct a `ProcessingStep` from either `step_args` or a `Processor`.

        Exactly one of `step_args` and `processor` must be supplied. When a
        processor is given, the remaining arguments are those that are supplied to
        the `process` method of the `sagemaker.processing.Processor`; this form
        emits a `DeprecationWarning`.

        Args:
            name (str): The name of the `ProcessingStep`.
            step_args: The arguments for the `ProcessingStep` definition.
            processor (Processor): A `sagemaker.processing.Processor` instance. Its
                `arguments` attribute is overwritten with `job_arguments`.
            display_name (str): The display name of the `ProcessingStep`.
            description (str): The description of the `ProcessingStep`.
            inputs (List[ProcessingInput]): A list of `sagemaker.processing.ProcessingInput`
                instances. Defaults to `None`.
            outputs (List[ProcessingOutput]): A list of `sagemaker.processing.ProcessingOutput`
                instances. Defaults to `None`.
            job_arguments (List[str]): A list of strings to be passed into the processing job.
                Defaults to `None`.
            code (str): This can be an S3 URI or a local path to a file with the framework
                script to run. Defaults to `None`.
            property_files (List[PropertyFile]): A list of property files that workflow looks
                for and resolves from the configured processing output list.
            cache_config (CacheConfig):  A `sagemaker.workflow.steps.CacheConfig` instance.
            depends_on (List[Union[str, Step, StepCollection]]): A list of `Step`/`StepCollection`
                names or `Step` instances or `StepCollection` instances that this `ProcessingStep`
                depends on.
            retry_policies (List[RetryPolicy]):  A list of retry policies.
            kms_key (str): The ARN of the KMS key that is used to encrypt the
                user code file. Defaults to `None`.

        Raises:
            ValueError: If both or neither of `step_args` and `processor` are given.
        """
        super(ProcessingStep,
              self).__init__(name, StepTypeEnum.PROCESSING, display_name,
                             description, depends_on, retry_policies)

        # Exactly one of the two construction modes must be chosen; both
        # "neither" and "both" make the two `is None` tests agree.
        if (step_args is None) == (processor is None):
            raise ValueError(
                "either step_args or processor need to be given, but not both."
            )

        self.step_args = step_args
        self.processor = processor
        self.inputs = inputs
        self.outputs = outputs
        self.job_arguments = job_arguments
        self.code = code
        self.property_files = property_files
        self.job_name = None
        self.kms_key = kms_key
        self.cache_config = cache_config
        self._properties = Properties(
            path=f"Steps.{name}", shape_name="DescribeProcessingJobResponse")

        # Decide on the same `None`-ness the validation above used. A truthiness
        # test on `step_args` would send an empty-but-present value (e.g. `{}`)
        # down this branch with `processor` set to `None`, raising AttributeError.
        if processor is not None:
            # Examine why run method in `sagemaker.processing.Processor`
            # mutates the processor instance by setting the instance's
            # arguments attribute. Refactor `Processor.run`, if possible.
            processor.arguments = job_arguments

            if code:
                code_url = urlparse(code)
                if code_url.scheme in ("", "file"):
                    # By default, `Processor` will upload the local code to an S3 path
                    # containing a timestamp. This causes cache misses whenever a
                    # pipeline is updated, even if the underlying script hasn't changed.
                    # To avoid this, hash the contents of the script and include it
                    # in the `job_name` passed to the `Processor`, which will be used
                    # instead of the timestamped path.
                    self.job_name = self._generate_code_upload_path()

            warnings.warn(
                ('We are deprecating the instantiation of ProcessingStep using "processor".'
                 'Instead, simply using "step_args".'),
                DeprecationWarning,
            )
def test_string_builtin_funcs_that_return_bool():
    """Check that bool-returning ``str`` helpers called on ``Properties`` are falsy."""
    model_package_props = Properties("Steps.MyStep",
                                     "DescribeModelPackageOutput")
    # The property is only resolved at runtime by the Pipeline backend, so the
    # SDK has no way of knowing the real value at this point.
    starts_with_s3 = model_package_props.startswith("s3")
    assert not starts_with_s3
    ends_with_s3 = model_package_props.endswith("s3")
    assert not ends_with_s3
Example #15
0
 def __init__(self, name, display_name=None, description=None):
     """Initialize the custom step, registering it with the TRAINING step type.

     Args:
         name: Step name; also used to build the ``Steps.<name>`` property path.
         display_name: Passed through to the parent initializer (default: None).
         description: Passed through to the parent initializer (default: None).
     """
     parent = super(CustomStep, self)
     parent.__init__(name, display_name, description, StepTypeEnum.TRAINING)
     step_path = f"Steps.{name}"
     self._properties = Properties(path=step_path)
Example #16
0
    def __init__(
        self,
        name: str,
        step_args: Optional[dict] = None,
        content_types: Optional[list] = None,
        response_types: Optional[list] = None,
        inference_instances: Optional[list] = None,
        transform_instances: Optional[list] = None,
        estimator: EstimatorBase = None,
        model_data=None,
        model_package_group_name=None,
        model_metrics=None,
        metadata_properties=None,
        approval_status="PendingManualApproval",
        image_uri=None,
        compile_model_family=None,
        display_name: str = None,
        description=None,
        depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
        retry_policies: Optional[List[RetryPolicy]] = None,
        tags=None,
        container_def_list=None,
        drift_check_baselines=None,
        customer_metadata_properties=None,
        **kwargs,
    ):
        """Build a `_RegisterModelStep`.

        The step is configured either through `step_args` or through the
        deprecated set of `content_types`, `response_types`,
        `inference_instances` and `transform_instances`; supplying both
        `step_args` and the complete deprecated set, or neither, is rejected.

        Args:
            name (str): The name of the register model step.
            step_args (dict): The arguments for this `_RegisterModelStep` definition
                (default: None).
            content_types (list): The supported MIME types for the input data (default: None).
            response_types (list): The supported MIME types for the output data (default: None).
            inference_instances (list): A list of the instance types that are used to
                generate inferences in real-time (default: None).
            transform_instances (list): A list of the instance types on which a
                transformation job can be run or on which an endpoint
                can be deployed (default: None).
            estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance
                (default: None).
            model_data: The S3 URI to the model data from training (default: None).
            model_package_group_name (str): Model Package Group name, exclusive to
                `model_package_name`; using `model_package_group_name`
                makes the Model Package versioned (default: None).
            model_metrics (ModelMetrics): ModelMetrics object (default: None).
            metadata_properties (MetadataProperties): MetadataProperties object (default: None).
            approval_status (str): Model Approval Status, values can be "Approved",
                "Rejected", or "PendingManualApproval" (default: "PendingManualApproval").
            image_uri (str): The container image uri for Model Package; if not specified,
                Estimator's training container image will be used (default: None).
            compile_model_family (str): Instance family for compiled model;
                if specified, a compiled model will be used (default: None).
            display_name (str): The display name of this `_RegisterModelStep` step (default: None).
            description (str): Model Package description (default: None).
            depends_on (List[Union[str, Step, StepCollection]]): The list of `Step`/`StepCollection`
                names or `Step` instances or `StepCollection` instances that the current `Step`
                depends on (default: None).
            retry_policies (List[RetryPolicy]): The list of retry policies for the current step
                (default: None).
            tags (List[dict[str, str]]): A list of dictionaries containing key-value pairs used to
                configure the create model package request (default: None).
            container_def_list (list): A list of container definitions (default: None).
            drift_check_baselines (DriftCheckBaselines): DriftCheckBaselines object (default: None).
            customer_metadata_properties (dict[str, str]): A dictionary of key-value paired
                metadata properties (default: None).
            **kwargs: Additional arguments to `create_model`.

        Raises:
            ValueError: If `step_args` and the complete deprecated argument set are
                both supplied, or if neither is supplied.
        """
        super(_RegisterModelStep, self).__init__(
            name,
            StepTypeEnum.REGISTER_MODEL,
            display_name,
            description,
            depends_on,
            retry_policies,
        )

        # Exactly one configuration route may be taken: `step_args`, or the
        # complete set of four deprecated arguments.
        legacy_args = (
            content_types,
            response_types,
            inference_instances,
            transform_instances,
        )
        has_step_args = step_args is not None
        has_all_legacy_args = all(arg is not None for arg in legacy_args)
        if has_step_args == has_all_legacy_args:
            raise ValueError(
                "step_args and the set of (content_types, response_types, "
                "inference_instances, transform_instances) are mutually exclusive. "
                "Either of them should be provided."
            )

        # Step definition inputs.
        self.step_args = step_args
        self.estimator = estimator
        self.model_data = model_data
        self.kwargs = kwargs
        self.container_def_list = container_def_list

        # Deprecated model-package arguments.
        self.content_types = content_types
        self.response_types = response_types
        self.inference_instances = inference_instances
        self.transform_instances = transform_instances

        # Model package metadata.
        self.model_package_group_name = model_package_group_name
        self.tags = tags
        self.model_metrics = model_metrics
        self.drift_check_baselines = drift_check_baselines
        self.customer_metadata_properties = customer_metadata_properties
        self.metadata_properties = metadata_properties
        self.approval_status = approval_status
        self.image_uri = image_uri
        self.compile_model_family = compile_model_family
        self.description = description

        step_path = f"Steps.{name}"
        self._properties = Properties(
            path=step_path, shape_name="DescribeModelPackageOutput"
        )
Example #17
0
    def __init__(
        self,
        name: str,
        clarify_check_config: ClarifyCheckConfig,
        check_job_config: CheckJobConfig,
        skip_check: Union[bool, PipelineNonPrimitiveInputTypes] = False,
        register_new_baseline: Union[bool, PipelineNonPrimitiveInputTypes] = False,
        model_package_group_name: Union[str, PipelineNonPrimitiveInputTypes] = None,
        supplied_baseline_constraints: Union[str, PipelineNonPrimitiveInputTypes] = None,
        display_name: str = None,
        description: str = None,
        cache_config: CacheConfig = None,
        depends_on: Union[List[str], List[Step]] = None,
    ):
        """Build a ClarifyCheckStep.

        Args:
            name (str): The name of the ClarifyCheckStep step.
            clarify_check_config (ClarifyCheckConfig): A DataBiasCheckConfig,
                ModelBiasCheckConfig or ModelExplainabilityCheckConfig instance.
            check_job_config (CheckJobConfig): A CheckJobConfig instance.
            skip_check (bool or PipelineNonPrimitiveInputTypes): Whether the check
                should be skipped (default: False).
            register_new_baseline (bool or PipelineNonPrimitiveInputTypes): Whether
                the new baseline should be registered (default: False).
            model_package_group_name (str or PipelineNonPrimitiveInputTypes): The name of a
                registered model package group, among which the baseline will be fetched
                from the latest approved model (default: None).
            supplied_baseline_constraints (str or PipelineNonPrimitiveInputTypes): The S3 path
                to the supplied constraints object representing the constraints JSON file
                which will be used for drift to check (default: None).
            display_name (str): The display name of the ClarifyCheckStep step (default: None).
            description (str): The description of the ClarifyCheckStep step (default: None).
            cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig` instance
                (default: None).
            depends_on (List[str] or List[Step]): A list of step names or step instances
                this `sagemaker.workflow.steps.ClarifyCheckStep` depends on (default: None).

        Raises:
            RuntimeError: If `clarify_check_config` is not one of the supported config
                types, if its `s3_analysis_config_output_path` is a pipeline variable,
                or if that path is empty while `s3_output_path` is a pipeline variable.
        """
        supported_configs = (
            DataBiasCheckConfig,
            ModelBiasCheckConfig,
            ModelExplainabilityCheckConfig,
        )
        if not isinstance(clarify_check_config, supported_configs):
            raise RuntimeError(
                "The clarify_check_config can only be object of "
                "DataBiasCheckConfig, ModelBiasCheckConfig or ModelExplainabilityCheckConfig"
            )

        data_config = clarify_check_config.data_config
        analysis_config_output_path = data_config.s3_analysis_config_output_path
        if is_pipeline_variable(analysis_config_output_path):
            raise RuntimeError(
                "s3_analysis_config_output_path cannot be of type "
                "ExecutionVariable/Expression/Parameter/Properties"
            )

        # Without an explicit analysis-config path, `s3_output_path` must not be
        # a pipeline variable either.
        if not analysis_config_output_path and is_pipeline_variable(
            data_config.s3_output_path
        ):
            raise RuntimeError(
                "`s3_output_path` cannot be of type ExecutionVariable/Expression/Parameter"
                "/Properties if `s3_analysis_config_output_path` is none or empty "
            )

        super(ClarifyCheckStep, self).__init__(
            name,
            display_name,
            description,
            StepTypeEnum.CLARIFY_CHECK,
            depends_on,
        )
        self.clarify_check_config = clarify_check_config
        self.check_job_config = check_job_config
        self.skip_check = skip_check
        self.register_new_baseline = register_new_baseline
        self.model_package_group_name = model_package_group_name
        self.supplied_baseline_constraints = supplied_baseline_constraints
        self.cache_config = cache_config

        # Explainability checks get their own monitor type; every other
        # supported config uses the bias monitor.
        if isinstance(self.clarify_check_config, ModelExplainabilityCheckConfig):
            monitor_type = "ModelExplainabilityMonitor"
        else:
            monitor_type = "ModelBiasMonitor"
        self._model_monitor = self.check_job_config._generate_model_monitor(
            monitor_type
        )

        analysis_config_uri = self._upload_monitoring_analysis_config()
        self.clarify_check_config.monitoring_analysis_config_uri = analysis_config_uri
        self._baselining_processor = (
            self._model_monitor._create_baselining_processor()
        )
        self._processing_params = self._generate_processing_job_parameters(
            self._generate_processing_job_analysis_config(),
            self._baselining_processor,
        )

        # Attach the step-specific properties directly to the root object's
        # instance dictionary.
        root_path = f"Steps.{name}"
        root_prop = Properties(path=root_path)
        for prop_name in (
            "CalculatedBaselineConstraints",
            "BaselineUsedForDriftCheckConstraints",
        ):
            root_prop.__dict__[prop_name] = Properties(f"{root_path}.{prop_name}")
        self._properties = root_prop
Example #18
0
    def __init__(
        self,
        name: str,
        quality_check_config: QualityCheckConfig,
        check_job_config: CheckJobConfig,
        skip_check: Union[bool, PipelineNonPrimitiveInputTypes] = False,
        register_new_baseline: Union[bool, PipelineNonPrimitiveInputTypes] = False,
        model_package_group_name: Union[str, PipelineNonPrimitiveInputTypes] = None,
        supplied_baseline_statistics: Union[str, PipelineNonPrimitiveInputTypes] = None,
        supplied_baseline_constraints: Union[str, PipelineNonPrimitiveInputTypes] = None,
        display_name: str = None,
        description: str = None,
        cache_config: CacheConfig = None,
        depends_on: Union[List[str], List[Step]] = None,
    ):
        """Build a QualityCheckStep.

        Args:
            name (str): The name of the QualityCheckStep step.
            quality_check_config (QualityCheckConfig): A DataQualityCheckConfig or
                ModelQualityCheckConfig instance.
            check_job_config (CheckJobConfig): A CheckJobConfig instance.
            skip_check (bool or PipelineNonPrimitiveInputTypes): Whether the check
                should be skipped (default: False).
            register_new_baseline (bool or PipelineNonPrimitiveInputTypes): Whether
                the new baseline should be registered (default: False).
            model_package_group_name (str or PipelineNonPrimitiveInputTypes): The name of a
                registered model package group, among which the baseline will be fetched
                from the latest approved model (default: None).
            supplied_baseline_statistics (str or PipelineNonPrimitiveInputTypes): The S3 path
                to the supplied statistics object representing the statistics JSON file
                which will be used for drift to check (default: None).
            supplied_baseline_constraints (str or PipelineNonPrimitiveInputTypes): The S3 path
                to the supplied constraints object representing the constraints JSON file
                which will be used for drift to check (default: None).
            display_name (str): The display name of the QualityCheckStep step (default: None).
            description (str): The description of the QualityCheckStep step (default: None).
            cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig` instance
                (default: None).
            depends_on (List[str] or List[Step]): A list of step names or step instances
                this `sagemaker.workflow.steps.QualityCheckStep` depends on (default: None).

        Raises:
            RuntimeError: If `quality_check_config` is neither a DataQualityCheckConfig
                nor a ModelQualityCheckConfig.
        """
        supported_configs = (DataQualityCheckConfig, ModelQualityCheckConfig)
        if not isinstance(quality_check_config, supported_configs):
            raise RuntimeError(
                "The quality_check_config can only be object of "
                "DataQualityCheckConfig or ModelQualityCheckConfig"
            )

        super(QualityCheckStep, self).__init__(
            name, display_name, description, StepTypeEnum.QUALITY_CHECK, depends_on
        )
        self.quality_check_config = quality_check_config
        self.check_job_config = check_job_config
        self.skip_check = skip_check
        self.register_new_baseline = register_new_baseline
        self.model_package_group_name = model_package_group_name
        self.supplied_baseline_statistics = supplied_baseline_statistics
        self.supplied_baseline_constraints = supplied_baseline_constraints
        self.cache_config = cache_config

        # Data quality checks use the default monitor; model quality checks use
        # the model quality monitor.
        if isinstance(self.quality_check_config, DataQualityCheckConfig):
            monitor_type = "DefaultModelMonitor"
        else:
            monitor_type = "ModelQualityMonitor"
        self._model_monitor = self.check_job_config._generate_model_monitor(monitor_type)
        baselining_job_name = self._model_monitor._generate_baselining_job_name()
        self._model_monitor.latest_baselining_job_name = baselining_job_name

        # The generated mapping may hold `None` entries; only the non-None
        # inputs are kept on the step, while the processor receives the raw
        # per-key values.
        job_inputs_by_key = self._generate_baseline_job_inputs()
        self._baseline_job_inputs = [
            job_input
            for job_input in job_inputs_by_key.values()
            if job_input is not None
        ]
        self._baseline_output = self._generate_baseline_output()
        self._baselining_processor = self._generate_baseline_processor(
            baseline_dataset_input=job_inputs_by_key["baseline_dataset_input"],
            baseline_output=self._baseline_output,
            post_processor_script_input=job_inputs_by_key["post_processor_script_input"],
            record_preprocessor_script_input=job_inputs_by_key[
                "record_preprocessor_script_input"
            ],
        )

        # Attach the step-specific properties directly to the root object's
        # instance dictionary.
        root_path = f"Steps.{name}"
        root_prop = Properties(path=root_path)
        for prop_name in (
            "CalculatedBaselineConstraints",
            "CalculatedBaselineStatistics",
            "BaselineUsedForDriftCheckStatistics",
            "BaselineUsedForDriftCheckConstraints",
        ):
            root_prop.__dict__[prop_name] = Properties(f"{root_path}.{prop_name}")
        self._properties = root_prop
Example #19
0
    def __init__(
        self,
        name: str,
        step_args: Dict = None,
        estimator: EstimatorBase = None,
        display_name: str = None,
        description: str = None,
        inputs: Union[TrainingInput, dict, str, FileSystemInput] = None,
        cache_config: CacheConfig = None,
        depends_on: Optional[List[Union[str, Step, "StepCollection"]]] = None,
        retry_policies: List[RetryPolicy] = None,
    ):
        """Build a `TrainingStep` from either `step_args` or an `EstimatorBase` instance.

        Besides the `EstimatorBase` instance, the remaining arguments are those
        supplied to the `fit` method of the `sagemaker.estimator.Estimator`.

        Args:
            name (str): The name of the `TrainingStep`.
            step_args: The arguments for the `TrainingStep` definition.
            estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
            display_name (str): The display name of the `TrainingStep`.
            description (str): The description of the `TrainingStep`.
            inputs (Union[str, dict, TrainingInput, FileSystemInput]): Information
                about the training data. This can be one of the following types:

                * (str) the S3 location where training data is saved, or a file:// path in
                  local mode.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) If using multiple
                  channels for training data, you can specify a dictionary mapping channel names to
                  strings or :func:`~sagemaker.inputs.TrainingInput` objects.
                * (sagemaker.inputs.TrainingInput) - channel configuration for S3 data sources
                  that can provide additional information as well as the path to the training
                  dataset.
                  See :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.inputs.FileSystemInput) - channel configuration for
                  a file system data source that can provide additional information as well as
                  the path to the training dataset.

            cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig` instance.
            depends_on (List[Union[str, Step, StepCollection]]): A list of `Step`/`StepCollection`
                names or `Step` instances or `StepCollection` instances that this `TrainingStep`
                depends on.
            retry_policies (List[RetryPolicy]): A list of retry policies.

        Raises:
            ValueError: If both or neither of `step_args` and `estimator` are given.
        """
        super(TrainingStep, self).__init__(
            name,
            StepTypeEnum.TRAINING,
            display_name,
            description,
            depends_on,
            retry_policies,
        )

        # Exactly one of `step_args` / `estimator` may be supplied.
        if (step_args is None) == (estimator is None):
            raise ValueError("either step_args or estimator need to be given.")

        self.step_args = step_args
        self.estimator = estimator
        self.inputs = inputs
        self.cache_config = cache_config
        self._properties = Properties(
            path=f"Steps.{name}", shape_name="DescribeTrainingJobResponse"
        )

        # Caching combined with profiling is worth a warning; the message below
        # explains why.
        if self.cache_config and (
            (self.step_args and "ProfilerConfig" in self.step_args)
            or (self.estimator is not None and not self.estimator.disable_profiler)
        ):
            warnings.warn(
                "Profiling is enabled on the provided estimator. The default profiler rule "
                "includes a timestamp which will change each time the pipeline is upserted, "
                "causing cache misses. If profiling is not needed, set disable_profiler to "
                "True on the estimator."
            )

        if not self.step_args:
            deprecation_msg = (
                'We are deprecating the instantiation of TrainingStep using "estimator".'
                'Instead, simply using "step_args".'
            )
            warnings.warn(deprecation_msg, DeprecationWarning)

        # By default, `Estimator` will upload the local code to an S3 path
        # containing a timestamp. This causes cache misses whenever a
        # pipeline is updated, even if the underlying script hasn't changed.
        # To avoid this, hash the contents of the training script and include it
        # in the `job_name` passed to the `Estimator`, which will be used
        # instead of the timestamped path.
        self.job_name = None
        has_local_code = estimator and (estimator.source_dir or estimator.entry_point)
        if has_local_code:
            self.job_name = self._generate_code_upload_path()
 def __init__(self, name):
     """Initialize the custom step, registering it with the TRAINING step type.

     Args:
         name: Step name; also used to build the ``Steps.<name>`` property path.
     """
     parent = super(CustomStep, self)
     parent.__init__(name, StepTypeEnum.TRAINING)
     step_path = f"Steps.{name}"
     self._properties = Properties(path=step_path)
Example #21
0
    def __init__(
        self,
        name: str,
        processor: Processor,
        display_name: str = None,
        description: str = None,
        inputs: List[ProcessingInput] = None,
        outputs: List[ProcessingOutput] = None,
        job_arguments: List[str] = None,
        code: str = None,
        property_files: List[PropertyFile] = None,
        cache_config: CacheConfig = None,
        depends_on: Union[List[str], List[Step]] = None,
        retry_policies: List[RetryPolicy] = None,
    ):
        """Build a ProcessingStep around a `Processor` instance.

        Apart from the processor itself, the arguments mirror those supplied to
        the `process` method of the `sagemaker.processing.Processor`.

        Args:
            name (str): The name of the processing step.
            processor (Processor): A `sagemaker.processing.Processor` instance. Its
                `arguments` attribute is overwritten with `job_arguments`.
            display_name (str): The display name of the processing step.
            description (str): The description of the processing step.
            inputs (List[ProcessingInput]): A list of `sagemaker.processing.ProcessorInput`
                instances. Defaults to `None`.
            outputs (List[ProcessingOutput]): A list of `sagemaker.processing.ProcessorOutput`
                instances. Defaults to `None`.
            job_arguments (List[str]): A list of strings to be passed into the processing job.
                Defaults to `None`.
            code (str): This can be an S3 URI or a local path to a file with the framework
                script to run. Defaults to `None`.
            property_files (List[PropertyFile]): A list of property files that workflow looks
                for and resolves from the configured processing output list.
            cache_config (CacheConfig): A `sagemaker.workflow.steps.CacheConfig` instance.
            depends_on (List[str] or List[Step]): A list of step names or step instances
                this `sagemaker.workflow.steps.ProcessingStep` depends on.
            retry_policies (List[RetryPolicy]): A list of retry policies.
        """
        super(ProcessingStep, self).__init__(
            name,
            StepTypeEnum.PROCESSING,
            display_name,
            description,
            depends_on,
            retry_policies,
        )
        self.processor = processor
        self.inputs = inputs
        self.outputs = outputs
        self.job_arguments = job_arguments
        self.code = code
        self.property_files = property_files
        self.cache_config = cache_config
        self.job_name = None

        # Examine why run method in sagemaker.processing.Processor mutates the processor instance
        # by setting the instance's arguments attribute. Refactor Processor.run, if possible.
        self.processor.arguments = job_arguments

        step_path = f"Steps.{name}"
        self._properties = Properties(
            path=step_path, shape_name="DescribeProcessingJobResponse"
        )

        # By default, Processor will upload the local code to an S3 path
        # containing a timestamp. This causes cache misses whenever a
        # pipeline is updated, even if the underlying script hasn't changed.
        # To avoid this, hash the contents of the script and include it
        # in the job_name passed to the Processor, which will be used
        # instead of the timestamped path.
        if code and urlparse(code).scheme in ("", "file"):
            self.job_name = self._generate_code_upload_path()
    def __init__(
        self,
        name: str,
        estimator: EstimatorBase,
        model_data,
        content_types,
        response_types,
        inference_instances,
        transform_instances,
        model_package_group_name=None,
        model_metrics=None,
        metadata_properties=None,
        approval_status="PendingManualApproval",
        image_uri=None,
        compile_model_family=None,
        description=None,
        **kwargs,
    ):
        """Build a register model step.

        The step type passed to the parent initializer is always
        `StepTypeEnum.REGISTER_MODEL`.

        Args:
            name (str): The name of the register model step.
            estimator (EstimatorBase): A `sagemaker.estimator.EstimatorBase` instance.
            model_data: The S3 URI to the model data from training.
            content_types (list): The supported MIME types for the input data.
            response_types (list): The supported MIME types for the output data.
            inference_instances (list): A list of the instance types that are used to
                generate inferences in real-time.
            transform_instances (list): A list of the instance types on which a transformation
                job can be run or on which an endpoint can be deployed.
            model_package_group_name (str): Model Package Group name, exclusive to
                `model_package_name`; using `model_package_group_name` makes the Model Package
                versioned (default: None).
            model_metrics (ModelMetrics): ModelMetrics object (default: None).
            metadata_properties (MetadataProperties): MetadataProperties object (default: None).
            approval_status (str): Model Approval Status, values can be "Approved", "Rejected",
                or "PendingManualApproval" (default: "PendingManualApproval").
            image_uri (str): The container image uri for Model Package; if not specified,
                Estimator's training container image will be used (default: None).
            compile_model_family (str): Instance family for compiled model; if specified, a
                compiled model will be used (default: None).
            description (str): Model Package description (default: None).
            **kwargs: Additional arguments to `create_model`.
        """
        super(_RegisterModelStep, self).__init__(name, StepTypeEnum.REGISTER_MODEL)

        # Model source.
        self.estimator = estimator
        self.model_data = model_data
        self.image_uri = image_uri
        self.compile_model_family = compile_model_family
        self.kwargs = kwargs

        # Inference specification.
        self.content_types = content_types
        self.response_types = response_types
        self.inference_instances = inference_instances
        self.transform_instances = transform_instances

        # Model package metadata.
        self.model_package_group_name = model_package_group_name
        self.model_metrics = model_metrics
        self.metadata_properties = metadata_properties
        self.approval_status = approval_status
        self.description = description

        step_path = f"Steps.{name}"
        self._properties = Properties(
            path=step_path, shape_name="DescribeModelPackageResponse"
        )
def test_pipeline_variable_in_pipeline_definition(sagemaker_session):
    """Check how pipeline variables are rendered in a pipeline definition.

    A pipeline with a single ConditionStep is built. Its condition compares a
    string parameter against the stringified integer parameter, and its
    else-branch is a FailStep whose error message joins a literal, a JsonGet
    function, a step property and the integer parameter. The JSON produced by
    ``pipeline.definition()`` is parsed and compared with the expected
    ``Parameters`` and ``Steps`` structures.
    """
    str_param = ParameterString(name="MyString", default_value="1")
    int_param = ParameterInteger(name="MyInteger", default_value=3)

    json_getter = JsonGet(
        step_name="my-step",
        property_file=PropertyFile(
            name="name",
            output_name="result",
            path="output",
        ),
        json_path="my-json-path",
    )
    step_props = Properties("Steps.MyStep", "DescribeProcessingJobResponse")

    gt_condition = ConditionGreaterThan(
        left=str_param,
        right=int_param.to_string(),
    )

    # Pieces of the failure message, in the order they are joined.
    output_uri = step_props.ProcessingOutputConfig.Outputs[
        "MyOutputName"].S3Output.S3Uri
    message_parts = [
        "Execution failed due to condition check fails, see:",
        json_getter.to_string(),
        output_uri.to_string(),
        int_param,
    ]
    fail_step = FailStep(
        name="MyFailStep",
        error_message=Join(on=" ", values=message_parts),
    )
    condition_step = ConditionStep(
        name="MyCondStep",
        conditions=[gt_condition],
        if_steps=[],
        else_steps=[fail_step],
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[str_param, int_param],
        steps=[condition_step],
        sagemaker_session=sagemaker_session,
    )

    definition = json.loads(pipeline.definition())

    def _empty_join(*values):
        # Expected rendering of a Join whose separator is the empty string.
        return {"Std:Join": {"On": "", "Values": list(values)}}

    str_param_ref = {"Get": "Parameters.MyString"}
    int_param_ref = {"Get": "Parameters.MyInteger"}
    json_get_ref = {
        "Std:JsonGet": {
            "PropertyFile": {"Get": "Steps.my-step.PropertyFiles.name"},
            "Path": "my-json-path",
        }
    }
    output_uri_ref = {
        "Get": "Steps.MyStep.ProcessingOutputConfig."
        + "Outputs['MyOutputName'].S3Output.S3Uri"
    }

    expected_parameters = [
        {"Name": "MyString", "Type": "String", "DefaultValue": "1"},
        {"Name": "MyInteger", "Type": "Integer", "DefaultValue": 3},
    ]
    expected_condition = {
        "Type": "GreaterThan",
        "LeftValue": str_param_ref,
        "RightValue": _empty_join(int_param_ref),
    }
    expected_fail_step = {
        "Name": "MyFailStep",
        "Type": "Fail",
        "Arguments": {
            "ErrorMessage": {
                "Std:Join": {
                    "On": " ",
                    "Values": [
                        "Execution failed due to condition check fails, see:",
                        _empty_join(json_get_ref),
                        _empty_join(output_uri_ref),
                        int_param_ref,
                    ],
                }
            }
        },
    }
    expected_condition_step = {
        "Name": "MyCondStep",
        "Type": "Condition",
        "Arguments": {
            "Conditions": [expected_condition],
            "IfSteps": [],
            "ElseSteps": [expected_fail_step],
        },
    }

    assert definition["Parameters"] == expected_parameters
    assert len(definition["Steps"]) == 1
    assert definition["Steps"][0] == expected_condition_step