def __init__(self, state_id, endpoint_name, endpoint_config_name,
             tags=None, update=False, **kwargs):
        """
        Configure a Task state that creates or updates a SageMaker endpoint.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            endpoint_name (str or Placeholder): The name of the endpoint to create. We recommend using the :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            endpoint_config_name (str or Placeholder): The name of the endpoint configuration to use for the endpoint. We recommend using the :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
            update (bool, optional): Set to `True` if an existing endpoint must be updated. Set to `False` if a new endpoint must be created. (default: False)
            **kwargs: Forwarded to the parent constructor; the resource and parameters fields are overwritten here.
        """
        # The service integration depends on whether we update or create.
        if update:
            resource_arn = 'arn:aws:states:::sagemaker:updateEndpoint'
        else:
            resource_arn = 'arn:aws:states:::sagemaker:createEndpoint'

        request = {
            "EndpointConfigName": endpoint_config_name,
            "EndpointName": endpoint_name,
        }
        if tags:
            request['Tags'] = tags_dict_to_kv_list(tags)

        kwargs[Field.Resource.value] = resource_arn
        kwargs[Field.Parameters.value] = request

        super(EndpointStep, self).__init__(state_id, **kwargs)
    def __init__(self, state_id, model, model_name=None, instance_type=None,
                 tags=None, **kwargs):
        """
        Configure a Task state that creates a SageMaker model.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
            model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend using the :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
            tags (list[dict] or Placeholders, optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
            parameters (dict, optional): Passed through ``**kwargs``. The value of this field is merged with other arguments to become the request payload for SageMaker `CreateModel <https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateModel.html>`_. (Default: None)
                You can use `parameters` to override the value provided by other arguments and specify any field's value dynamically using `Placeholders <https://aws-step-functions-data-science-sdk.readthedocs.io/en/stable/placeholders.html?highlight=placeholder#stepfunctions.inputs.Placeholder>`_.

        Raises:
            ValueError: If ``model`` is neither a ``FrameworkModel`` nor a ``Model`` instance.
        """
        # Reject unsupported model objects before building anything.
        if not isinstance(model, (FrameworkModel, Model)):
            raise ValueError(
                "Expected 'model' parameter to be of type 'sagemaker.model.Model', but received type '{}'"
                .format(type(model).__name__))

        if isinstance(model, FrameworkModel):
            request = model_config(model=model,
                                   instance_type=instance_type,
                                   role=model.role,
                                   image_uri=model.image_uri)
            if model_name:
                request['ModelName'] = model_name
        else:
            # Plain Model: assemble the CreateModel payload by hand.
            request = {
                'ExecutionRoleArn': model.role,
                'ModelName': model_name or model.name,
                'PrimaryContainer': {
                    'Environment': model.env,
                    'Image': model.image_uri,
                    'ModelDataUrl': model.model_data
                }
            }

        # 'S3Operations' is dropped from the payload when present.
        request.pop('S3Operations', None)

        if tags:
            if isinstance(tags, Placeholder):
                request['Tags'] = tags
            else:
                request['Tags'] = tags_dict_to_kv_list(tags)

        # Caller-supplied parameters (dict only) are merged over the generated ones.
        overrides = kwargs.get(Field.Parameters.value)
        if isinstance(overrides, dict):
            merge_dicts(request, overrides)

        kwargs[Field.Parameters.value] = request

        # Example resource arn: arn:aws:states:::sagemaker:createModel
        kwargs[Field.Resource.value] = get_service_integration_arn(
            SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateModel)

        super(ModelStep, self).__init__(state_id, **kwargs)
# Code example #3
# 0
    def __init__(self,
                 state_id,
                 endpoint_config_name,
                 model_name,
                 initial_instance_count,
                 instance_type,
                 variant_name='AllTraffic',
                 data_capture_config=None,
                 tags=None,
                 **kwargs):
        """
        Configure a Task state that creates a SageMaker endpoint configuration.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            endpoint_config_name (str or Placeholder): The name of the endpoint configuration to create. We recommend using the :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            model_name (str or Placeholder): The name of the SageMaker model to attach to the endpoint configuration. We recommend using the :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            initial_instance_count (int or Placeholder): The initial number of instances to run in the ``Endpoint`` created from this ``Model``.
            instance_type (str or Placeholder): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
            variant_name (str, optional): The name of the production variant. (default: 'AllTraffic')
            data_capture_config (sagemaker.model_monitor.DataCaptureConfig, optional): Specifies
                configuration related to Endpoint data capture for use with
                Amazon SageMaker Model Monitoring. Values that are not
                ``DataCaptureConfig`` instances are ignored. Default: None.
            tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
        """
        # Single production variant described by the constructor arguments.
        production_variant = {
            'InitialInstanceCount': initial_instance_count,
            'InstanceType': instance_type,
            'ModelName': model_name,
            'VariantName': variant_name
        }
        request = {
            'EndpointConfigName': endpoint_config_name,
            'ProductionVariants': [production_variant]
        }

        if isinstance(data_capture_config, DataCaptureConfig):
            capture_request = data_capture_config._to_request_dict()
            request['DataCaptureConfig'] = capture_request

        if tags:
            request['Tags'] = tags_dict_to_kv_list(tags)

        kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createEndpointConfig'
        kwargs[Field.Parameters.value] = request

        super(EndpointConfigStep, self).__init__(state_id, **kwargs)
# Code example #4
# 0
# File: custom_steps.py  Project: yapweiyih/mlmax
    def __init__(self,
                 state_id,
                 model,
                 model_data_url=None,
                 sagemaker_submit_directory=None,
                 model_name=None,
                 instance_type=None,
                 tags=None,
                 **kwargs):
        """
        Configure a Task state that creates a SageMaker model, optionally
        overriding the model artifact location and the submit directory.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
            model_data_url (str or Placeholder, optional): When given, replaces ``PrimaryContainer.ModelDataUrl`` in the request. Only applied when ``model`` is a ``FrameworkModel``.
            sagemaker_submit_directory (str or Placeholder, optional): When given, stored as the ``SAGEMAKER_SUBMIT_DIRECTORY`` entry of the container environment. Only applied when ``model`` is a ``FrameworkModel``.
            model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend using the :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. This parameter is typically required when the estimator used is not an `Amazon built-in algorithm <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.
            tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.

        Raises:
            ValueError: If ``model`` is neither a ``FrameworkModel`` nor a ``Model`` instance.
        """
        if isinstance(model, FrameworkModel):
            parameters = model_config(model=model,
                                      instance_type=instance_type,
                                      role=model.role,
                                      image=model.image)
            if model_name:
                parameters['ModelName'] = model_name
            # Allow the model artifact location to be supplied at execution time.
            if model_data_url:
                parameters['PrimaryContainer']['ModelDataUrl'] = model_data_url
            # Allow the script location to be supplied at execution time.
            if sagemaker_submit_directory:
                parameters['PrimaryContainer']['Environment'][
                    'SAGEMAKER_SUBMIT_DIRECTORY'] = sagemaker_submit_directory
            # The former debug print of the payload was removed: a constructor
            # should not write the request (including the role ARN) to stdout.
        elif isinstance(model, Model):
            # NOTE(review): the environment is left empty here rather than
            # taken from the model, and the two overrides above are not
            # applied in this branch -- confirm this is intended.
            parameters = {
                'ExecutionRoleArn': model.role,
                'ModelName': model_name or model.name,
                'PrimaryContainer': {
                    'Environment': {},
                    'Image': model.image,
                    'ModelDataUrl': model.model_data
                }
            }
        else:
            raise ValueError(
                "Expected 'model' parameter to be of type 'sagemaker.model.Model', but received type '{}'"
                .format(type(model).__name__))

        # 'S3Operations' is dropped from the payload when present.
        if 'S3Operations' in parameters:
            del parameters['S3Operations']

        if tags:
            parameters['Tags'] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = parameters
        kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createModel'

        super(MLMaxModelStep, self).__init__(state_id, **kwargs)
    def __init__(self, state_id, tuner, job_name, data,
                 wait_for_completion=True, tags=None, **kwargs):
        """
        Configure a Task state that starts a SageMaker hyperparameter tuning job.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            tuner (sagemaker.tuner.HyperparameterTuner): The tuner to use in the TuningStep.
            job_name (str or Placeholder): Specify a tuning job name. We recommend using the :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator in the tuner, as this can take any of the following forms:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                    channels for training data, you can specify a dict mapping channel names to
                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                    provide additional information about the training dataset. See
                    :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
            wait_for_completion (bool, optional): Set to `True` if the Task state should wait for the tuning job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the tuning job and proceed to the next step. (default: True)
            tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
        """
        # The '.sync' integration makes the state wait for the job to finish.
        kwargs[Field.Resource.value] = (
            'arn:aws:states:::sagemaker:createHyperParameterTuningJob.sync'
            if wait_for_completion else
            'arn:aws:states:::sagemaker:createHyperParameterTuningJob')

        generated = tuning_config(tuner=tuner, inputs=data, job_name=job_name)
        request = generated.copy()

        if job_name is not None:
            request['HyperParameterTuningJobName'] = job_name

        # 'S3Operations' is dropped from the payload when present.
        request.pop('S3Operations', None)

        if tags:
            request['Tags'] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = request

        super(TuningStep, self).__init__(state_id, **kwargs)
# Code example #6
# 0
    def __init__(self, state_id, model, model_name=None, instance_type=None,
                 tags=None, **kwargs):
        """
        Configure a Task state that creates a SageMaker model.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
            model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend using the :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
            tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.

        Raises:
            ValueError: If ``model`` is neither a ``FrameworkModel`` nor a ``Model`` instance.
        """
        # Reject unsupported model objects before building anything.
        if not isinstance(model, (FrameworkModel, Model)):
            raise ValueError(
                "Expected 'model' parameter to be of type 'sagemaker.model.Model', but received type '{}'"
                .format(type(model).__name__))

        if isinstance(model, FrameworkModel):
            request = model_config(model=model,
                                   instance_type=instance_type,
                                   role=model.role,
                                   image_uri=model.image_uri)
            if model_name:
                request['ModelName'] = model_name
        else:
            # Plain Model: assemble the CreateModel payload by hand.
            request = {
                'ExecutionRoleArn': model.role,
                'ModelName': model_name or model.name,
                'PrimaryContainer': {
                    'Environment': model.env,
                    'Image': model.image_uri,
                    'ModelDataUrl': model.model_data
                }
            }

        # 'S3Operations' is dropped from the payload when present.
        request.pop('S3Operations', None)

        if tags:
            request['Tags'] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = request

        # Example resource arn: arn:aws:states:::sagemaker:createModel
        kwargs[Field.Resource.value] = get_service_integration_arn(
            SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateModel)

        super(ModelStep, self).__init__(state_id, **kwargs)
# Code example #7
# 0
    def __init__(self,
                 state_id,
                 processor,
                 job_name,
                 inputs=None,
                 outputs=None,
                 experiment_config=None,
                 container_arguments=None,
                 container_entrypoint=None,
                 kms_key_id=None,
                 wait_for_completion=True,
                 tags=None,
                 **kwargs):
        """
        Configure a Task state that starts a SageMaker processing job.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            processor (sagemaker.processing.Processor): The processor for the processing step.
            job_name (str or Placeholder): Specify a processing job name, this is required for the processing job to run. We recommend using the :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            inputs (list[:class:`~sagemaker.processing.ProcessingInput`]): Input files for
                the processing job. These must be provided as
                :class:`~sagemaker.processing.ProcessingInput` objects (default: None).
            outputs (list[:class:`~sagemaker.processing.ProcessingOutput`]): Outputs for
                the processing job. These can be specified as either path strings or
                :class:`~sagemaker.processing.ProcessingOutput` objects (default: None).
            experiment_config (dict, optional): Specify the experiment config for the processing. (Default: None)
            container_arguments ([str]): The arguments for a container used to run a processing job.
            container_entrypoint ([str]): The entrypoint for a container used to run a processing job.
            kms_key_id (str): The AWS Key Management Service (AWS KMS) key that Amazon SageMaker
                uses to encrypt the processing job output. KmsKeyId can be an ID,
                an ARN, or an alias of a KMS key.
                The KmsKeyId is applied to all outputs.
            wait_for_completion (bool, optional): Set to `True` if the Task state should wait for the processing job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the processing job and proceed to the next step. (default: True)
            tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
        """
        # The '.sync' integration makes the state wait for the job to finish.
        kwargs[Field.Resource.value] = (
            'arn:aws:states:::sagemaker:createProcessingJob.sync'
            if wait_for_completion else
            'arn:aws:states:::sagemaker:createProcessingJob')

        # A job name is only forwarded to processing_config when it is a
        # literal string.
        config_args = dict(processor=processor,
                           inputs=inputs,
                           outputs=outputs,
                           container_arguments=container_arguments,
                           container_entrypoint=container_entrypoint,
                           kms_key_id=kms_key_id)
        if isinstance(job_name, str):
            config_args['job_name'] = job_name
        request = processing_config(**config_args)

        # Placeholder job names are written into the payload directly.
        if isinstance(job_name, (ExecutionInput, StepInput)):
            request['ProcessingJobName'] = job_name

        if experiment_config is not None:
            request['ExperimentConfig'] = experiment_config

        if tags:
            request['Tags'] = tags_dict_to_kv_list(tags)

        # 'S3Operations' is dropped from the payload when present.
        request.pop('S3Operations', None)

        kwargs[Field.Parameters.value] = request

        super(ProcessingStep, self).__init__(state_id, **kwargs)
# Code example #8
# 0
# File: custom_steps.py  Project: kranthigy/mlmax
    def __init__(
        self,
        state_id,
        estimator,
        job_name,
        data=None,
        hyperparameters=None,
        mini_batch_size=None,
        experiment_config=None,
        wait_for_completion=True,
        tags=None,
        train_data=None,
        test_data=None,
        sm_submit_url=None,
        sm_region=None,
        sm_output_data=None,
        sm_debug_output_data=None,
        **kwargs,
    ):
        """
        Configure a Task state that starts a SageMaker training job, with
        optional execution-time overrides for data, output and script locations.

        Args:
            state_id (str): State name whose length **must be** less than or
                equal to 128 unicode characters. State names **must be** unique
                within the scope of the whole state machine.
            estimator (sagemaker.estimator.EstimatorBase): The estimator for
                the training step. Can be a `BYO estimator, Framework estimator
                <https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html>`_
                or `Amazon built-in algorithm estimator
                <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.
            job_name (str or Placeholder): Specify a training job name, this is
                required for the training job to run. We recommend using the
                :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder
                collection to pass the value dynamically in each execution.
            data: Information about the training data. Please refer to the
                ``fit()`` method of the associated estimator, as this can take
                any of the following forms:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) -
                    If using multiple channels for training data, you can specify a
                    dict mapping channel names to strings or
                    :func:`~sagemaker.session.s3_input` objects.
                * (sagemaker.session.s3_input) - Channel configuration for S3
                    data sources that can provide additional information about the
                    training dataset. See :func:`sagemaker.session.s3_input` for
                    full details.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
            hyperparameters (dict, optional): Specify the hyper parameters for
                the training. They are merged over the hyperparameters
                generated from the estimator; the passed dict itself is not
                modified. (Default: None)
            mini_batch_size (int): Specify this argument only when estimator is
                a built-in estimator of an Amazon algorithm. For other
                estimators, batch size should be specified in the estimator.
            experiment_config (dict, optional): Specify the experiment config
                for the training. (Default: None)
            wait_for_completion (bool, optional): Set to `True` if the Task
                state should wait for the training job to complete before
                proceeding to the next step in the workflow. Set to `False` if
                the Task state should submit the training job and proceed to
                the next step. (default: True)
            tags (list[dict], optional): `List of tags
                <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_
                to associate with the resource.
            train_data (ExecutionInput or StepInput, optional): S3 URI of the
                "train" channel. Only used when ``data`` is None and both
                ``train_data`` and ``test_data`` are placeholders.
            test_data (ExecutionInput or StepInput, optional): S3 URI of the
                "test" channel. Same conditions as ``train_data``.
            sm_submit_url (ExecutionInput or StepInput, optional): Stored as
                the ``sagemaker_submit_directory`` hyperparameter. Ignored
                unless it is a placeholder.
            sm_region (ExecutionInput or StepInput, optional): Stored as the
                ``sagemaker_region`` hyperparameter. Ignored unless it is a
                placeholder.
            sm_output_data (optional): When not None, stored as
                ``OutputDataConfig.S3OutputPath``.
            sm_debug_output_data (optional): When not None, stored as
                ``DebugHookConfig.S3OutputPath``.
        """
        self.estimator = estimator
        self.job_name = job_name

        placeholder_types = (ExecutionInput, StepInput)
        job_name_is_placeholder = isinstance(job_name, placeholder_types)

        # The '.sync' integration makes the state wait for the job to finish.
        if wait_for_completion:
            kwargs[
                Field.Resource.value
            ] = "arn:aws:states:::sagemaker:createTrainingJob.sync"
        else:
            kwargs[
                Field.Resource.value
            ] = "arn:aws:states:::sagemaker:createTrainingJob"

        # A job name is only forwarded to training_config when it is a
        # literal string; placeholders are written into the payload below.
        if isinstance(job_name, str):
            parameters = training_config(
                estimator=estimator,
                inputs=data,
                job_name=job_name,
                mini_batch_size=mini_batch_size,
            )
        else:
            parameters = training_config(
                estimator=estimator, inputs=data, mini_batch_size=mini_batch_size
            )

        # Without static data, build the "train" and "test" channels from the
        # two placeholders (both must be supplied).
        if (
            data is None
            and isinstance(train_data, placeholder_types)
            and isinstance(test_data, placeholder_types)
        ):
            parameters["InputDataConfig"] = [
                {
                    "DataSource": {
                        "S3DataSource": {
                            "S3DataType": "S3Prefix",
                            "S3Uri": s3_uri,
                            "S3DataDistributionType": "FullyReplicated",
                        }
                    },
                    "ChannelName": channel_name,
                }
                for channel_name, s3_uri in (
                    ("train", train_data),
                    ("test", test_data),
                )
            ]

        if sm_output_data is not None:
            parameters.setdefault("OutputDataConfig", {})[
                "S3OutputPath"
            ] = sm_output_data

        # debugger_hook_config may be None or False when no hook is
        # configured; only a real config object has _to_request_dict().
        hook_config = estimator.debugger_hook_config
        if hook_config is not None and hook_config is not False:
            parameters["DebugHookConfig"] = hook_config._to_request_dict()

        if estimator.rules is not None:
            parameters["DebugRuleConfigurations"] = [
                rule.to_debugger_rule_config_dict() for rule in estimator.rules
            ]

        if sm_debug_output_data is not None:
            # setdefault: the hook section may be absent when the estimator
            # carries no debugger hook configuration.
            parameters.setdefault("DebugHookConfig", {})[
                "S3OutputPath"
            ] = sm_debug_output_data

        if job_name_is_placeholder:
            parameters["TrainingJobName"] = job_name

        if hyperparameters is not None:
            # Start from the generated hyperparameters so reserved entries
            # are kept unless explicitly overridden, see
            # github.com/aws/sagemaker-training-toolkit/blob/
            # master/src/sagemaker_training/params.py
            # A fresh dict is built so the caller's dict is never mutated.
            merged = dict(parameters.get("HyperParameters") or {})
            merged.update(hyperparameters)
            parameters["HyperParameters"] = merged

        # Placeholder-valued hyperparameters; the section is created on
        # demand so a missing "HyperParameters" entry does not raise KeyError.
        dynamic_hyperparameters = {}
        if job_name_is_placeholder:
            dynamic_hyperparameters["sagemaker_job_name"] = job_name
        if isinstance(sm_submit_url, placeholder_types):
            dynamic_hyperparameters["sagemaker_submit_directory"] = sm_submit_url
        if isinstance(sm_region, placeholder_types):
            dynamic_hyperparameters["sagemaker_region"] = sm_region
        if dynamic_hyperparameters:
            parameters.setdefault("HyperParameters", {}).update(
                dynamic_hyperparameters
            )

        if experiment_config is not None:
            parameters["ExperimentConfig"] = experiment_config

        # 'S3Operations' is dropped from the payload when present.
        if "S3Operations" in parameters:
            del parameters["S3Operations"]

        if tags:
            parameters["Tags"] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = parameters
        super(MLMaxTrainingStep, self).__init__(state_id, **kwargs)
# Code example #9
# 0
# File: custom_steps.py  Project: kranthigy/mlmax
    def __init__(
        self,
        state_id,
        transformer,
        job_name,
        model_name,
        data,
        outputpath,
        data_type="S3Prefix",
        content_type=None,
        compression_type=None,
        split_type=None,
        experiment_config=None,
        wait_for_completion=True,
        tags=None,
        input_filter=None,
        output_filter=None,
        join_source=None,
        **kwargs,
    ):
        """
        Args:
            state_id (str): State name whose length **must be** less than or
            equal to 128 unicode characters. State names **must be** unique
            within the scope of the whole state machine.

            transformer (sagemaker.transformer.Transformer): The SageMaker
            transformer to use in the TransformStep.

            job_name (str or Placeholder): Specify a transform job name. We
            recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput`
            placeholder collection to pass the value dynamically in each
            execution.

            model_name (str or Placeholder): Specify a model name for the
            transform job to use. We recommend to use
            :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder
            collection to pass the value dynamically in each execution.

            data (str): Input data location in S3.

            data_type (str): What the S3 location defines (default: 'S3Prefix').
                Valid values:

                * 'S3Prefix' - the S3 URI defines a key name prefix. All
                objects with this prefix will be used as inputs for the
                transform job.
                * 'ManifestFile' - the S3 URI points to a single manifest file
                listing each S3 object
                    to use as an input for the transform job.

            content_type (str): MIME type of the input data (default: None).

            compression_type (str): Compression type of the input data, if
            compressed (default: None). Valid values: 'Gzip', None.

            split_type (str): The record delimiter for the input object
            (default: 'None'). Valid values: 'None', 'Line', 'RecordIO', and
            'TFRecord'.

            experiment_config (dict, optional): Specify the experiment config
            for the transform. (Default: None)

            wait_for_completion(bool, optional): Boolean value set to `True` if
            the Task state should wait for the transform job to complete before
            proceeding to the next step in the workflow. Set to `False` if the
            Task state should submit the transform job and proceed to the next
            step. (default: True)

            tags (list[dict], optional): `List to tags
            <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to
            associate with the resource.

            input_filter (str): A JSONPath to select a portion of the input to
            pass to the algorithm container for inference. If you omit the
            field, it gets the value ‘$’, representing the entire input. For
            CSV data, each row is taken as a JSON array, so only index-based
            JSONPaths can be applied, e.g. $[0], $[1:]. CSV data should follow
            the RFC format. See Supported JSONPath Operators for a table of
            supported JSONPath operators. For more information, see the
            SageMaker API documentation for CreateTransformJob. Some examples:
                “$[1:]”, “$.features” (default: None).

            output_filter (str): A JSONPath to select a portion of the
            joined/original output to return as the output. For more
            information, see the SageMaker API documentation for
            CreateTransformJob. Some examples: “$[1:]”, “$.prediction”
            (default: None).

            join_source (str): The source of data to be joined to the transform
            output. It can be set to ‘Input’ meaning the entire input record
            will be joined to the inference result. You can use OutputFilter to
            select the useful portion before uploading to S3. (default: None).
            Valid values: Input, None.
        """
        if wait_for_completion:
            kwargs[
                Field.Resource.value
            ] = "arn:aws:states:::sagemaker:createTransformJob.sync"
        else:
            kwargs[
                Field.Resource.value
            ] = "arn:aws:states:::sagemaker:createTransformJob"

        if isinstance(job_name, str):
            parameters = transform_config(
                transformer=transformer,
                data=data,
                data_type=data_type,
                content_type=content_type,
                compression_type=compression_type,
                split_type=split_type,
                job_name=job_name,
                input_filter=input_filter,
                output_filter=output_filter,
                join_source=join_source,
            )
        else:
            parameters = transform_config(
                transformer=transformer,
                data=data,
                data_type=data_type,
                content_type=content_type,
                compression_type=compression_type,
                split_type=split_type,
                input_filter=input_filter,
                output_filter=output_filter,
                join_source=join_source,
            )

        if isinstance(job_name, (ExecutionInput, StepInput)):
            parameters["TransformJobName"] = job_name

        parameters["ModelName"] = model_name
        parameters["TransformOutput"]["S3OutputPath"] = outputpath

        if experiment_config is not None:
            parameters["ExperimentConfig"] = experiment_config

        if tags:
            parameters["Tags"] = tags_dict_to_kv_list(tags)

        # print(parameters)

        kwargs[Field.Parameters.value] = parameters
        super(MLMaxBatchTransformStep, self).__init__(state_id, **kwargs)
    def __init__(self,
                 state_id,
                 estimator,
                 job_name,
                 data=None,
                 hyperparameters=None,
                 mini_batch_size=None,
                 experiment_config=None,
                 wait_for_completion=True,
                 tags=None,
                 **kwargs):
        """
        Configure a Task state that submits a SageMaker training job.

        The request payload is generated by ``training_config`` from the estimator and is then adjusted with the optional arguments below before being passed to the parent state constructor as the state's parameters.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            estimator (sagemaker.estimator.EstimatorBase): The estimator for the training step. Can be a `BYO estimator, Framework estimator <https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html>`_ or `Amazon built-in algorithm estimator <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.
            job_name (str or Placeholder): Specify a training job name, this is required for the training job to run. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator, as this can take any of the following forms:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                    channels for training data, you can specify a dict mapping channel names to
                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                    provide additional information about the training dataset. See
                    :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
            hyperparameters (dict, optional): Specify the hyper parameters for the training. When given, it is stored as the ``HyperParameters`` entry of the request, overwriting any entry already present in the generated config. (Default: None)
            mini_batch_size (int): Specify this argument only when estimator is a built-in estimator of an Amazon algorithm. For other estimators, batch size should be specified in the estimator.
            experiment_config (dict, optional): Specify the experiment config for the training. (Default: None)
            wait_for_completion (bool, optional): Boolean value set to `True` if the Task state should wait for the training job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the training job and proceed to the next step. (default: True)
            tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
        """
        self.estimator = estimator
        self.job_name = job_name

        # The ".sync" resource is used when the state should wait for the job.
        if wait_for_completion:
            resource_arn = 'arn:aws:states:::sagemaker:createTrainingJob.sync'
        else:
            resource_arn = 'arn:aws:states:::sagemaker:createTrainingJob'
        kwargs[Field.Resource.value] = resource_arn

        # Only a plain string job name is handed to training_config; a
        # placeholder job name is written into the parameters further below.
        config_kwargs = {
            'estimator': estimator,
            'inputs': data,
            'mini_batch_size': mini_batch_size,
        }
        if isinstance(job_name, str):
            config_kwargs['job_name'] = job_name
        parameters = training_config(**config_kwargs)

        # The hook config may be None (unset) or False; both mean "no hook".
        hook_config = estimator.debugger_hook_config
        if hook_config is not None and hook_config is not False:
            parameters['DebugHookConfig'] = hook_config._to_request_dict()

        if estimator.rules is not None:
            parameters['DebugRuleConfigurations'] = [
                rule.to_debugger_rule_config_dict() for rule in estimator.rules
            ]

        if isinstance(job_name, (ExecutionInput, StepInput)):
            parameters['TrainingJobName'] = job_name

        if hyperparameters is not None:
            parameters['HyperParameters'] = hyperparameters

        if experiment_config is not None:
            parameters['ExperimentConfig'] = experiment_config

        # NOTE(review): 'S3Operations' is presumably helper-only metadata that
        # is not part of the training job request - confirm against training_config.
        parameters.pop('S3Operations', None)

        if tags:
            parameters['Tags'] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = parameters
        super(TrainingStep, self).__init__(state_id, **kwargs)
コード例 #11
0
    def __init__(self,
                 state_id,
                 processor,
                 job_name,
                 inputs=None,
                 outputs=None,
                 experiment_config=None,
                 container_arguments=None,
                 container_entrypoint=None,
                 kms_key_id=None,
                 wait_for_completion=True,
                 tags=None,
                 **kwargs):
        """
        Configure a Task state that submits a SageMaker processing job.

        The request payload is generated by ``processing_config`` from the processor, adjusted with the optional arguments below, merged with any user-supplied ``parameters`` dict, and passed to the parent state constructor.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            processor (sagemaker.processing.Processor): The processor for the processing step.
            job_name (str or Placeholder): Specify a processing job name, this is required for the processing job to run. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            inputs (list[:class:`~sagemaker.processing.ProcessingInput`]): Input files for
                the processing job. These must be provided as
                :class:`~sagemaker.processing.ProcessingInput` objects (default: None).
            outputs (list[:class:`~sagemaker.processing.ProcessingOutput`]): Outputs for
                the processing job. These can be specified as either path strings or
                :class:`~sagemaker.processing.ProcessingOutput` objects (default: None).
            experiment_config (dict or Placeholder, optional): Specify the experiment config for the processing. (Default: None)
            container_arguments ([str] or Placeholder): The arguments for a container used to run a processing job.
            container_entrypoint ([str] or Placeholder): The entrypoint for a container used to run a processing job.
            kms_key_id (str or Placeholder): The AWS Key Management Service (AWS KMS) key that Amazon SageMaker
                uses to encrypt the processing job output. KmsKeyId can be an ID of a KMS key,
                ARN of a KMS key, or alias of a KMS key.
                The KmsKeyId is applied to all outputs.
            wait_for_completion (bool, optional): Boolean value set to `True` if the Task state should wait for the processing job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the processing job and proceed to the next step. (default: True)
            tags (list[dict] or Placeholder, optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
            parameters(dict, optional): The value of this field is merged with other arguments to become the request payload for SageMaker `CreateProcessingJob <https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateProcessingJob.html>`_.
                You can use `parameters` to override the value provided by other arguments and specify any field's value dynamically using `Placeholders <https://aws-step-functions-data-science-sdk.readthedocs.io/en/stable/placeholders.html?highlight=placeholder#stepfunctions.inputs.Placeholder>`_.

        """
        if wait_for_completion:
            # Example resource arn: arn:aws:states:::sagemaker:createProcessingJob.sync
            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateProcessingJob,
                IntegrationPattern.WaitForCompletion)
        else:
            # Example resource arn: arn:aws:states:::sagemaker:createProcessingJob
            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateProcessingJob)

        # Only a plain string job name is handed to processing_config; a
        # placeholder job name is written into the parameters right after.
        config_kwargs = {
            'processor': processor,
            'inputs': inputs,
            'outputs': outputs,
            'container_arguments': container_arguments,
            'container_entrypoint': container_entrypoint,
            'kms_key_id': kms_key_id,
        }
        if isinstance(job_name, str):
            config_kwargs['job_name'] = job_name
        processing_parameters = processing_config(**config_kwargs)

        if isinstance(job_name, Placeholder):
            processing_parameters['ProcessingJobName'] = job_name

        if experiment_config is not None:
            processing_parameters['ExperimentConfig'] = experiment_config

        if tags:
            # A placeholder is passed through untouched; a concrete value is
            # converted with tags_dict_to_kv_list.
            if isinstance(tags, Placeholder):
                processing_parameters['Tags'] = tags
            else:
                processing_parameters['Tags'] = tags_dict_to_kv_list(tags)

        # NOTE(review): 'S3Operations' is presumably helper-only metadata that
        # is not part of the processing job request - confirm against processing_config.
        processing_parameters.pop('S3Operations', None)

        # Merge user-supplied parameters (if a dict) into the generated ones.
        user_parameters = kwargs.get(Field.Parameters.value)
        if isinstance(user_parameters, dict):
            merge_dicts(processing_parameters, user_parameters)

        kwargs[Field.Parameters.value] = processing_parameters
        super(ProcessingStep, self).__init__(state_id, **kwargs)
コード例 #12
0
    def __init__(self,
                 state_id,
                 estimator,
                 job_name,
                 data=None,
                 hyperparameters=None,
                 mini_batch_size=None,
                 experiment_config=None,
                 wait_for_completion=True,
                 tags=None,
                 output_data_config_path=None,
                 **kwargs):
        """
        Configure a Task state that submits a SageMaker training job.

        The request payload is generated by ``training_config`` from the estimator and is then adjusted with the optional arguments below before being passed to the parent state constructor as the state's parameters.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            estimator (sagemaker.estimator.EstimatorBase): The estimator for the training step. Can be a `BYO estimator, Framework estimator <https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html>`_ or `Amazon built-in algorithm estimator <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.
            job_name (str or Placeholder): Specify a training job name, this is required for the training job to run. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator, as this can take any of the following forms:

                * (str or Placeholder) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                    channels for training data, you can specify a dict mapping channel names to
                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                    provide additional information about the training dataset. See
                    :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
            hyperparameters: Parameters used for training.
                * (dict, optional) - Hyperparameters supplied will be merged with the Hyperparameters specified in the estimator.
                    If there are duplicate entries, the value provided through this property will be used. (Default: Hyperparameters specified in the estimator.)
                * (Placeholder, optional) - The TrainingStep will use the hyperparameters specified by the Placeholder's value instead of the hyperparameters specified in the estimator.
            mini_batch_size (int): Specify this argument only when estimator is a built-in estimator of an Amazon algorithm. For other estimators, batch size should be specified in the estimator.
            experiment_config (dict, optional): Specify the experiment config for the training. (Default: None)
            wait_for_completion (bool, optional): Boolean value set to `True` if the Task state should wait for the training job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the training job and proceed to the next step. (default: True)
            tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
            output_data_config_path (str or Placeholder, optional): S3 location for saving the training result (model
                artifacts and output files). If specified, it overrides the `output_path` property of `estimator`.
        """
        self.estimator = estimator
        self.job_name = job_name

        if wait_for_completion:
            # Example resource arn: arn:aws:states:::sagemaker:createTrainingJob.sync
            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateTrainingJob,
                IntegrationPattern.WaitForCompletion)
        else:
            # Example resource arn: arn:aws:states:::sagemaker:createTrainingJob
            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateTrainingJob)

        # Convert `data` Placeholder to a JSONPath string because sagemaker.workflow.airflow.training_config does not
        # accept Placeholder in the `input` argument. We will suffix the 'S3Uri' key in `parameters` with ".$" later.
        is_data_placeholder = isinstance(data, Placeholder)
        if is_data_placeholder:
            data = data.to_jsonpath()

        # Only a plain string job name is handed to training_config; a
        # placeholder job name is written into the parameters further below.
        config_kwargs = {
            'estimator': estimator,
            'inputs': data,
            'mini_batch_size': mini_batch_size,
        }
        if isinstance(job_name, str):
            config_kwargs['job_name'] = job_name
        parameters = training_config(**config_kwargs)

        # The hook config may be None (unset) or False; both mean "no hook".
        hook_config = estimator.debugger_hook_config
        if hook_config is not None and hook_config is not False:
            parameters['DebugHookConfig'] = hook_config._to_request_dict()

        if estimator.rules is not None:
            parameters['DebugRuleConfigurations'] = [
                rule.to_debugger_rule_config_dict() for rule in estimator.rules
            ]

        if isinstance(job_name, Placeholder):
            parameters['TrainingJobName'] = job_name

        if output_data_config_path is not None:
            parameters['OutputDataConfig'][
                'S3OutputPath'] = output_data_config_path

        if data is not None and is_data_placeholder:
            # Replace the 'S3Uri' key with one that supports JSONpath value.
            # Support for uri str only: The list will only contain 1 element
            s3_data_source = parameters['InputDataConfig'][0]['DataSource'][
                'S3DataSource']
            data_uri = s3_data_source.pop('S3Uri', None)
            s3_data_source['S3Uri.$'] = data_uri

        if hyperparameters is not None:
            # A dict is merged with the estimator's hyperparameters (when it
            # has any); a Placeholder is used as-is.
            if not isinstance(hyperparameters, Placeholder):
                estimator_hyperparameters = estimator.hyperparameters()
                if estimator_hyperparameters is not None:
                    hyperparameters = self.__merge_hyperparameters(
                        hyperparameters, estimator_hyperparameters)
            parameters['HyperParameters'] = hyperparameters

        if experiment_config is not None:
            parameters['ExperimentConfig'] = experiment_config

        # NOTE(review): 'S3Operations' is presumably helper-only metadata that
        # is not part of the training job request - confirm against training_config.
        parameters.pop('S3Operations', None)

        if tags:
            parameters['Tags'] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = parameters
        super(TrainingStep, self).__init__(state_id, **kwargs)
コード例 #13
0
    def __init__(self,
                 state_id,
                 transformer,
                 job_name,
                 model_name,
                 data,
                 data_type='S3Prefix',
                 content_type=None,
                 compression_type=None,
                 split_type=None,
                 experiment_config=None,
                 wait_for_completion=True,
                 tags=None,
                 input_filter=None,
                 output_filter=None,
                 join_source=None,
                 **kwargs):
        """
        Configure a Task state that submits a SageMaker transform job.

        The request payload is generated by ``transform_config`` from the transformer, adjusted with the arguments below, merged with any user-supplied ``parameters`` dict, and passed to the parent state constructor.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            transformer (sagemaker.transformer.Transformer): The SageMaker transformer to use in the TransformStep.
            job_name (str or Placeholder): Specify a transform job name. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            model_name (str or Placeholder): Specify a model name for the transform job to use. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            data (str or Placeholder): Input data location in S3.
            data_type (str or Placeholder): What the S3 location defines (default: 'S3Prefix').
                Valid values:

                * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will
                    be used as inputs for the transform job.
                * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object
                    to use as an input for the transform job.
            content_type (str or Placeholder): MIME type of the input data (default: None).
            compression_type (str or Placeholder): Compression type of the input data, if compressed (default: None). Valid values: 'Gzip', None.
            split_type (str or Placeholder): The record delimiter for the input object (default: 'None'). Valid values: 'None', 'Line', 'RecordIO', and 'TFRecord'.
            experiment_config (dict or Placeholder, optional): Specify the experiment config for the transform. (Default: None)
            wait_for_completion(bool, optional): Boolean value set to `True` if the Task state should wait for the transform job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the transform job and proceed to the next step. (default: True)
            tags (list[dict] or Placeholder, optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
            input_filter (str or Placeholder): A JSONPath to select a portion of the input to pass to the algorithm container for inference. If you omit the field, it gets the value ‘$’, representing the entire input. For CSV data, each row is taken as a JSON array, so only index-based JSONPaths can be applied, e.g. $[0], $[1:]. CSV data should follow the RFC format. See Supported JSONPath Operators for a table of supported JSONPath operators. For more information, see the SageMaker API documentation for CreateTransformJob. Some examples: “$[1:]”, “$.features” (default: None).
            output_filter (str or Placeholder): A JSONPath to select a portion of the joined/original output to return as the output. For more information, see the SageMaker API documentation for CreateTransformJob. Some examples: “$[1:]”, “$.prediction” (default: None).
            join_source (str or Placeholder): The source of data to be joined to the transform output. It can be set to ‘Input’ meaning the entire input record will be joined to the inference result. You can use OutputFilter to select the useful portion before uploading to S3. (default: None). Valid values: Input, None.
            parameters(dict, optional): The value of this field is merged with other arguments to become the request payload for SageMaker `CreateTransformJob <https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateTransformJob.html>`_.
                You can use `parameters` to override the value provided by other arguments and specify any field's value dynamically using `Placeholders <https://aws-step-functions-data-science-sdk.readthedocs.io/en/stable/placeholders.html?highlight=placeholder#stepfunctions.inputs.Placeholder>`_.

        """
        if wait_for_completion:
            # Example resource arn: arn:aws:states:::sagemaker:createTransformJob.sync
            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateTransformJob,
                IntegrationPattern.WaitForCompletion)
        else:
            # Example resource arn: arn:aws:states:::sagemaker:createTransformJob
            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateTransformJob)

        # Only a plain string job name is handed to transform_config; a
        # placeholder job name is written into the parameters right after.
        config_kwargs = {
            'transformer': transformer,
            'data': data,
            'data_type': data_type,
            'content_type': content_type,
            'compression_type': compression_type,
            'split_type': split_type,
            'input_filter': input_filter,
            'output_filter': output_filter,
            'join_source': join_source,
        }
        if isinstance(job_name, str):
            config_kwargs['job_name'] = job_name
        transform_parameters = transform_config(**config_kwargs)

        if isinstance(job_name, Placeholder):
            transform_parameters['TransformJobName'] = job_name

        transform_parameters['ModelName'] = model_name

        if experiment_config is not None:
            transform_parameters['ExperimentConfig'] = experiment_config

        if tags:
            # A placeholder is passed through untouched; a concrete value is
            # converted with tags_dict_to_kv_list.
            if isinstance(tags, Placeholder):
                transform_parameters['Tags'] = tags
            else:
                transform_parameters['Tags'] = tags_dict_to_kv_list(tags)

        # Merge user-supplied parameters (if a dict) into the generated ones.
        user_parameters = kwargs.get(Field.Parameters.value)
        if isinstance(user_parameters, dict):
            merge_dicts(transform_parameters, user_parameters)

        kwargs[Field.Parameters.value] = transform_parameters
        super(TransformStep, self).__init__(state_id, **kwargs)
コード例 #14
0
    def __init__(self,
                 state_id,
                 tuner,
                 job_name,
                 data,
                 wait_for_completion=True,
                 tags=None,
                 **kwargs):
        """
        Configure a Task state that submits a SageMaker hyperparameter tuning job.

        The request payload is a copy of the dict generated by ``tuning_config`` from the tuner, adjusted with the arguments below, merged with any user-supplied ``parameters`` dict, and passed to the parent state constructor.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            tuner (sagemaker.tuner.HyperparameterTuner): The tuner to use in the TuningStep.
            job_name (str or Placeholder): Specify a tuning job name.  We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator in the tuner, as this can take any of the following forms:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                    channels for training data, you can specify a dict mapping channel names to
                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                    provide additional information about the training dataset. See
                    :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
            wait_for_completion(bool, optional): Boolean value set to `True` if the Task state should wait for the tuning job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the tuning job and proceed to the next step. (default: True)
            tags (list[dict] or Placeholder, optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
            parameters(dict, optional): The value of this field is merged with other arguments to become the request payload for SageMaker `CreateHyperParameterTuningJob <https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateHyperParameterTuningJob.html>`_.
                You can use `parameters` to override the value provided by other arguments and specify any field's value dynamically using `Placeholders <https://aws-step-functions-data-science-sdk.readthedocs.io/en/stable/placeholders.html?highlight=placeholder#stepfunctions.inputs.Placeholder>`_.

        """
        if wait_for_completion:
            # Example resource arn: arn:aws:states:::sagemaker:createHyperParameterTuningJob.sync
            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME,
                SageMakerApi.CreateHyperParameterTuningJob,
                IntegrationPattern.WaitForCompletion)
        else:
            # Example resource arn: arn:aws:states:::sagemaker:createHyperParameterTuningJob
            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME,
                SageMakerApi.CreateHyperParameterTuningJob)

        # Work on a copy so the dict returned by tuning_config is not mutated.
        generated_config = tuning_config(tuner=tuner,
                                         inputs=data,
                                         job_name=job_name)
        tuning_parameters = generated_config.copy()

        if job_name is not None:
            tuning_parameters['HyperParameterTuningJobName'] = job_name

        # NOTE(review): 'S3Operations' is presumably helper-only metadata that
        # is not part of the tuning job request - confirm against tuning_config.
        tuning_parameters.pop('S3Operations', None)

        if tags:
            # A placeholder is passed through untouched; a concrete value is
            # converted with tags_dict_to_kv_list.
            if isinstance(tags, Placeholder):
                tuning_parameters['Tags'] = tags
            else:
                tuning_parameters['Tags'] = tags_dict_to_kv_list(tags)

        # Merge user-supplied parameters (if a dict) into the generated ones.
        user_parameters = kwargs.get(Field.Parameters.value)
        if isinstance(user_parameters, dict):
            merge_dicts(tuning_parameters, user_parameters)

        kwargs[Field.Parameters.value] = tuning_parameters
        super(TuningStep, self).__init__(state_id, **kwargs)
コード例 #15
0
    def __init__(self,
                 state_id,
                 transformer,
                 job_name,
                 model_name,
                 data,
                 data_type='S3Prefix',
                 content_type=None,
                 compression_type=None,
                 split_type=None,
                 experiment_config=None,
                 wait_for_completion=True,
                 tags=None,
                 **kwargs):
        """
        Configure a Task state that starts a SageMaker transform job.

        The resource ARN and the request parameters are stored in ``kwargs`` and forwarded to the parent constructor.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            transformer (sagemaker.transformer.Transformer): The SageMaker transformer to use in the TransformStep.
            job_name (str or Placeholder): Specify a transform job name. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            model_name (str or Placeholder): Specify a model name for the transform job to use. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            data (str): Input data location in S3.
            data_type (str): What the S3 location defines (default: 'S3Prefix').
                Valid values:

                * 'S3Prefix' - the S3 URI defines a key name prefix. All objects with this prefix will
                    be used as inputs for the transform job.
                * 'ManifestFile' - the S3 URI points to a single manifest file listing each S3 object
                    to use as an input for the transform job.
            content_type (str): MIME type of the input data (default: None).
            compression_type (str): Compression type of the input data, if compressed (default: None). Valid values: 'Gzip', None.
            split_type (str): The record delimiter for the input object (default: None). Valid values: 'None', 'Line', 'RecordIO', and 'TFRecord'.
            experiment_config (dict, optional): Specify the experiment config for the transform. (Default: None)
            wait_for_completion(bool, optional): Boolean value set to `True` if the Task state should wait for the transform job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the transform job and proceed to the next step. (default: True)
            tags (list[dict] or Placeholder, optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
        """
        # The ".sync" suffix selects the integration that waits for the job
        # to finish; without it the state only submits the job.
        if wait_for_completion:
            kwargs[
                Field.Resource.
                value] = 'arn:aws:states:::sagemaker:createTransformJob.sync'
        else:
            kwargs[Field.Resource.
                   value] = 'arn:aws:states:::sagemaker:createTransformJob'

        config_args = {
            'transformer': transformer,
            'data': data,
            'data_type': data_type,
            'content_type': content_type,
            'compression_type': compression_type,
            'split_type': split_type,
        }
        # Only a literal string name is handed to transform_config; for any
        # other value the keyword is omitted so its own default applies.
        if isinstance(job_name, str):
            config_args['job_name'] = job_name

        parameters = transform_config(**config_args)

        # Placeholder job names are written into the payload directly.
        if isinstance(job_name, (ExecutionInput, StepInput)):
            parameters['TransformJobName'] = job_name

        parameters['ModelName'] = model_name

        if experiment_config is not None:
            parameters['ExperimentConfig'] = experiment_config

        if tags:
            # A Placeholder is passed through untouched (same handling as the
            # tuning step); concrete tags are converted to key/value pairs.
            parameters['Tags'] = tags if isinstance(
                tags, Placeholder) else tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = parameters
        super(TransformStep, self).__init__(state_id, **kwargs)