def __init__(self,
                 state_id,
                 model,
                 model_name=None,
                 instance_type=None,
                 tags=None,
                 **kwargs):
        """
        Configure a state that issues a SageMaker `CreateModel` request for `model`.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
            model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
            tags (list[dict] or Placeholders, optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
            parameters (dict, optional): The value of this field is merged with other arguments to become the request payload for SageMaker `CreateModel <https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateModel.html>`_. (Default: None)
                You can use `parameters` to override the value provided by other arguments and specify any field's value dynamically using `Placeholders <https://aws-step-functions-data-science-sdk.readthedocs.io/en/stable/placeholders.html?highlight=placeholder#stepfunctions.inputs.Placeholder>`_.

        Raises:
            ValueError: If `model` is neither a `FrameworkModel` nor a `Model`.
        """
        # Reject unsupported model objects up front.
        if not isinstance(model, (FrameworkModel, Model)):
            raise ValueError(
                "Expected 'model' parameter to be of type 'sagemaker.model.Model', but received type '{}'"
                .format(type(model).__name__))

        if isinstance(model, FrameworkModel):
            # Framework models get their payload from model_config; an explicit
            # model_name replaces whatever name it produced.
            request = model_config(model=model,
                                   instance_type=instance_type,
                                   role=model.role,
                                   image_uri=model.image_uri)
            if model_name:
                request['ModelName'] = model_name
        else:
            # Plain models: assemble the payload directly from the model's attributes.
            request = {'ExecutionRoleArn': model.role}
            request['ModelName'] = model_name or model.name
            request['PrimaryContainer'] = {
                'Environment': model.env,
                'Image': model.image_uri,
                'ModelDataUrl': model.model_data,
            }

        # Remove the 'S3Operations' entry from the payload when present.
        request.pop('S3Operations', None)

        if tags:
            # A Placeholder is passed through untouched; anything else is converted.
            if isinstance(tags, Placeholder):
                request['Tags'] = tags
            else:
                request['Tags'] = tags_dict_to_kv_list(tags)

        parameters_key = Field.Parameters.value
        overrides = kwargs.get(parameters_key)
        if isinstance(overrides, dict):
            # Update model parameters with input parameters
            merge_dicts(request, overrides)

        kwargs[parameters_key] = request

        # Example resource arn: arn:aws:states:::sagemaker:createModel
        kwargs[Field.Resource.value] = get_service_integration_arn(
            SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateModel)

        super(ModelStep, self).__init__(state_id, **kwargs)
def test_byo_model_config(sagemaker_session):
    """Check the config produced by airflow.model_config for a plain, user-supplied Model.

    The expected config carries the model's name, image, environment, model
    data location and role.
    """
    custom_model = model.Model(
        model_data="{{ model_data }}",
        image="{{ image }}",
        role="{{ role }}",
        env={"{{ key }}": "{{ value }}"},
        name="model",
        sagemaker_session=sagemaker_session,
    )

    actual = airflow.model_config(instance_type="ml.c4.xlarge", model=custom_model)

    container = {
        "Image": "{{ image }}",
        "Environment": {"{{ key }}": "{{ value }}"},
        "ModelDataUrl": "{{ model_data }}",
    }
    assert actual == {
        "ModelName": "model",
        "PrimaryContainer": container,
        "ExecutionRoleArn": "{{ role }}",
    }
Exemple #3
0
    def __init__(self, state_id, model, model_name=None, instance_type=None, **kwargs):
        """
        Configure a state that issues a SageMaker `CreateModel` request for `model`.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
            model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. This parameter is typically required when the estimator used is not an `Amazon built-in algorithm <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.

        Raises:
            ValueError: If `model` is neither a `FrameworkModel` nor a `Model`.
        """
        if isinstance(model, FrameworkModel):
            # Framework models get their payload from model_config; an explicit
            # model_name replaces whatever name it produced.
            parameters = model_config(
                model=model,
                instance_type=instance_type,
                role=model.role,
                image=model.image,
            )
            if model_name:
                parameters['ModelName'] = model_name
        elif isinstance(model, Model):
            parameters = {
                'ExecutionRoleArn': model.role,
                'ModelName': model_name or model.name,
                'PrimaryContainer': {
                    # Forward the model's own environment variables instead of
                    # discarding them with an empty mapping.
                    'Environment': model.env,
                    'Image': model.image,
                    'ModelDataUrl': model.model_data
                }
            }
        else:
            raise ValueError(
                "Expected 'model' parameter to be of type 'sagemaker.model.Model', but received type '{}'"
                .format(type(model).__name__))

        # Remove the 'S3Operations' entry from the payload when present.
        if 'S3Operations' in parameters:
            del parameters['S3Operations']

        kwargs[Field.Parameters.value] = parameters
        kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createModel'

        super(ModelStep, self).__init__(state_id, **kwargs)
def test_framework_model_config(sagemaker_session):
    """Check the config produced by airflow.model_config for a ChainerModel.

    The expected config contains the Chainer image, the SAGEMAKER_* environment
    variables, and an S3 upload operation for the source directory.
    """
    framework_model = chainer.ChainerModel(
        model_data="{{ model_data }}",
        role="{{ role }}",
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        image=None,
        py_version="py3",
        framework_version="5.0.0",
        model_server_workers="{{ model_server_worker }}",
        sagemaker_session=sagemaker_session,
    )

    actual = airflow.model_config(instance_type="ml.c4.xlarge", model=framework_model)

    container_env = {
        "SAGEMAKER_PROGRAM": "{{ entry_point }}",
        "SAGEMAKER_SUBMIT_DIRECTORY": (
            "s3://output/sagemaker-chainer-%s/source/sourcedir.tar.gz" % TIME_STAMP
        ),
        "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
        "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        "SAGEMAKER_REGION": "us-west-2",
        "SAGEMAKER_MODEL_SERVER_WORKERS": "{{ model_server_worker }}",
    }
    source_upload = {
        "Path": "{{ source_dir }}",
        "Bucket": "output",
        "Key": "sagemaker-chainer-%s/source/sourcedir.tar.gz" % TIME_STAMP,
        "Tar": True,
    }

    assert actual == {
        "ModelName": "sagemaker-chainer-%s" % TIME_STAMP,
        "PrimaryContainer": {
            "Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-chainer:5.0.0-cpu-py3",
            "Environment": container_env,
            "ModelDataUrl": "{{ model_data }}",
        },
        "ExecutionRoleArn": "{{ role }}",
        "S3Operations": {"S3Upload": [source_upload]},
    }
    def __init__(self,
                 state_id,
                 model,
                 model_name=None,
                 instance_type=None,
                 tags=None,
                 **kwargs):
        """
        Configure a state that issues a SageMaker `CreateModel` request for `model`.

        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
            model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
            tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.

        Raises:
            ValueError: If `model` is neither a `FrameworkModel` nor a `Model`.
        """
        # Reject unsupported model objects up front.
        if not isinstance(model, (FrameworkModel, Model)):
            raise ValueError(
                "Expected 'model' parameter to be of type 'sagemaker.model.Model', but received type '{}'"
                .format(type(model).__name__))

        if isinstance(model, FrameworkModel):
            # Framework models get their payload from model_config; an explicit
            # model_name replaces whatever name it produced.
            payload = model_config(model=model,
                                   instance_type=instance_type,
                                   role=model.role,
                                   image_uri=model.image_uri)
            if model_name:
                payload['ModelName'] = model_name
        else:
            # Plain models: assemble the payload directly from the model's attributes.
            payload = {'ExecutionRoleArn': model.role}
            payload['ModelName'] = model_name or model.name
            payload['PrimaryContainer'] = {
                'Environment': model.env,
                'Image': model.image_uri,
                'ModelDataUrl': model.model_data,
            }

        # Remove the 'S3Operations' entry from the payload when present.
        payload.pop('S3Operations', None)

        if tags:
            payload['Tags'] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = payload

        # Example resource arn: arn:aws:states:::sagemaker:createModel
        kwargs[Field.Resource.value] = get_service_integration_arn(
            SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateModel)

        super(ModelStep, self).__init__(state_id, **kwargs)
Exemple #6
0
def test_framework_model_config(sagemaker_session):
    """Check the config produced by airflow.model_config for a ChainerModel.

    The expected config contains the Chainer image, the SAGEMAKER_* environment
    variables, and an S3 upload operation for the source directory.
    """
    model_under_test = chainer.ChainerModel(
        model_data="{{ model_data }}",
        role="{{ role }}",
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        image=None,
        py_version='py3',
        framework_version='5.0.0',
        model_server_workers="{{ model_server_worker }}",
        sagemaker_session=sagemaker_session)

    generated = airflow.model_config(instance_type='ml.c4.xlarge',
                                     model=model_under_test)

    # Expected pieces, assembled bottom-up.
    environment = {
        'SAGEMAKER_PROGRAM': '{{ entry_point }}',
        'SAGEMAKER_SUBMIT_DIRECTORY':
            "s3://output/sagemaker-chainer-%s/source/sourcedir.tar.gz" % TIME_STAMP,
        'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
        'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
        'SAGEMAKER_REGION': 'us-west-2',
        'SAGEMAKER_MODEL_SERVER_WORKERS': '{{ model_server_worker }}',
    }
    primary_container = {
        'Image': '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-chainer:5.0.0-cpu-py3',
        'Environment': environment,
        'ModelDataUrl': '{{ model_data }}',
    }
    s3_operations = {
        'S3Upload': [{
            'Path': '{{ source_dir }}',
            'Bucket': 'output',
            'Key': "sagemaker-chainer-%s/source/sourcedir.tar.gz" % TIME_STAMP,
            'Tar': True,
        }]
    }

    assert generated == {
        'ModelName': "sagemaker-chainer-%s" % TIME_STAMP,
        'PrimaryContainer': primary_container,
        'ExecutionRoleArn': '{{ role }}',
        'S3Operations': s3_operations,
    }
def _build_airflow_workflow(estimator,
                            instance_type,
                            inputs=None,
                            mini_batch_size=None):
    """Generate SageMaker Airflow configs for `estimator` and wire a train -> transform DAG.

    Builds the training, model and transform configs, asserts that the model
    and its config were produced, creates a DAG with a training operator
    followed by a transform operator, and returns the training config.
    """
    train_cfg = sm_airflow.training_config(estimator=estimator,
                                           inputs=inputs,
                                           mini_batch_size=mini_batch_size)

    created_model = estimator.create_model()
    assert created_model is not None

    created_model_cfg = sm_airflow.model_config(instance_type, created_model)
    assert created_model_cfg is not None

    transform_cfg = sm_airflow.transform_config_from_estimator(
        estimator=estimator,
        task_id="transform_config",
        task_type="training",
        instance_count=SINGLE_INSTANCE_COUNT,
        instance_type=estimator.train_instance_type,
        data=inputs,
        content_type="text/csv",
        input_filter="$",
        output_filter="$",
    )

    workflow = DAG(
        "tensorflow_example",
        default_args={
            "owner": "airflow",
            "start_date": airflow.utils.dates.days_ago(2),
            "provide_context": True,
        },
        schedule_interval="@once",
    )

    training_task = SageMakerTrainingOperator(
        task_id="tf_training",
        config=train_cfg,
        wait_for_completion=True,
        dag=workflow,
    )
    transform_task = SageMakerTransformOperator(
        task_id="transform_operator",
        config=transform_cfg,
        wait_for_completion=True,
        dag=workflow,
    )

    # The transform task is declared downstream of the training task.
    transform_task.set_upstream(training_task)

    return train_cfg
def test_amazon_alg_model_config(sagemaker_session):
    """Check the config produced by airflow.model_config for a PCAModel.

    The expected config has a timestamped model name, the PCA image and an
    empty environment.
    """
    algo_model = pca.PCAModel(
        model_data="{{ model_data }}",
        role="{{ role }}",
        sagemaker_session=sagemaker_session,
    )

    actual = airflow.model_config(instance_type="ml.c4.xlarge", model=algo_model)

    container = {
        "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/pca:1",
        "Environment": {},
        "ModelDataUrl": "{{ model_data }}",
    }
    assert actual == {
        "ModelName": "pca-%s" % TIME_STAMP,
        "PrimaryContainer": container,
        "ExecutionRoleArn": "{{ role }}",
    }
Exemple #9
0
def test_amazon_alg_model_config(sagemaker_session):
    """Check the config produced by airflow.model_config for a PCAModel.

    The expected config has a timestamped model name, the PCA image and an
    empty environment.
    """
    model_under_test = pca.PCAModel(model_data="{{ model_data }}",
                                    role="{{ role }}",
                                    sagemaker_session=sagemaker_session)

    generated = airflow.model_config(instance_type='ml.c4.xlarge',
                                     model=model_under_test)

    primary_container = {
        'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/pca:1',
        'Environment': {},
        'ModelDataUrl': '{{ model_data }}',
    }
    assert generated == {
        'ModelName': "pca-%s" % TIME_STAMP,
        'PrimaryContainer': primary_container,
        'ExecutionRoleArn': '{{ role }}',
    }
def test_byo_framework_model_config(sagemaker_session):
    """Check the config produced by airflow.model_config for a user-supplied FrameworkModel.

    The expected environment combines the user-provided variable with the
    SAGEMAKER_* variables, and the config includes an S3 upload of the source
    directory.
    """
    framework_model = model.FrameworkModel(
        model_data="{{ model_data }}",
        image="{{ image }}",
        role="{{ role }}",
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        env={"{{ key }}": "{{ value }}"},
        name="model",
        sagemaker_session=sagemaker_session,
    )

    actual = airflow.model_config(instance_type="ml.c4.xlarge", model=framework_model)

    container_env = {
        "{{ key }}": "{{ value }}",
        "SAGEMAKER_PROGRAM": "{{ entry_point }}",
        "SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/model/source/sourcedir.tar.gz",
        "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
        "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        "SAGEMAKER_REGION": "us-west-2",
    }
    source_upload = {
        "Path": "{{ source_dir }}",
        "Bucket": "output",
        "Key": "model/source/sourcedir.tar.gz",
        "Tar": True,
    }

    assert actual == {
        "ModelName": "model",
        "PrimaryContainer": {
            "Image": "{{ image }}",
            "Environment": container_env,
            "ModelDataUrl": "{{ model_data }}",
        },
        "ExecutionRoleArn": "{{ role }}",
        "S3Operations": {"S3Upload": [source_upload]},
    }
def test_byo_model_config(sagemaker_session):
    """Check the config produced by airflow.model_config for a plain, user-supplied Model.

    The expected config carries the model's name, image, environment, model
    data location and role.
    """
    model_under_test = model.Model(model_data="{{ model_data }}",
                                   image="{{ image }}",
                                   role="{{ role }}",
                                   env={"{{ key }}": "{{ value }}"},
                                   name='model',
                                   sagemaker_session=sagemaker_session)

    generated = airflow.model_config(instance_type='ml.c4.xlarge',
                                     model=model_under_test)

    primary_container = {
        'Image': '{{ image }}',
        'Environment': {'{{ key }}': '{{ value }}'},
        'ModelDataUrl': '{{ model_data }}',
    }
    assert generated == {
        'ModelName': 'model',
        'PrimaryContainer': primary_container,
        'ExecutionRoleArn': '{{ role }}',
    }
Exemple #12
0
def test_byo_framework_model_config(sagemaker_session):
    """Check the config produced by airflow.model_config for a user-supplied FrameworkModel.

    The expected environment combines the user-provided variable with the
    SAGEMAKER_* variables, and the config includes an S3 upload of the source
    directory.
    """
    model_under_test = model.FrameworkModel(
        model_data="{{ model_data }}",
        image="{{ image }}",
        role="{{ role }}",
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        env={"{{ key }}": "{{ value }}"},
        name='model',
        sagemaker_session=sagemaker_session)

    generated = airflow.model_config(instance_type='ml.c4.xlarge',
                                     model=model_under_test)

    # Expected pieces, assembled bottom-up.
    environment = {
        '{{ key }}': '{{ value }}',
        'SAGEMAKER_PROGRAM': '{{ entry_point }}',
        'SAGEMAKER_SUBMIT_DIRECTORY': 's3://output/model/source/sourcedir.tar.gz',
        'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
        'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
        'SAGEMAKER_REGION': 'us-west-2',
    }
    primary_container = {
        'Image': '{{ image }}',
        'Environment': environment,
        'ModelDataUrl': '{{ model_data }}',
    }
    s3_operations = {
        'S3Upload': [{
            'Path': '{{ source_dir }}',
            'Bucket': 'output',
            'Key': 'model/source/sourcedir.tar.gz',
            'Tar': True,
        }]
    }

    assert generated == {
        'ModelName': 'model',
        'PrimaryContainer': primary_container,
        'ExecutionRoleArn': '{{ role }}',
        'S3Operations': s3_operations,
    }
Exemple #13
0
    def __init__(
        self,
        state_id,
        model,
        model_data_url=None,
        sagemaker_submit_directory=None,
        model_name=None,
        instance_type=None,
        tags=None,
        **kwargs,
    ):
        """
        Configure a state that issues a SageMaker `CreateModel` request for `model`.

        Args:
            state_id (str): State name whose length **must be** less than or
                equal to 128 unicode characters. State names **must be** unique
                within the scope of the whole state machine.

            model (sagemaker.model.Model): The SageMaker model to use in the
                ModelStep. If :py:class:`TrainingStep` was used to train the
                model and saving the model is the next step in the workflow,
                the output of :py:func:`TrainingStep.get_expected_model()` can
                be passed here.

            model_data_url (optional): When truthy and `model` is a
                `FrameworkModel`, replaces the `ModelDataUrl` of the primary
                container. Ignored for plain `Model` objects.

            sagemaker_submit_directory (optional): When truthy and `model` is
                a `FrameworkModel`, replaces the `SAGEMAKER_SUBMIT_DIRECTORY`
                environment variable of the primary container. Ignored for
                plain `Model` objects.

            model_name (str or Placeholder, optional): Specify a model name,
                this is required for creating the model. We recommend to use
                :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder
                collection to pass the value dynamically in each execution.

            instance_type (str, optional): The EC2 instance type to deploy this
                Model to. For example, 'ml.p2.xlarge'. This parameter is
                typically required when the estimator used is not an `Amazon
                built-in algorithm
                <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.

            tags (list[dict], optional): `List of tags
                <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_
                to associate with the resource.

        Raises:
            ValueError: If `model` is neither a `FrameworkModel` nor a `Model`.
        """
        if isinstance(model, FrameworkModel):
            parameters = model_config(
                model=model,
                instance_type=instance_type,
                role=model.role,
                image=model.image,
            )
            if model_name:
                parameters["ModelName"] = model_name
            # Caller-supplied value (e.g. a placeholder) for the model data url.
            if model_data_url:
                parameters["PrimaryContainer"]["ModelDataUrl"] = model_data_url
            # Caller-supplied value (e.g. a placeholder) for the script location.
            if sagemaker_submit_directory:
                parameters["PrimaryContainer"]["Environment"][
                    "SAGEMAKER_SUBMIT_DIRECTORY"
                ] = sagemaker_submit_directory
        elif isinstance(model, Model):
            parameters = {
                "ExecutionRoleArn": model.role,
                "ModelName": model_name or model.name,
                "PrimaryContainer": {
                    # Forward the model's own environment variables instead of
                    # discarding them with an empty mapping.
                    "Environment": model.env,
                    "Image": model.image,
                    "ModelDataUrl": model.model_data,
                },
            }
        else:
            raise ValueError(
                "Expected 'model' parameter to be of type 'sagemaker.model.Model'"
                f", but received type '{type(model).__name__}'"
            )

        # Remove the 'S3Operations' entry from the payload when present.
        if "S3Operations" in parameters:
            del parameters["S3Operations"]

        if tags:
            parameters["Tags"] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = parameters
        kwargs[Field.Resource.value] = "arn:aws:states:::sagemaker:createModel"

        super(MLMaxModelStep, self).__init__(state_id, **kwargs)