コード例 #1
0
def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Model config for a tuned Amazon KNN estimator should point at the
    best training job's artifact via an Airflow XCom template."""
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    training_record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    # Populate the estimator with training artifacts (simulated training run).
    airflow.training_config(knn_estimator, training_record, mini_batch_size=256)

    actual = airflow.model_config_from_estimator(
        instance_type="ml.c4.xlarge",
        estimator=knn_estimator,
        task_id="task_id",
        task_type="tuning",
    )

    model_data_url = (
        "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']"
        "['TrainingJobName'] }}/output/model.tar.gz"
    )
    expected = {
        "ModelName": "knn-%s" % TIME_STAMP,
        "PrimaryContainer": {
            "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
            "Environment": {},
            "ModelDataUrl": model_data_url,
        },
        "ExecutionRoleArn": "{{ role }}",
    }

    assert actual == expected
コード例 #2
0
def test_transform_config_from_amazon_alg_estimator(sagemaker_session):
    """Transform config derived from a trained Amazon KNN estimator should
    reference the training task's job name through an Airflow XCom template."""
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    training_record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")
    transform_data = "{{ transform_data }}"

    # Populate the estimator with training artifacts (simulated training run).
    airflow.training_config(knn_estimator, training_record, mini_batch_size=256)

    actual = airflow.transform_config_from_estimator(
        estimator=knn_estimator,
        task_id="task_id",
        task_type="training",
        instance_count="{{ instance_count }}",
        instance_type="ml.p2.xlarge",
        data=transform_data,
    )

    job_name = "knn-%s" % TIME_STAMP
    model_data_url = (
        "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']['TrainingJobName'] }}"
        "/output/model.tar.gz"
    )
    expected = {
        "Model": {
            "ModelName": job_name,
            "PrimaryContainer": {
                "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
                "Environment": {},
                "ModelDataUrl": model_data_url,
            },
            "ExecutionRoleArn": "{{ role }}",
        },
        "Transform": {
            "TransformJobName": job_name,
            "ModelName": job_name,
            "TransformInput": {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataType": "S3Prefix",
                        "S3Uri": "{{ transform_data }}",
                    }
                }
            },
            "TransformOutput": {"S3OutputPath": "s3://output/%s" % job_name},
            "TransformResources": {
                "InstanceCount": "{{ instance_count }}",
                "InstanceType": "ml.p2.xlarge",
            },
        },
    }

    assert actual == expected
コード例 #3
0
def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Model config built without task info should embed the concrete
    training job name instead of an XCom template."""
    job_name = get_job_name("knn")
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    training_record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    # Populate the estimator with training artifacts (simulated training run).
    airflow.training_config(knn_estimator, training_record, mini_batch_size=256)

    actual = airflow.model_config_from_estimator(
        instance_type="ml.c4.xlarge", estimator=knn_estimator
    )

    expected = {
        "ModelName": job_name,
        "PrimaryContainer": {
            "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
            "Environment": {},
            "ModelDataUrl": "s3://output/{}/output/model.tar.gz".format(job_name),
        },
        "ExecutionRoleArn": "{{ role }}",
    }

    assert actual == expected
コード例 #4
0
def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Model config for a tuning task should pull the best training job's
    name from XCom when building the model data URL."""
    knn_estimator = knn.KNN(role='{{ role }}',
                            train_instance_count='{{ instance_count }}',
                            train_instance_type='ml.m4.xlarge',
                            k=16,
                            sample_size=128,
                            predictor_type='regressor',
                            sagemaker_session=sagemaker_session)

    training_record = amazon_estimator.RecordSet('{{ record }}', 10000, 100, 'S3Prefix')

    # Populate the estimator with training artifacts (simulated training run).
    airflow.training_config(knn_estimator, training_record, mini_batch_size=256)

    actual = airflow.model_config_from_estimator(
        instance_type='ml.c4.xlarge',
        estimator=knn_estimator,
        task_id='task_id',
        task_type='tuning',
    )

    model_data_url = (
        "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']"
        "['TrainingJobName'] }}/output/model.tar.gz"
    )
    expected = {
        'ModelName': 'knn-%s' % TIME_STAMP,
        'PrimaryContainer': {
            'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
            'Environment': {},
            'ModelDataUrl': model_data_url,
        },
        'ExecutionRoleArn': '{{ role }}',
    }

    assert actual == expected
コード例 #5
0
def test_transform_config_from_amazon_alg_estimator(sagemaker_session):
    """Without task info, transform config should name jobs with the Airflow
    execution_date template rather than XCom pulls."""
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    training_record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")
    transform_data = "{{ transform_data }}"

    # Populate the estimator with training artifacts (simulated training run).
    airflow.training_config(knn_estimator, training_record, mini_batch_size=256)

    actual = airflow.transform_config_from_estimator(
        estimator=knn_estimator,
        instance_count="{{ instance_count }}",
        instance_type="ml.p2.xlarge",
        data=transform_data,
    )

    job_name = "knn-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"
    expected = {
        "Model": {
            "ModelName": job_name,
            "PrimaryContainer": {
                "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
                "Environment": {},
                "ModelDataUrl": "s3://output/" + job_name + "/output/model.tar.gz",
            },
            "ExecutionRoleArn": "{{ role }}",
        },
        "Transform": {
            "TransformJobName": job_name,
            "ModelName": job_name,
            "TransformInput": {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataType": "S3Prefix",
                        "S3Uri": "{{ transform_data }}",
                    }
                }
            },
            "TransformOutput": {"S3OutputPath": "s3://output/" + job_name},
            "TransformResources": {
                "InstanceCount": "{{ instance_count }}",
                "InstanceType": "ml.p2.xlarge",
            },
        },
    }

    assert actual == expected
コード例 #6
0
def test_deploy_config_from_framework_estimator(sagemaker_session):
    """Deploy config for a trained MXNet estimator should wire model,
    endpoint config and endpoint together, pulling the training job name
    from XCom for the model artifacts."""
    mxnet_estimator = mxnet.MXNet(
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        py_version="py3",
        framework_version="1.3.0",
        role="{{ role }}",
        train_instance_count=1,
        train_instance_type="ml.m4.xlarge",
        sagemaker_session=sagemaker_session,
        base_job_name="{{ base_job_name }}",
        hyperparameters={"batch_size": 100},
    )

    train_data = "{{ train_data }}"

    # Populate the estimator with training artifacts (simulated training run).
    airflow.training_config(mxnet_estimator, train_data)

    actual = airflow.deploy_config_from_estimator(
        estimator=mxnet_estimator,
        task_id="task_id",
        task_type="training",
        initial_instance_count="{{ instance_count}}",
        instance_type="ml.c4.large",
        endpoint_name="mxnet-endpoint",
    )

    resource_name = "sagemaker-mxnet-%s" % TIME_STAMP
    xcom_job_name = "{{ ti.xcom_pull(task_ids='task_id')['Training']['TrainingJobName'] }}"
    expected = {
        "Model": {
            "ModelName": resource_name,
            "PrimaryContainer": {
                "Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3",
                "Environment": {
                    "SAGEMAKER_PROGRAM": "{{ entry_point }}",
                    "SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/" + xcom_job_name + "/source/sourcedir.tar.gz",
                    "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
                    "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
                    "SAGEMAKER_REGION": "us-west-2",
                },
                "ModelDataUrl": "s3://output/" + xcom_job_name + "/output/model.tar.gz",
            },
            "ExecutionRoleArn": "{{ role }}",
        },
        "EndpointConfig": {
            "EndpointConfigName": resource_name,
            "ProductionVariants": [{
                "InstanceType": "ml.c4.large",
                "InitialInstanceCount": "{{ instance_count}}",
                "ModelName": resource_name,
                "VariantName": "AllTraffic",
                "InitialVariantWeight": 1,
            }],
        },
        "Endpoint": {
            "EndpointName": "mxnet-endpoint",
            "EndpointConfigName": resource_name,
        },
    }

    assert actual == expected
コード例 #7
0
def test_deploy_config_from_amazon_alg_estimator(sagemaker_session):
    """Deploy config without task info should name every resource with the
    Airflow execution_date template."""
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    training_record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    # Populate the estimator with training artifacts (simulated training run).
    airflow.training_config(knn_estimator, training_record, mini_batch_size=256)

    actual = airflow.deploy_config_from_estimator(
        estimator=knn_estimator,
        initial_instance_count="{{ instance_count }}",
        instance_type="ml.p2.xlarge",
    )

    resource_name = "knn-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"
    expected = {
        "Model": {
            "ModelName": resource_name,
            "PrimaryContainer": {
                "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
                "Environment": {},
                "ModelDataUrl": "s3://output/" + resource_name + "/output/model.tar.gz",
            },
            "ExecutionRoleArn": "{{ role }}",
        },
        "EndpointConfig": {
            "EndpointConfigName": resource_name,
            "ProductionVariants": [{
                "InstanceType": "ml.p2.xlarge",
                "InitialInstanceCount": "{{ instance_count }}",
                "ModelName": resource_name,
                "VariantName": "AllTraffic",
                "InitialVariantWeight": 1,
            }],
        },
        "Endpoint": {
            "EndpointName": resource_name,
            "EndpointConfigName": resource_name,
        },
    }

    assert actual == expected
コード例 #8
0
def test_model_config_from_framework_estimator(sagemaker_session):
    """Model config for a framework estimator trained under a task should
    pull the training job name from XCom for both the submit directory and
    the model artifacts."""
    mxnet_estimator = mxnet.MXNet(
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        py_version="py3",
        framework_version="1.3.0",
        role="{{ role }}",
        train_instance_count=1,
        train_instance_type="ml.m4.xlarge",
        sagemaker_session=sagemaker_session,
        base_job_name="{{ base_job_name }}",
        hyperparameters={"batch_size": 100},
    )

    data = "{{ training_data }}"

    # Populate the estimator with training artifacts (simulated training run).
    airflow.training_config(mxnet_estimator, data)

    actual = airflow.model_config_from_estimator(
        instance_type="ml.c4.xlarge",
        estimator=mxnet_estimator,
        task_id="task_id",
        task_type="training",
    )

    xcom_job_name = "{{ ti.xcom_pull(task_ids='task_id')['Training']['TrainingJobName'] }}"
    expected = {
        "ModelName": "sagemaker-mxnet-%s" % TIME_STAMP,
        "PrimaryContainer": {
            "Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3",
            "Environment": {
                "SAGEMAKER_PROGRAM": "{{ entry_point }}",
                "SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/" + xcom_job_name + "/source/sourcedir.tar.gz",
                "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
                "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
                "SAGEMAKER_REGION": "us-west-2",
            },
            "ModelDataUrl": "s3://output/" + xcom_job_name + "/output/model.tar.gz",
        },
        "ExecutionRoleArn": "{{ role }}",
    }

    assert actual == expected
コード例 #9
0
    def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=None, mini_batch_size=None, wait_for_completion=True, **kwargs):
        """
        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            estimator (sagemaker.estimator.EstimatorBase): The estimator for the training step. Can be a `BYO estimator, Framework estimator <https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html>`_ or `Amazon built-in algorithm estimator <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.
            job_name (str or Placeholder): Specify a training job name, this is required for the training job to run. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator, as this can take any of the following forms:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple
                    channels for training data, you can specify a dict mapping channel names to
                    strings or :func:`~sagemaker.session.s3_input` objects.
                * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can
                    provide additional information about the training dataset. See
                    :func:`sagemaker.session.s3_input` for full details.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
            hyperparameters (dict, optional): Specify the hyper parameters for the training. (Default: None)
            mini_batch_size (int): Specify this argument only when estimator is a built-in estimator of an Amazon algorithm. For other estimators, batch size should be specified in the estimator.
            wait_for_completion (bool, optional): Boolean value set to `True` if the Task state should wait for the training job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the training job and proceed to the next step. (default: True)
        """
        self.estimator = estimator
        self.job_name = job_name

        # Pick the Step Functions service-integration ARN: the ".sync" form
        # makes the state block until the training job terminates, while the
        # plain form submits the job and moves on immediately.
        if wait_for_completion:
            kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createTrainingJob.sync'
        else:
            kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createTrainingJob'

        # A concrete string job name can be baked into the generated request;
        # non-string (placeholder) names are injected after the fact below.
        if isinstance(job_name, str):
            parameters = training_config(estimator=estimator, inputs=data, job_name=job_name, mini_batch_size=mini_batch_size)
        else:
            parameters = training_config(estimator=estimator, inputs=data, mini_batch_size=mini_batch_size)

        if isinstance(job_name, (ExecutionInput, StepInput)):
            # Placeholders are resolved per-execution, so they override
            # whatever TrainingJobName training_config produced.
            parameters['TrainingJobName'] = job_name

        if hyperparameters is not None:
            # Caller-supplied hyperparameters replace the estimator's.
            parameters['HyperParameters'] = hyperparameters

        # NOTE(review): S3Operations looks like SDK-internal bookkeeping that
        # the CreateTrainingJob API would reject — confirm before changing.
        if 'S3Operations' in parameters:
            del parameters['S3Operations']

        kwargs[Field.Parameters.value] = parameters
        super(TrainingStep, self).__init__(state_id, **kwargs)
コード例 #10
0
def test_model_config_from_framework_estimator(sagemaker_session):
    """Model config without task info should derive names from the base job
    name plus the Airflow execution_date template."""
    mxnet_estimator = mxnet.MXNet(
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        py_version="py3",
        framework_version="1.3.0",
        role="{{ role }}",
        train_instance_count=1,
        train_instance_type="ml.m4.xlarge",
        sagemaker_session=sagemaker_session,
        base_job_name="{{ base_job_name }}",
        hyperparameters={"batch_size": 100},
    )

    data = "{{ training_data }}"

    # Populate the estimator with training artifacts (simulated training run).
    airflow.training_config(mxnet_estimator, data)

    actual = airflow.model_config_from_estimator(
        instance_type="ml.c4.xlarge", estimator=mxnet_estimator
    )

    job_name = "{{ base_job_name }}-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"
    expected = {
        "ModelName": job_name,
        "PrimaryContainer": {
            "Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3",
            "Environment": {
                "SAGEMAKER_PROGRAM": "{{ entry_point }}",
                "SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/" + job_name + "/source/sourcedir.tar.gz",
                "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
                "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
                "SAGEMAKER_REGION": "us-west-2",
            },
            "ModelDataUrl": "s3://output/" + job_name + "/output/model.tar.gz",
        },
        "ExecutionRoleArn": "{{ role }}",
    }

    assert actual == expected
コード例 #11
0
def test_amazon_alg_training_config_required_args(sagemaker_session):
    """Training config for an Amazon NTM estimator should carry the image,
    resources, sharded input channel and stringified hyperparameters."""
    ntm_estimator = ntm.NTM(
        role="{{ role }}",
        num_topics=10,
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        sagemaker_session=sagemaker_session,
    )
    ntm_estimator.epochs = 32

    training_record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    actual = airflow.training_config(ntm_estimator, training_record, mini_batch_size=256)

    expected = {
        "AlgorithmSpecification": {
            "TrainingImage": "174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:1",
            "TrainingInputMode": "File",
        },
        "OutputDataConfig": {"S3OutputPath": "s3://output/"},
        "TrainingJobName": "ntm-%s" % TIME_STAMP,
        "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": 30,
        },
        "RoleArn": "{{ role }}",
        "InputDataConfig": [{
            "DataSource": {
                "S3DataSource": {
                    "S3DataDistributionType": "ShardedByS3Key",
                    "S3DataType": "S3Prefix",
                    "S3Uri": "{{ record }}",
                }
            },
            "ChannelName": "train",
        }],
        # Hyperparameter values are serialized to strings in the request.
        "HyperParameters": {
            "num_topics": "10",
            "epochs": "32",
            "mini_batch_size": "256",
            "feature_dim": "100",
        },
    }
    assert actual == expected
コード例 #12
0
def test_amazon_alg_training_config_required_args(sagemaker_session):
    """NTM training config without task info should use the concrete job
    name returned by get_job_name."""
    job_name = get_job_name("ntm")
    ntm_estimator = ntm.NTM(
        role="{{ role }}",
        num_topics=10,
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        sagemaker_session=sagemaker_session,
    )
    ntm_estimator.epochs = 32

    training_record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    actual = airflow.training_config(ntm_estimator, training_record, mini_batch_size=256)

    expected = {
        "AlgorithmSpecification": {
            "TrainingImage": "174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:1",
            "TrainingInputMode": "File",
        },
        "OutputDataConfig": {"S3OutputPath": "s3://output/"},
        "TrainingJobName": job_name,
        "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": 30,
        },
        "RoleArn": "{{ role }}",
        "InputDataConfig": [{
            "DataSource": {
                "S3DataSource": {
                    "S3DataDistributionType": "ShardedByS3Key",
                    "S3DataType": "S3Prefix",
                    "S3Uri": "{{ record }}",
                }
            },
            "ChannelName": "train",
        }],
        # Hyperparameter values are serialized to strings in the request.
        "HyperParameters": {
            "num_topics": "10",
            "epochs": "32",
            "mini_batch_size": "256",
            "feature_dim": "100",
        },
    }
    assert actual == expected
コード例 #13
0
def test_pytorch_airflow_config_uploads_data_source_to_s3_when_inputs_not_provided(
        sagemaker_session, cpu_instance_type):
    """Building a training config without explicit inputs should upload the
    local source to S3; the uploaded location must actually contain data."""
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        estimator = PyTorch(
            entry_point=PYTORCH_MNIST_SCRIPT,
            role=ROLE,
            framework_version="1.1.0",
            train_instance_count=2,
            train_instance_type=cpu_instance_type,
            hyperparameters={"epochs": 6, "backend": "gloo"},
        )

        train_config = sm_airflow.training_config(estimator=estimator)

        # The hyperparameter value is JSON-quoted; strip the quotes to get
        # the raw S3 URL.
        uploaded_s3_data = train_config["HyperParameters"][
            "sagemaker_submit_directory"].strip('"')

        transform_config = sm_airflow.transform_config_from_estimator(
            estimator=estimator,
            task_id="transform_config",
            task_type="training",
            instance_count=SINGLE_INSTANCE_COUNT,
            instance_type=cpu_instance_type,
            data=uploaded_s3_data,
            content_type="text/csv",
        )

        dag = DAG(
            "tensorflow_example",
            default_args={
                "owner": "airflow",
                "start_date": airflow.utils.dates.days_ago(2),
                "provide_context": True,
            },
            schedule_interval="@once",
        )

        train_op = SageMakerTrainingOperator(
            task_id="tf_training",
            config=train_config,
            wait_for_completion=True,
            dag=dag,
        )
        transform_op = SageMakerTransformOperator(
            task_id="transform_operator",
            config=transform_config,
            wait_for_completion=True,
            dag=dag,
        )
        transform_op.set_upstream(train_op)

        _assert_that_s3_url_contains_data(sagemaker_session, uploaded_s3_data)
コード例 #14
0
def test_byo_training_config_required_args(sagemaker_session):
    """Training config for a bring-your-own-container estimator should use
    the raw image name and a fully replicated S3 input channel."""
    byo = estimator.Estimator(
        image_name="byo",
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        sagemaker_session=sagemaker_session,
    )
    byo.set_hyperparameters(epochs=32, feature_dim=1024, mini_batch_size=256)

    actual = airflow.training_config(byo, {"train": "{{ training_data }}"})

    expected = {
        "AlgorithmSpecification": {
            "TrainingImage": "byo",
            "TrainingInputMode": "File",
        },
        "OutputDataConfig": {"S3OutputPath": "s3://output/"},
        "TrainingJobName": "byo-%s" % TIME_STAMP,
        "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": 30,
        },
        "RoleArn": "{{ role }}",
        "InputDataConfig": [{
            "DataSource": {
                "S3DataSource": {
                    "S3DataDistributionType": "FullyReplicated",
                    "S3DataType": "S3Prefix",
                    "S3Uri": "{{ training_data }}",
                }
            },
            "ChannelName": "train",
        }],
        # Hyperparameter values are serialized to strings in the request.
        "HyperParameters": {
            "epochs": "32",
            "feature_dim": "1024",
            "mini_batch_size": "256",
        },
    }
    assert actual == expected
コード例 #15
0
def get_training_params(
    model_name,
    job_id,
    role,
    image_uri,
    training_uri,
    validation_uri,
    output_uri,
    hyperparameters,
    kms_key_id,
):
    """Build the template parameter payload for an XGBoost training job.

    Args:
        model_name: Logical name of the model being trained.
        job_id: Unique training job identifier, also used as the job name.
        role: IAM role ARN the training job runs under.
        image_uri: URI of the XGBoost training image.
        training_uri: S3 URI of the CSV training data.
        validation_uri: S3 URI of the CSV validation data.
        output_uri: S3 URI where model artifacts are written.
        hyperparameters: Overrides merged on top of the built-in defaults.
        kms_key_id: KMS key id passed through in the parameter payload.

    Returns:
        dict with a "Parameters" mapping containing the JSON-encoded
        training request.
    """
    # Create the estimator.
    xgb = sagemaker.estimator.Estimator(
        image_uri,
        role,
        instance_count=1,
        instance_type="ml.m4.xlarge",
        output_path=output_uri,
    )

    # Defaults first; caller-supplied overrides win on key collisions.
    defaults = {
        "alpha": "0.2",
        "max_depth": "10",
        "eta": "0.12",
        "gamma": "2.0",
        "min_child_weight": "8.5",
        "subsample": "0.6",
        "objective": "binary:logistic",
        "num_round": "20",
    }
    xgb.set_hyperparameters(**{**defaults, **hyperparameters})

    # CSV data channels for training and validation.
    channels = {
        "train": sagemaker.inputs.TrainingInput(s3_data=training_uri, content_type="csv"),
        "validation": sagemaker.inputs.TrainingInput(s3_data=validation_uri, content_type="csv"),
    }

    # Render the CreateTrainingJob request for the workflow template.
    request = training_config(xgb, inputs=channels, job_name=job_id)
    return {
        "Parameters": {
            "ModelName": model_name,
            "TrainJobId": job_id,
            "TrainJobRequest": json.dumps(request),
            "KmsKeyId": kms_key_id,
        }
    }
コード例 #16
0
def test_byo_training_config_required_args(sagemaker_session):
    """BYO-container training config without task info should use the
    concrete job name and a fully replicated S3 input channel."""
    job_name = get_job_name("byo")
    byo = estimator.Estimator(
        image_name="byo",
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        sagemaker_session=sagemaker_session,
    )
    byo.set_hyperparameters(epochs=32, feature_dim=1024, mini_batch_size=256)

    actual = airflow.training_config(byo, {"train": "{{ training_data }}"})

    expected = {
        "AlgorithmSpecification": {
            "TrainingImage": "byo",
            "TrainingInputMode": "File",
        },
        "OutputDataConfig": {"S3OutputPath": "s3://output/"},
        "TrainingJobName": job_name,
        "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": 30,
        },
        "RoleArn": "{{ role }}",
        "InputDataConfig": [{
            "DataSource": {
                "S3DataSource": {
                    "S3DataDistributionType": "FullyReplicated",
                    "S3DataType": "S3Prefix",
                    "S3Uri": "{{ training_data }}",
                }
            },
            "ChannelName": "train",
        }],
        # Hyperparameter values are serialized to strings in the request.
        "HyperParameters": {
            "epochs": "32",
            "feature_dim": "1024",
            "mini_batch_size": "256",
        },
    }
    assert actual == expected
コード例 #17
0
def _build_airflow_workflow(estimator,
                            instance_type,
                            inputs=None,
                            mini_batch_size=None):
    """Assemble a minimal Airflow DAG (train -> transform) around *estimator*
    and return the training config used by the training operator.

    Args:
        estimator: SageMaker estimator to train and transform with.
        instance_type: Instance type used when building the model config.
        inputs: Training inputs forwarded to ``training_config``.
        mini_batch_size: Optional batch size for Amazon built-in algorithms.

    Returns:
        The training config dict.
    """
    training_config = sm_airflow.training_config(
        estimator=estimator, inputs=inputs, mini_batch_size=mini_batch_size)

    # Sanity checks: model creation and model config generation must work.
    model = estimator.create_model()
    assert model is not None
    model_config = sm_airflow.model_config(instance_type, model)
    assert model_config is not None

    transform_config = sm_airflow.transform_config_from_estimator(
        estimator=estimator,
        task_id="transform_config",
        task_type="training",
        instance_count=SINGLE_INSTANCE_COUNT,
        instance_type=estimator.train_instance_type,
        data=inputs,
        content_type="text/csv",
        input_filter="$",
        output_filter="$",
    )

    dag = DAG(
        "tensorflow_example",
        default_args={
            "owner": "airflow",
            "start_date": airflow.utils.dates.days_ago(2),
            "provide_context": True,
        },
        schedule_interval="@once",
    )

    train_op = SageMakerTrainingOperator(
        task_id="tf_training",
        config=training_config,
        wait_for_completion=True,
        dag=dag,
    )
    transform_op = SageMakerTransformOperator(
        task_id="transform_operator",
        config=transform_config,
        wait_for_completion=True,
        dag=dag,
    )
    transform_op.set_upstream(train_op)

    return training_config
コード例 #18
0
def get_training_params(model_name, job_id, role, image_uri, training_uri,
                        validation_uri, output_uri, hyperparameters):
    """Return CloudFormation-style parameters embedding an XGBoost training request.

    Default hyperparameters below are merged with *hyperparameters*;
    caller-supplied values take precedence.
    """
    # Create the estimator
    xgb_estimator = sagemaker.estimator.Estimator(image_uri,
                                                  role,
                                                  train_instance_count=1,
                                                  train_instance_type='ml.m4.xlarge',
                                                  output_path=output_uri)

    # Defaults, overridden by any caller-supplied entries of the same key.
    default_params = {
        'max_depth': '9',
        'eta': '0.2',
        'gamma': '4',
        'min_child_weight': '300',
        'subsample': '0.8',
        'objective': 'reg:linear',
        'early_stopping_rounds': '10',
        'num_round': '100'
    }
    merged_params = {**default_params, **hyperparameters}
    xgb_estimator.set_hyperparameters(**merged_params)

    # Specify the data source
    train_channel = sagemaker.s3_input(s3_data=training_uri,
                                       content_type='csv')
    validation_channel = sagemaker.s3_input(s3_data=validation_uri,
                                            content_type='csv')
    channels = {'train': train_channel, 'validation': validation_channel}

    # Build the training request via the airflow config helper.
    train_request = training_config(xgb_estimator, inputs=channels, job_name=job_id)
    return {
        "Parameters": {
            "ModelName": model_name,
            "TrainJobId": job_id,
            "TrainJobRequest": json.dumps(train_request),
        }
    }
コード例 #19
0
ファイル: run.py プロジェクト: hartl3y94/ml-models
def get_training_request(
    model_name,
    model_id,
    stage,
    role,
    image_uri,
    training_uri,
    training_bucket,
    hyperparameters,
):
    """Build and JSON-serialize the SageMaker training request for a model.

    Mutates *hyperparameters* in place to point SageMaker at the packaged
    training script before encoding them for the estimator.
    """
    model_uri = "s3://{0}/{1}".format(training_bucket, model_name)

    # Location of the code tarball plus the entry-point script name.
    hyperparameters["sagemaker_program"] = "train.py"
    hyperparameters["sagemaker_submit_directory"] = model_uri + "/code/train.tar.gz"
    encoded_params = json_encode_hyperparameters(hyperparameters)

    # Create the estimator
    estimator = sagemaker.estimator.Estimator(
        image_uri,
        role,
        train_instance_count=1,
        train_instance_type="ml.c5.xlarge",
        base_job_name=model_name,
        output_path=model_uri + "/model",
        hyperparameters=encoded_params,
    )

    # Specify the data source
    train_channel = sagemaker.inputs.TrainingInput(
        s3_data=training_uri, content_type="csv"
    )
    channels = {"train": train_channel}

    # Get the training request
    job_name = get_training_job_name(model_name, model_id)
    request = training_config(estimator, inputs=channels, job_name=job_name)
    return json.dumps(request)
コード例 #20
0
def test_framework_training_config_all_args(sagemaker_session):
    """training_config should honor every TensorFlow estimator argument.

    All values are Jinja template placeholders so the resulting config can be
    rendered by Airflow at execution time.
    """
    tf = tensorflow.TensorFlow(
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        enable_cloudwatch_metrics=False,
        container_log_level="{{ log_level }}",
        code_location="{{ bucket_name }}",
        training_steps=1000,
        evaluation_steps=100,
        checkpoint_path="{{ checkpoint_path }}",
        py_version='py2',
        framework_version='1.10.0',
        requirements_file="",
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        train_volume_size="{{ train_volume_size }}",
        train_volume_kms_key="{{ train_volume_kms_key }}",
        train_max_run="{{ train_max_run }}",
        input_mode='Pipe',
        output_path="{{ output_path }}",
        output_kms_key="{{ output_volume_kms_key }}",
        base_job_name="{{ base_job_name }}",
        tags=[{"{{ key }}": "{{ value }}"}],
        subnets=["{{ subnet }}"],
        security_group_ids=["{{ security_group_ids }}"],
        sagemaker_session=sagemaker_session)

    data = "{{ training_data }}"

    config = airflow.training_config(tf, data)
    # Framework hyperparameters are JSON-encoded (hence the nested quotes),
    # and the job name embeds the Airflow execution_date template.
    expected_config = {
        'AlgorithmSpecification': {
            'TrainingImage': '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow:1.10.0-cpu-py2',
            'TrainingInputMode': 'Pipe'
        },
        'OutputDataConfig': {
            'S3OutputPath': '{{ output_path }}',
            'KmsKeyId': '{{ output_volume_kms_key }}'
        },
        'TrainingJobName': "{{ base_job_name }}-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}",
        'StoppingCondition': {
            'MaxRuntimeInSeconds': '{{ train_max_run }}'
        },
        'ResourceConfig': {
            'InstanceCount': '{{ instance_count }}',
            'InstanceType': 'ml.c4.2xlarge',
            'VolumeSizeInGB': '{{ train_volume_size }}',
            'VolumeKmsKeyId': '{{ train_volume_kms_key }}'
        },
        'RoleArn': '{{ role }}',
        'InputDataConfig': [{
            'DataSource': {
                'S3DataSource': {
                    'S3DataDistributionType': 'FullyReplicated',
                    'S3DataType': 'S3Prefix',
                    'S3Uri': '{{ training_data }}'
                }
            },
            'ChannelName': 'training'
        }],
        'VpcConfig': {
            'Subnets': ['{{ subnet }}'],
            'SecurityGroupIds': ['{{ security_group_ids }}']
        },
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://{{ bucket_name }}/{{ base_job_name }}-'
                                          '{{ execution_date.strftime(\'%Y-%m-%d-%H-%M-%S\') }}'
                                          '/source/sourcedir.tar.gz"',
            'sagemaker_program': '"{{ entry_point }}"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '"{{ log_level }}"',
            'sagemaker_job_name': '"{{ base_job_name }}-{{ execution_date.strftime(\'%Y-%m-%d-%H-%M-%S\') }}"',
            'sagemaker_region': '"us-west-2"',
            'checkpoint_path': '"{{ checkpoint_path }}"',
            'training_steps': '1000',
            'evaluation_steps': '100',
            'sagemaker_requirements': '""'
        },
        'Tags': [{'{{ key }}': '{{ value }}'}],
        # S3Operations tells the Airflow operator to upload source_dir as a
        # tarball before the training job starts.
        'S3Operations': {
            'S3Upload': [{
                'Path': '{{ source_dir }}',
                'Bucket': '{{ bucket_name }}',
                'Key': "{{ base_job_name }}-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"
                       "/source/sourcedir.tar.gz",
                'Tar': True}]
        }
    }
    assert config == expected_config
コード例 #21
0
def test_framework_training_config_required_args(sagemaker_session):
    """With only required args, training_config should fall back to defaults.

    Defaults exercised here: 'File' input mode, the session default bucket
    ('s3://output/'), 86400s max runtime, and a 30 GB volume.
    """
    tf = tensorflow.TensorFlow(
        entry_point="{{ entry_point }}",
        framework_version='1.10.0',
        training_steps=1000,
        evaluation_steps=100,
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        sagemaker_session=sagemaker_session)

    data = "{{ training_data }}"

    config = airflow.training_config(tf, data)
    expected_config = {
        'AlgorithmSpecification': {
            'TrainingImage': '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow:1.10.0-cpu-py2',
            'TrainingInputMode': 'File'
        },
        'OutputDataConfig': {
            'S3OutputPath': 's3://output/'
        },
        'TrainingJobName': "sagemaker-tensorflow-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}",
        'StoppingCondition': {
            'MaxRuntimeInSeconds': 86400
        },
        'ResourceConfig': {
            'InstanceCount': '{{ instance_count }}',
            'InstanceType': 'ml.c4.2xlarge',
            'VolumeSizeInGB': 30
        },
        'RoleArn': '{{ role }}',
        'InputDataConfig': [{
            'DataSource': {
                'S3DataSource': {
                    'S3DataDistributionType': 'FullyReplicated',
                    'S3DataType': 'S3Prefix',
                    'S3Uri': '{{ training_data }}'
                }
            },
            'ChannelName': 'training'
        }],
        # Framework hyperparameters are JSON-encoded (hence the nested quotes).
        'HyperParameters': {
            'sagemaker_submit_directory': '"s3://output/sagemaker-tensorflow-'
                                          '{{ execution_date.strftime(\'%Y-%m-%d-%H-%M-%S\') }}'
                                          '/source/sourcedir.tar.gz"',
            'sagemaker_program': '"{{ entry_point }}"',
            'sagemaker_enable_cloudwatch_metrics': 'false',
            'sagemaker_container_log_level': '20',
            'sagemaker_job_name': '"sagemaker-tensorflow-{{ execution_date.strftime(\'%Y-%m-%d-%H-%M-%S\') }}"',
            'sagemaker_region': '"us-west-2"',
            'checkpoint_path': '"s3://output/sagemaker-tensorflow-{{ execution_date.strftime(\'%Y-%m-%d-%H-%M-%S\') }}'
                               '/checkpoints"',
            'training_steps': '1000',
            'evaluation_steps': '100',
            'sagemaker_requirements': '""'},
        'S3Operations': {
            'S3Upload': [{
                'Path': '{{ entry_point }}',
                'Bucket': 'output',
                'Key': "sagemaker-tensorflow-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"
                       "/source/sourcedir.tar.gz",
                'Tar': True}]
        }
    }
    assert config == expected_config
コード例 #22
0
ファイル: dag_ml_pipeline.py プロジェクト: jjayp4rk/test-sm
# XGBoost built-in algorithm image for the session's region.
xgb_container = get_image_uri(sess.region_name,
                              'xgboost',
                              repo_version="0.90-1")

xgb_estimator = Estimator(image_name=xgb_container,
                          role=role,
                          sagemaker_session=sagemaker.session.Session(sess),
                          **config["train_model"]["estimator_config"])

# train_config specifies SageMaker training configuration
train_data = create_s3_input(config['train_model']['inputs']['train'])
validation_data = create_s3_input(
    config['train_model']['inputs']['validation'])
data_channels = {'train': train_data, 'validation': validation_data}

train_config = training_config(estimator=xgb_estimator, inputs=data_channels)

# Batch inference
xgb_transformer = Transformer(
    model_name=config['batch_transform']['model_name'],
    sagemaker_session=sagemaker.session.Session(sess),
    **config['batch_transform']['transformer_config'])

# NOTE(review): this rebinds the name `transform_config`, shadowing the
# imported helper of the same name — the helper cannot be called again
# after this line. Consider renaming the result variable.
transform_config = transform_config(
    transformer=xgb_transformer,
    **config['batch_transform']['transform_config'])

# =============================================================================
# define airflow DAG and tasks
# =============================================================================
# define airflow DAG
コード例 #23
0
ファイル: mock_ecs.py プロジェクト: leafsheep/GW-Rec-Release
# Resolve region, boto session, and execution role from the job-level config.
region = config["job_level"]["region_name"]
sess = hook.get_session(region_name=region)
role = get_sagemaker_role_arn(config["job_level"]["sagemaker_role"],
                              sess.region_name)

# define KG estimator

# define DKN estimator
train_dkn_estimator = Estimator(
    image_name=
    '662566784674.dkr.ecr.ap-northeast-1.amazonaws.com/gw-dkn:20201114025113',
    role=role,
    sagemaker_session=sagemaker.session.Session(sess),
    **config["train_dkn"]["estimator_config"])

# Airflow-renderable SageMaker training request for the DKN estimator.
train_dkn_config = training_config(estimator=train_dkn_estimator,
                                   inputs=config["train_dkn"]["inputs"])

def mock_train(data, **context):
    """Stand-in for the real training task: return a fixed model artifact URI."""
    model_artifact_uri = 's3://leigh-gw/dkn_model/dkn-2020-11-24-05-16-33-890/output/model.tar.gz'
    return model_artifact_uri


# trigger CDK to deploy model as ECS service using Airflow Python Operator
def task_def(data, **context):
    """Patch the ECS task definition with the model key pulled from XCom.

    NOTE(review): mutates the shared module-level ``config`` dict in place,
    and assumes environment[0] of the first container is the model-key
    variable — confirm against the task-definition template.
    """
    print('in deploy ...')
    model_key = context['ti'].xcom_pull(key='return_value')
    print(model_key)

    task_def = config["ecs_task_definition"]
    task_def['containerDefinitions'][0]['environment'][0]['value'] = model_key
コード例 #24
0
    def __init__(self,
                 state_id,
                 estimator,
                 job_name,
                 data=None,
                 hyperparameters=None,
                 mini_batch_size=None,
                 experiment_config=None,
                 wait_for_completion=True,
                 tags=None,
                 output_data_config_path=None,
                 **kwargs):
        """
        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            estimator (sagemaker.estimator.EstimatorBase): The estimator for the training step. Can be a `BYO estimator, Framework estimator <https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html>`_ or `Amazon built-in algorithm estimator <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.
            job_name (str or Placeholder): Specify a training job name, this is required for the training job to run. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator, as this can take any of the following forms:

                * (str or Placeholder) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                    channels for training data, you can specify a dict mapping channel names to
                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                    provide additional information about the training dataset. See
                    :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
            hyperparameters: Parameters used for training.
                * (dict, optional) - Hyperparameters supplied will be merged with the Hyperparameters specified in the estimator.
                    If there are duplicate entries, the value provided through this property will be used. (Default: Hyperparameters specified in the estimator.)
                * (Placeholder, optional) - The TrainingStep will use the hyperparameters specified by the Placeholder's value instead of the hyperparameters specified in the estimator.
            mini_batch_size (int): Specify this argument only when estimator is a built-in estimator of an Amazon algorithm. For other estimators, batch size should be specified in the estimator.
            experiment_config (dict, optional): Specify the experiment config for the training. (Default: None)
            wait_for_completion (bool, optional): Boolean value set to `True` if the Task state should wait for the training job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the training job and proceed to the next step. (default: True)
            tags (list[dict], optional): `List to tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
            output_data_config_path (str or Placeholder, optional): S3 location for saving the training result (model
                artifacts and output files). If specified, it overrides the `output_path` property of `estimator`.
        """
        self.estimator = estimator
        self.job_name = job_name

        if wait_for_completion:
            """
            Example resource arn: arn:aws:states:::sagemaker:createTrainingJob.sync
            """

            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateTrainingJob,
                IntegrationPattern.WaitForCompletion)
        else:
            """
            Example resource arn: arn:aws:states:::sagemaker:createTrainingJob
            """

            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateTrainingJob)
        # Convert `data` Placeholder to a JSONPath string because sagemaker.workflow.airflow.training_config does not
        # accept Placeholder in the `input` argument. We will suffix the 'S3Uri' key in `parameters` with ".$" later.
        is_data_placeholder = isinstance(data, Placeholder)
        if is_data_placeholder:
            data = data.to_jsonpath()

        # training_config only accepts a job name when it is a concrete
        # string; Placeholder job names are injected after the fact below.
        if isinstance(job_name, str):
            parameters = training_config(estimator=estimator,
                                         inputs=data,
                                         job_name=job_name,
                                         mini_batch_size=mini_batch_size)
        else:
            parameters = training_config(estimator=estimator,
                                         inputs=data,
                                         mini_batch_size=mini_batch_size)

        # Use identity checks for None (PEP 8); `is not False` is kept because
        # debugger_hook_config may be explicitly disabled with False.
        if estimator.debugger_hook_config is not None and estimator.debugger_hook_config is not False:
            parameters['DebugHookConfig'] = (
                estimator.debugger_hook_config._to_request_dict())

        if estimator.rules is not None:
            parameters['DebugRuleConfigurations'] = [
                rule.to_debugger_rule_config_dict() for rule in estimator.rules
            ]

        if isinstance(job_name, Placeholder):
            parameters['TrainingJobName'] = job_name

        if output_data_config_path is not None:
            parameters['OutputDataConfig'][
                'S3OutputPath'] = output_data_config_path

        if data is not None and is_data_placeholder:
            # Replace the 'S3Uri' key with one that supports JSONpath value.
            # Support for uri str only: The list will only contain 1 element
            data_uri = parameters['InputDataConfig'][0]['DataSource'][
                'S3DataSource'].pop('S3Uri', None)
            parameters['InputDataConfig'][0]['DataSource']['S3DataSource'][
                'S3Uri.$'] = data_uri

        if hyperparameters is not None:
            # A dict is merged with the estimator's hyperparameters (caller
            # values win); a Placeholder replaces them wholesale.
            if not isinstance(hyperparameters, Placeholder):
                if estimator.hyperparameters() is not None:
                    hyperparameters = self.__merge_hyperparameters(
                        hyperparameters, estimator.hyperparameters())
            parameters['HyperParameters'] = hyperparameters

        if experiment_config is not None:
            parameters['ExperimentConfig'] = experiment_config

        # S3Operations is an Airflow-operator-only upload directive; Step
        # Functions has no equivalent, so it must not leak into the state.
        if 'S3Operations' in parameters:
            del parameters['S3Operations']

        if tags:
            parameters['Tags'] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = parameters
        super(TrainingStep, self).__init__(state_id, **kwargs)
コード例 #25
0
ファイル: custom_steps.py プロジェクト: kranthigy/mlmax
    def __init__(
        self,
        state_id,
        estimator,
        job_name,
        data=None,
        hyperparameters=None,
        mini_batch_size=None,
        experiment_config=None,
        wait_for_completion=True,
        tags=None,
        train_data=None,
        test_data=None,
        sm_submit_url=None,
        sm_region=None,
        sm_output_data=None,
        sm_debug_output_data=None,
        **kwargs,
    ):
        """
        Args:
            state_id (str): State name whose length **must be** less than or
            equal to 128 unicode characters. State names **must be** unique
            within the scope of the whole state machine.  estimator
            (sagemaker.estimator.EstimatorBase): The estimator for the training
            step. Can be a `BYO estimator, Framework estimator
            <https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html>`_
            or `Amazon built-in algorithm estimator
            <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.
            job_name (str or Placeholder): Specify a training job name, this is
            required for the training job to run. We recommend to use
            :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder
            collection to pass the value dynamically in each execution.  data:
            Information about the training data. Please refer to the ``fit()``
            method of the associated estimator, as this can take any of the
            following forms:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.session.s3_input]) -
                    If using multiple channels for training data, you can specify a
                    dict mapping channel names to strings or
                    :func:`~sagemaker.session.s3_input` objects.
                * (sagemaker.session.s3_input) - Channel configuration for S3
                    data sources that can provide additional information about the
                    training dataset. See :func:`sagemaker.session.s3_input` for
                    full details.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
            hyperparameters (dict, optional): Specify the hyper parameters for
            the training. (Default: None)
            mini_batch_size (int): Specify this argument only when estimator is
            a built-in estimator of an Amazon algorithm. For other estimators,
            batch size should be specified in the estimator.
            experiment_config (dict, optional): Specify the experiment config
            for the training. (Default: None)
            wait_for_completion (bool, optional): Boolean value set to `True`
            if the Task state should wait for the training job to complete
            before proceeding to the next step in the workflow. Set to `False`
            if the Task state should submit the training job and proceed to the
            next step. (default: True)
            tags (list[dict], optional): `List to tags
            <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to
            associate with the resource.
        """
        self.estimator = estimator
        self.job_name = job_name

        # .sync variant blocks the state machine until the job finishes.
        if wait_for_completion:
            kwargs[
                Field.Resource.value
            ] = "arn:aws:states:::sagemaker:createTrainingJob.sync"
        else:
            kwargs[
                Field.Resource.value
            ] = "arn:aws:states:::sagemaker:createTrainingJob"

        # training_config only accepts a job name when it is a concrete
        # string; placeholder job names are injected after the fact below.
        if isinstance(job_name, str):
            parameters = training_config(
                estimator=estimator,
                inputs=data,
                job_name=job_name,
                mini_batch_size=mini_batch_size,
            )
        else:
            parameters = training_config(
                estimator=estimator, inputs=data, mini_batch_size=mini_batch_size
            )

        # When no static `data` was given, build train/test channels from the
        # execution-time placeholders instead.
        if data is None and train_data is not None and test_data is not None:
            if isinstance(train_data, (ExecutionInput, StepInput)) and isinstance(
                test_data, (ExecutionInput, StepInput)
            ):
                parameters["InputDataConfig"] = [
                    {
                        "DataSource": {
                            "S3DataSource": {
                                "S3DataType": "S3Prefix",
                                "S3Uri": train_data,
                                "S3DataDistributionType": "FullyReplicated",
                            }
                        },
                        "ChannelName": "train",
                    },
                    {
                        "DataSource": {
                            "S3DataSource": {
                                "S3DataType": "S3Prefix",
                                "S3Uri": test_data,
                                "S3DataDistributionType": "FullyReplicated",
                            }
                        },
                        "ChannelName": "test",
                    },
                ]

        if sm_output_data is not None:
            parameters["OutputDataConfig"]["S3OutputPath"] = sm_output_data

        if estimator.debugger_hook_config is not None:
            parameters[
                "DebugHookConfig"
            ] = estimator.debugger_hook_config._to_request_dict()

        if estimator.rules is not None:
            parameters["DebugRuleConfigurations"] = [
                rule.to_debugger_rule_config_dict() for rule in estimator.rules
            ]

        if sm_debug_output_data is not None:
            parameters["DebugHookConfig"]["S3OutputPath"] = sm_debug_output_data

        if isinstance(job_name, (ExecutionInput, StepInput)):
            parameters["TrainingJobName"] = job_name

        if hyperparameters is not None:
            if "HyperParameters" in parameters:
                # try to avoid overwriting reserved hyperparameters:
                # github.com/aws/sagemaker-training-toolkit/blob/
                # master/src/sagemaker_training/params.py
                parameters["HyperParameters"].update(hyperparameters)
            else:
                parameters["HyperParameters"] = hyperparameters

        # NOTE(review): assumes 'HyperParameters' already exists in
        # `parameters` at this point; if training_config produced none and
        # `hyperparameters` is None, this raises KeyError — confirm.
        if isinstance(job_name, (ExecutionInput, StepInput)):
            parameters["HyperParameters"]["sagemaker_job_name"] = job_name

        if sm_submit_url is not None and isinstance(
            sm_submit_url, (ExecutionInput, StepInput)
        ):
            parameters["HyperParameters"]["sagemaker_submit_directory"] = sm_submit_url

        if sm_region is not None and isinstance(sm_region, (ExecutionInput, StepInput)):
            parameters["HyperParameters"]["sagemaker_region"] = sm_region

        if experiment_config is not None:
            parameters["ExperimentConfig"] = experiment_config

        # S3Operations is an Airflow-operator-only upload directive; drop it
        # because Step Functions has no equivalent.
        if "S3Operations" in parameters:
            del parameters["S3Operations"]

        if tags:
            parameters["Tags"] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = parameters
        # print(kwargs)
        super(MLMaxTrainingStep, self).__init__(state_id, **kwargs)
コード例 #26
0
def test_amazon_alg_training_config_all_args(sagemaker_session):
    """training_config for an Amazon built-in algorithm (NTM) with all args.

    RecordSet inputs use the 'train' channel and ShardedByS3Key distribution,
    unlike the framework estimators' 'training'/FullyReplicated defaults.
    """
    ntm_estimator = ntm.NTM(
        role="{{ role }}",
        num_topics=10,
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        train_volume_size="{{ train_volume_size }}",
        train_volume_kms_key="{{ train_volume_kms_key }}",
        train_max_run="{{ train_max_run }}",
        input_mode='Pipe',
        output_path="{{ output_path }}",
        output_kms_key="{{ output_volume_kms_key }}",
        base_job_name="{{ base_job_name }}",
        tags=[{"{{ key }}": "{{ value }}"}],
        subnets=["{{ subnet }}"],
        security_group_ids=["{{ security_group_ids }}"],
        sagemaker_session=sagemaker_session)

    ntm_estimator.epochs = 32
    ntm_estimator.mini_batch_size = 256

    # RecordSet of 10000 records with 100 features each.
    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    config = airflow.training_config(ntm_estimator, record)
    expected_config = {
        'AlgorithmSpecification': {
            'TrainingImage': '174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:1',
            'TrainingInputMode': 'Pipe'
        },
        'OutputDataConfig': {
            'S3OutputPath': '{{ output_path }}',
            'KmsKeyId': '{{ output_volume_kms_key }}'
        },
        'TrainingJobName': "{{ base_job_name }}-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}",
        'StoppingCondition': {
            'MaxRuntimeInSeconds': '{{ train_max_run }}'
        },
        'ResourceConfig': {
            'InstanceCount': '{{ instance_count }}',
            'InstanceType': 'ml.c4.2xlarge',
            'VolumeSizeInGB': '{{ train_volume_size }}',
            'VolumeKmsKeyId': '{{ train_volume_kms_key }}'
        },
        'RoleArn': '{{ role }}',
        'InputDataConfig': [{
            'DataSource': {
                'S3DataSource': {
                    'S3DataDistributionType': 'ShardedByS3Key',
                    'S3DataType': 'S3Prefix',
                    'S3Uri': '{{ record }}'
                }
            },
            'ChannelName': 'train'
        }],
        'VpcConfig': {
            'Subnets': ['{{ subnet }}'],
            'SecurityGroupIds': ['{{ security_group_ids }}']
        },
        # feature_dim is derived from the RecordSet (100 features).
        'HyperParameters': {
            'num_topics': '10',
            'epochs': '32',
            'mini_batch_size': '256',
            'feature_dim': '100'
        },
        'Tags': [{'{{ key }}': '{{ value }}'}]
    }

    assert config == expected_config
コード例 #27
0
def test_byo_training_config_all_args(sagemaker_session):
    """training_config for a bring-your-own-container estimator with all args.

    Supplying model_uri/model_channel_name should append a second input
    channel carrying the pre-trained model artifact.
    """
    byo = estimator.Estimator(
        image_name="byo",
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        train_volume_size="{{ train_volume_size }}",
        train_volume_kms_key="{{ train_volume_kms_key }}",
        train_max_run="{{ train_max_run }}",
        input_mode='Pipe',
        output_path="{{ output_path }}",
        output_kms_key="{{ output_volume_kms_key }}",
        base_job_name="{{ base_job_name }}",
        tags=[{"{{ key }}": "{{ value }}"}],
        subnets=["{{ subnet }}"],
        security_group_ids=["{{ security_group_ids }}"],
        model_uri="{{ model_uri }}",
        model_channel_name="{{ model_chanel }}",
        sagemaker_session=sagemaker_session)

    byo.set_hyperparameters(epochs=32,
                            feature_dim=1024,
                            mini_batch_size=256)

    data = {'train': "{{ training_data }}"}

    config = airflow.training_config(byo, data)
    expected_config = {
        'AlgorithmSpecification': {
            'TrainingImage': 'byo',
            'TrainingInputMode': 'Pipe'
        },
        'OutputDataConfig': {
            'S3OutputPath': '{{ output_path }}',
            'KmsKeyId': '{{ output_volume_kms_key }}'
        },
        'TrainingJobName': "{{ base_job_name }}-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}",
        'StoppingCondition': {
            'MaxRuntimeInSeconds': '{{ train_max_run }}'
        },
        'ResourceConfig': {
            'InstanceCount': '{{ instance_count }}',
            'InstanceType': 'ml.c4.2xlarge',
            'VolumeSizeInGB': '{{ train_volume_size }}',
            'VolumeKmsKeyId': '{{ train_volume_kms_key }}'
        },
        'RoleArn': '{{ role }}',
        'InputDataConfig': [
            {
                'DataSource': {
                    'S3DataSource': {
                        'S3DataDistributionType': 'FullyReplicated',
                        'S3DataType': 'S3Prefix',
                        'S3Uri': '{{ training_data }}'
                    }
                },
                'ChannelName': 'train'
            },
            # Second channel carries the pre-trained model artifact declared
            # via model_uri/model_channel_name.
            {
                'DataSource': {
                    'S3DataSource': {
                        'S3DataDistributionType': 'FullyReplicated',
                        'S3DataType': 'S3Prefix',
                        'S3Uri': '{{ model_uri }}'
                    }
                },
                'ContentType': 'application/x-sagemaker-model',
                'InputMode': 'File',
                'ChannelName': '{{ model_chanel }}'
            }
        ],
        'VpcConfig': {
            'Subnets': ['{{ subnet }}'],
            'SecurityGroupIds': ['{{ security_group_ids }}']
        },
        'HyperParameters': {
            'epochs': '32',
            'feature_dim': '1024',
            'mini_batch_size': '256'},
        'Tags': [{'{{ key }}': '{{ value }}'}]
    }
    assert config == expected_config
コード例 #28
0
    'checkpointPath': '/opt/ml/checkpoints'
}

# Bring-your-own-container estimator for the DKN model.
byoc_est = sagemaker.estimator.Estimator(
    '662566784674.dkr.ecr.ap-northeast-1.amazonaws.com/gw-dkn:20201114025113',
    role=sagemaker.get_execution_role(),
    train_instance_count=1,
    train_instance_type=train_instance_type,
    base_job_name='dkn-byoc',
    hyperparameters=hyperparameters)

# Training/eval channels read directly from S3 prefixes.
train_s3 = "s3://leigh-gw/train.csv/"
test_s3 = "s3://leigh-gw/test.csv/"
inputs = {'train': train_s3, 'eval': test_s3}

# Airflow-renderable SageMaker training request for the operator below.
train_config = training_config(estimator=byoc_est, inputs=inputs)


# step - trigger CDK to deploy model as ECS service using Airflow Python Operator
def dkn_model_deploy(data, **context):
    """Placeholder deployment callback for the Airflow PythonOperator.

    Stands in for the real step that would trigger CDK to deploy the
    trained model as an ECS service; currently it only logs a mock
    message. ``data`` and ``**context`` are accepted (Airflow passes the
    task context) but unused.
    """
    message = "mock for dkn deployment"
    print(message)


# Defaults applied to every task in the DAG: owner tag, a start date two
# days in the past (so the DAG is immediately schedulable), and
# provide_context=True so PythonOperator callbacks receive the task context.
default_args = dict(
    owner='airflow',
    start_date=airflow.utils.dates.days_ago(2),
    provide_context=True,
)

dag = DAG('tensorflow_example',
          default_args=default_args,
コード例 #29
0
# Resolve an AWS session via the Airflow connection, then derive the
# region-specific role ARN and the built-in factorization-machines image.
# NOTE(review): `config`, `get_sagemaker_role_arn`, `get_image_uri`, and
# `is_hpo_enabled` are defined/imported outside this excerpt.
hook = AwsHook(aws_conn_id='airflow-sagemaker')
region = config["job_level"]["region_name"]
sess = hook.get_session(region_name=region)
role = get_sagemaker_role_arn(config["train_model"]["sagemaker_role"],
                              sess.region_name)
container = get_image_uri(sess.region_name, 'factorization-machines')
# Whether hyper-parameter tuning is turned on; chooses the upstream task
# the transform step pulls its model artifact from (see below).
hpo_enabled = is_hpo_enabled()

# create estimator
fm_estimator = Estimator(image_name=container,
                         role=role,
                         sagemaker_session=sagemaker.session.Session(sess),
                         **config["train_model"]["estimator_config"])

# train_config specifies SageMaker training configuration
train_config = training_config(estimator=fm_estimator,
                               inputs=config["train_model"]["inputs"])

# create tuner
fm_tuner = HyperparameterTuner(estimator=fm_estimator,
                               **config["tune_model"]["tuner_config"])

# create tuning config
tuner_config = tuning_config(tuner=fm_tuner,
                             inputs=config["tune_model"]["inputs"])

# create transform config
# task_id/task_type select which upstream Airflow task's XCom result the
# batch transform reads the trained model from: the tuning task when HPO
# is enabled, otherwise the plain training task.
transform_config = transform_config_from_estimator(
    estimator=fm_estimator,
    task_id="model_tuning" if hpo_enabled else "model_training",
    task_type="tuning" if hpo_enabled else "training",
    **config["batch_transform"]["transform_config"])
コード例 #30
0
ファイル: dag_rl.py プロジェクト: richardliaw/airflow-rl
        # 3 m4.2xl with 8 cores each. We have to leave 1 core for ray scheduler.
        # Don't forget to change this on the basis of instance type.
        "rl.training.config.num_workers": (8 * 2) - 1

      #"rl.training.config.horizon": 5000,
      #"rl.training.config.num_sgd_iter": 10,
    }
)

# estimator.fit(wait=local_mode)
# job_name = estimator.latest_training_job.job_name
# print("Training job: %s" % job_name)

# train_config specifies SageMaker training configuration
# NOTE(review): `estimator` is constructed earlier in the file (outside
# this excerpt); the S3 input below is a hard-coded mock dataset standing
# in for the config-driven input (commented out underneath).
train_config = training_config(
    estimator=estimator,
    inputs="s3://sagemaker-us-east-1-450145409201/sagemaker/DEMO-pytorch-mnist")  # MOCK
# inputs=config["train_model"]["inputs"])

# # create tuner
# fm_tuner = HyperparameterTuner(
#     estimator=fm_estimator,
#     **config["tune_model"]["tuner_config"]
# )

# # create tuning config
# tuner_config = tuning_config(
#     tuner=fm_tuner,
#     inputs=config["tune_model"]["inputs"])

# # create transform config