def test_transform_config_from_amazon_alg_estimator(sagemaker_session):
    """Transform config derived from a trained KNN estimator with Airflow task info.

    With ``task_id``/``task_type`` set, the model data URL is an XCom-pull
    Jinja template referencing the upstream training task.
    """
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")
    transform_data = "{{ transform_data }}"

    # A training config must be generated first so the estimator carries
    # trained-job state (simulates an upstream training task).
    airflow.training_config(knn_estimator, record, mini_batch_size=256)

    config = airflow.transform_config_from_estimator(
        estimator=knn_estimator,
        task_id="task_id",
        task_type="training",
        instance_count="{{ instance_count }}",
        instance_type="ml.p2.xlarge",
        data=transform_data,
    )

    job_name = "knn-%s" % TIME_STAMP
    model_data_url = (
        "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']['TrainingJobName'] }}"
        "/output/model.tar.gz"
    )
    expected_config = {
        "Model": {
            "ModelName": job_name,
            "PrimaryContainer": {
                "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
                "Environment": {},
                "ModelDataUrl": model_data_url,
            },
            "ExecutionRoleArn": "{{ role }}",
        },
        "Transform": {
            "TransformJobName": job_name,
            "ModelName": job_name,
            "TransformInput": {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataType": "S3Prefix",
                        "S3Uri": "{{ transform_data }}",
                    }
                }
            },
            "TransformOutput": {"S3OutputPath": "s3://output/" + job_name},
            "TransformResources": {
                "InstanceCount": "{{ instance_count }}",
                "InstanceType": "ml.p2.xlarge",
            },
        },
    }

    assert config == expected_config
def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Model config built from a KNN estimator attached to an Airflow tuning task.

    With ``task_type='tuning'`` the model data URL pulls the best training job
    name from the tuning task via XCom.
    """
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    # Generating a training config simulates a prior training step.
    airflow.training_config(knn_estimator, record, mini_batch_size=256)

    config = airflow.model_config_from_estimator(
        instance_type="ml.c4.xlarge",
        estimator=knn_estimator,
        task_id="task_id",
        task_type="tuning",
    )

    model_data_url = (
        "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']"
        "['TrainingJobName'] }}/output/model.tar.gz"
    )
    expected_config = {
        "ModelName": "knn-%s" % TIME_STAMP,
        "PrimaryContainer": {
            "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
            "Environment": {},
            "ModelDataUrl": model_data_url,
        },
        "ExecutionRoleArn": "{{ role }}",
    }

    assert config == expected_config
# Example #3
def test_model_config_from_amazon_alg_estimator_tuning(sagemaker_session):
    """Model config from a KNN estimator tied to an Airflow tuning task.

    The model data URL is expected to be an XCom-pull Jinja template reading
    the best training job name from the tuning task.

    NOTE(review): renamed from ``test_model_config_from_amazon_alg_estimator``
    — that name duplicated an earlier test in this module, so pytest collected
    only the last definition and the shadowed ones silently never ran.
    """
    knn_estimator = knn.KNN(role="{{ role }}",
                            train_instance_count="{{ instance_count }}",
                            train_instance_type='ml.m4.xlarge',
                            k=16,
                            sample_size=128,
                            predictor_type='regressor',
                            sagemaker_session=sagemaker_session)

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    # simulate training: populates the estimator with trained-job state
    airflow.training_config(knn_estimator, record, mini_batch_size=256)

    config = airflow.model_config_from_estimator(instance_type='ml.c4.xlarge',
                                                 estimator=knn_estimator,
                                                 task_id='task_id',
                                                 task_type='tuning')
    expected_config = {
        'ModelName': "knn-%s" % TIME_STAMP,
        'PrimaryContainer': {
            'Image':
            '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
            'Environment': {},
            'ModelDataUrl':
            "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']"
            "['TrainingJobName'] }}/output/model.tar.gz"
        },
        'ExecutionRoleArn': '{{ role }}'
    }

    assert config == expected_config
# Example #4
def test_model_config_from_amazon_alg_estimator_no_task(sagemaker_session):
    """Model config from a KNN estimator without Airflow task info.

    Without ``task_id``/``task_type`` the config uses the literal training job
    name (from ``get_job_name``) instead of an XCom-pull Jinja template.

    NOTE(review): renamed from ``test_model_config_from_amazon_alg_estimator``
    — that name duplicated earlier tests in this module, so pytest collected
    only the last definition and the shadowed ones silently never ran.
    """
    job_name = get_job_name('knn')
    knn_estimator = knn.KNN(role="{{ role }}",
                            train_instance_count="{{ instance_count }}",
                            train_instance_type='ml.m4.xlarge',
                            k=16,
                            sample_size=128,
                            predictor_type='regressor',
                            sagemaker_session=sagemaker_session)

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    # simulate training: populates the estimator with trained-job state
    airflow.training_config(knn_estimator, record, mini_batch_size=256)

    config = airflow.model_config_from_estimator(instance_type='ml.c4.xlarge',
                                                 estimator=knn_estimator)
    expected_config = {
        'ModelName': job_name,
        'PrimaryContainer': {
            'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
            'Environment': {},
            'ModelDataUrl':
            "s3://output/{}/output/model.tar.gz".format(job_name)
        },
        'ExecutionRoleArn': '{{ role }}'
    }

    assert config == expected_config
def test_transform_config_from_amazon_alg_estimator_no_task(sagemaker_session):
    """Transform config from a KNN estimator without Airflow task info.

    Without ``task_id``/``task_type`` the job/model names are Jinja templates
    based on ``execution_date`` rather than XCom pulls.

    NOTE(review): renamed from
    ``test_transform_config_from_amazon_alg_estimator`` — that name duplicated
    an earlier test in this module, so pytest collected only one of the two
    definitions and the other silently never ran.
    """
    knn_estimator = knn.KNN(role="{{ role }}",
                            train_instance_count="{{ instance_count }}",
                            train_instance_type='ml.m4.xlarge',
                            k=16,
                            sample_size=128,
                            predictor_type='regressor',
                            sagemaker_session=sagemaker_session)

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')
    transform_data = "{{ transform_data }}"

    # simulate training: populates the estimator with trained-job state
    airflow.training_config(knn_estimator, record, mini_batch_size=256)

    config = airflow.transform_config_from_estimator(
        estimator=knn_estimator,
        instance_count="{{ instance_count }}",
        instance_type="ml.p2.xlarge",
        data=transform_data)
    expected_config = {
        'Model': {
            'ModelName':
            "knn-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}",
            'PrimaryContainer': {
                'Image':
                '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
                'Environment': {},
                'ModelDataUrl':
                "s3://output/knn-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"
                "/output/model.tar.gz"
            },
            'ExecutionRoleArn': '{{ role }}'
        },
        'Transform': {
            'TransformJobName':
            "knn-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}",
            'ModelName':
            "knn-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}",
            'TransformInput': {
                'DataSource': {
                    'S3DataSource': {
                        'S3DataType': 'S3Prefix',
                        'S3Uri': '{{ transform_data }}'
                    }
                }
            },
            'TransformOutput': {
                'S3OutputPath':
                "s3://output/knn-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"
            },
            'TransformResources': {
                'InstanceCount': '{{ instance_count }}',
                'InstanceType': 'ml.p2.xlarge'
            }
        }
    }

    assert config == expected_config
def test_amazon_alg_training_config_required_args(sagemaker_session):
    """Training config for an NTM estimator given only the required arguments.

    Defaults (volume size, max runtime, input mode) should appear in the
    generated config alongside the templated role/instance-count values.
    """
    ntm_estimator = ntm.NTM(
        role="{{ role }}",
        num_topics=10,
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        sagemaker_session=sagemaker_session,
    )

    # Hyperparameter set after construction must still be serialized.
    ntm_estimator.epochs = 32

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    config = airflow.training_config(ntm_estimator, record, mini_batch_size=256)

    expected_config = {
        "AlgorithmSpecification": {
            "TrainingImage": "174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:1",
            "TrainingInputMode": "File",
        },
        "OutputDataConfig": {"S3OutputPath": "s3://output/"},
        "TrainingJobName": "ntm-%s" % TIME_STAMP,
        "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": 30,
        },
        "RoleArn": "{{ role }}",
        "InputDataConfig": [
            {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataDistributionType": "ShardedByS3Key",
                        "S3DataType": "S3Prefix",
                        "S3Uri": "{{ record }}",
                    }
                },
                "ChannelName": "train",
            }
        ],
        "HyperParameters": {
            "num_topics": "10",
            "epochs": "32",
            "mini_batch_size": "256",
            "feature_dim": "100",
        },
    }
    assert config == expected_config
# Example #7
def test_amazon_alg_training_config_required_args_job_name(sagemaker_session):
    """Training config for an NTM estimator using ``get_job_name`` for naming.

    Same shape as the TIME_STAMP-based variant above, but the expected
    ``TrainingJobName`` comes from the ``get_job_name`` helper.

    NOTE(review): renamed from
    ``test_amazon_alg_training_config_required_args`` — that name duplicated
    an earlier test in this module, so pytest collected only one of the two
    definitions and the other silently never ran.
    """
    job_name = get_job_name('ntm')
    ntm_estimator = ntm.NTM(role="{{ role }}",
                            num_topics=10,
                            train_instance_count="{{ instance_count }}",
                            train_instance_type="ml.c4.2xlarge",
                            sagemaker_session=sagemaker_session)

    # Hyperparameter set after construction must still be serialized.
    ntm_estimator.epochs = 32

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    config = airflow.training_config(ntm_estimator,
                                     record,
                                     mini_batch_size=256)
    expected_config = {
        'AlgorithmSpecification': {
            'TrainingImage':
            '174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:1',
            'TrainingInputMode': 'File'
        },
        'OutputDataConfig': {
            'S3OutputPath': 's3://output/'
        },
        'TrainingJobName':
        job_name,
        'StoppingCondition': {
            'MaxRuntimeInSeconds': 86400
        },
        'ResourceConfig': {
            'InstanceCount': '{{ instance_count }}',
            'InstanceType': 'ml.c4.2xlarge',
            'VolumeSizeInGB': 30
        },
        'RoleArn':
        '{{ role }}',
        'InputDataConfig': [{
            'DataSource': {
                'S3DataSource': {
                    'S3DataDistributionType': 'ShardedByS3Key',
                    'S3DataType': 'S3Prefix',
                    'S3Uri': '{{ record }}'
                }
            },
            'ChannelName': 'train'
        }],
        'HyperParameters': {
            'num_topics': '10',
            'epochs': '32',
            'mini_batch_size': '256',
            'feature_dim': '100'
        }
    }
    assert config == expected_config
def test_deploy_config_from_amazon_alg_estimator(sagemaker_session):
    """Deploy config (Model + EndpointConfig + Endpoint) from a trained KNN estimator."""
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    # Generating a training config simulates a prior training step.
    airflow.training_config(knn_estimator, record, mini_batch_size=256)

    config = airflow.deploy_config_from_estimator(
        estimator=knn_estimator,
        initial_instance_count="{{ instance_count }}",
        instance_type="ml.p2.xlarge",
    )

    # Airflow renders the timestamp at execution time via this Jinja template.
    job_name = "knn-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"
    expected_config = {
        "Model": {
            "ModelName": job_name,
            "PrimaryContainer": {
                "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
                "Environment": {},
                "ModelDataUrl": "s3://output/" + job_name + "/output/model.tar.gz",
            },
            "ExecutionRoleArn": "{{ role }}",
        },
        "EndpointConfig": {
            "EndpointConfigName": job_name,
            "ProductionVariants": [
                {
                    "InstanceType": "ml.p2.xlarge",
                    "InitialInstanceCount": "{{ instance_count }}",
                    "ModelName": job_name,
                    "VariantName": "AllTraffic",
                    "InitialVariantWeight": 1,
                }
            ],
        },
        "Endpoint": {
            "EndpointName": job_name,
            "EndpointConfigName": job_name,
        },
    }

    assert config == expected_config
# Example #9
def test_amazon_alg_training_config_all_args(sagemaker_session):
    """Training config for an NTM estimator with every optional argument supplied.

    Every optional estimator argument (volumes, KMS keys, VPC, tags, output
    path, base job name) must flow through to the generated config.
    """
    ntm_estimator = ntm.NTM(
        role="{{ role }}",
        num_topics=10,
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        train_volume_size="{{ train_volume_size }}",
        train_volume_kms_key="{{ train_volume_kms_key }}",
        train_max_run="{{ train_max_run }}",
        input_mode="Pipe",
        output_path="{{ output_path }}",
        output_kms_key="{{ output_volume_kms_key }}",
        base_job_name="{{ base_job_name }}",
        tags=[{"{{ key }}": "{{ value }}"}],
        subnets=["{{ subnet }}"],
        security_group_ids=["{{ security_group_ids }}"],
        sagemaker_session=sagemaker_session,
    )

    # Hyperparameters set as attributes after construction.
    ntm_estimator.epochs = 32
    ntm_estimator.mini_batch_size = 256

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    config = airflow.training_config(ntm_estimator, record)

    expected_config = {
        "AlgorithmSpecification": {
            "TrainingImage": "174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:1",
            "TrainingInputMode": "Pipe",
        },
        "OutputDataConfig": {
            "S3OutputPath": "{{ output_path }}",
            "KmsKeyId": "{{ output_volume_kms_key }}",
        },
        "TrainingJobName": (
            "{{ base_job_name }}-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"
        ),
        "StoppingCondition": {"MaxRuntimeInSeconds": "{{ train_max_run }}"},
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": "{{ train_volume_size }}",
            "VolumeKmsKeyId": "{{ train_volume_kms_key }}",
        },
        "RoleArn": "{{ role }}",
        "InputDataConfig": [
            {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataDistributionType": "ShardedByS3Key",
                        "S3DataType": "S3Prefix",
                        "S3Uri": "{{ record }}",
                    }
                },
                "ChannelName": "train",
            }
        ],
        "VpcConfig": {
            "Subnets": ["{{ subnet }}"],
            "SecurityGroupIds": ["{{ security_group_ids }}"],
        },
        "HyperParameters": {
            "num_topics": "10",
            "epochs": "32",
            "mini_batch_size": "256",
            "feature_dim": "100",
        },
        "Tags": [{"{{ key }}": "{{ value }}"}],
    }

    assert config == expected_config
def test_amazon_alg_training_config_all_args_explicit_mini_batch(sagemaker_session):
    """Training config with all args, passing ``mini_batch_size`` to ``training_config``.

    Variant of the test above: ``mini_batch_size`` is supplied as a call-time
    keyword rather than an estimator attribute, and the expected job name uses
    the module's ``TIME_STAMP`` template.

    NOTE(review): renamed from ``test_amazon_alg_training_config_all_args`` —
    that name duplicated the previous test in this module, so pytest collected
    only one of the two definitions and the other silently never ran.
    """
    ntm_estimator = ntm.NTM(
        role="{{ role }}",
        num_topics=10,
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        train_volume_size="{{ train_volume_size }}",
        train_volume_kms_key="{{ train_volume_kms_key }}",
        train_max_run="{{ train_max_run }}",
        input_mode="Pipe",
        output_path="{{ output_path }}",
        output_kms_key="{{ output_volume_kms_key }}",
        base_job_name="{{ base_job_name }}",
        tags=[{
            "{{ key }}": "{{ value }}"
        }],
        subnets=["{{ subnet }}"],
        security_group_ids=["{{ security_group_ids }}"],
        sagemaker_session=sagemaker_session,
    )

    # Hyperparameter set after construction must still be serialized.
    ntm_estimator.epochs = 32

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    config = airflow.training_config(ntm_estimator,
                                     record,
                                     mini_batch_size=256)
    expected_config = {
        "AlgorithmSpecification": {
            "TrainingImage":
            "174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:1",
            "TrainingInputMode": "Pipe",
        },
        "OutputDataConfig": {
            "S3OutputPath": "{{ output_path }}",
            "KmsKeyId": "{{ output_volume_kms_key }}",
        },
        "TrainingJobName":
        "{{ base_job_name }}-%s" % TIME_STAMP,
        "StoppingCondition": {
            "MaxRuntimeInSeconds": "{{ train_max_run }}"
        },
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": "{{ train_volume_size }}",
            "VolumeKmsKeyId": "{{ train_volume_kms_key }}",
        },
        "RoleArn":
        "{{ role }}",
        "InputDataConfig": [{
            "DataSource": {
                "S3DataSource": {
                    "S3DataDistributionType": "ShardedByS3Key",
                    "S3DataType": "S3Prefix",
                    "S3Uri": "{{ record }}",
                }
            },
            "ChannelName": "train",
        }],
        "VpcConfig": {
            "Subnets": ["{{ subnet }}"],
            "SecurityGroupIds": ["{{ security_group_ids }}"],
        },
        "HyperParameters": {
            "num_topics": "10",
            "epochs": "32",
            "mini_batch_size": "256",
            "feature_dim": "100",
        },
        "Tags": [{
            "{{ key }}": "{{ value }}"
        }],
    }

    assert config == expected_config