def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Model config built from a first-party (KNN) estimator via a tuning task.

    The ModelDataUrl must point at the best training job pulled from the
    tuning task's XCom, and the container image is the KNN algorithm image.
    """
    estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )
    train_records = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    # Populate the estimator's job state as if training had already run.
    airflow.training_config(estimator, train_records, mini_batch_size=256)

    actual = airflow.model_config_from_estimator(
        instance_type="ml.c4.xlarge",
        estimator=estimator,
        task_id="task_id",
        task_type="tuning",
    )

    assert actual == {
        "ModelName": "knn-%s" % TIME_STAMP,
        "PrimaryContainer": {
            "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
            "Environment": {},
            "ModelDataUrl": "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']"
            "['TrainingJobName'] }}/output/model.tar.gz",
        },
        "ExecutionRoleArn": "{{ role }}",
    }
def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Model config from a KNN estimator attached to a tuning Airflow task.

    NOTE(review): this def name is repeated elsewhere in the file, so only
    the last definition is collected by pytest — confirm which copy is live.
    """
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type='ml.m4.xlarge',
        k=16,
        sample_size=128,
        predictor_type='regressor',
        sagemaker_session=sagemaker_session,
    )
    record_set = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    # Simulate a completed training run so the model config can be derived.
    airflow.training_config(knn_estimator, record_set, mini_batch_size=256)

    expected = {
        'ModelName': "knn-%s" % TIME_STAMP,
        'PrimaryContainer': {
            'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
            'Environment': {},
            'ModelDataUrl': "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']"
                            "['TrainingJobName'] }}/output/model.tar.gz",
        },
        'ExecutionRoleArn': '{{ role }}',
    }

    built = airflow.model_config_from_estimator(
        instance_type='ml.c4.xlarge',
        estimator=knn_estimator,
        task_id='task_id',
        task_type='tuning',
    )
    assert built == expected
def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Model config from a KNN estimator with no Airflow task reference.

    Without ``task_id``/``task_type`` the config uses the concrete job name
    (from ``get_job_name``) rather than an XCom template expression.
    """
    job_name = get_job_name('knn')
    estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type='ml.m4.xlarge',
        k=16,
        sample_size=128,
        predictor_type='regressor',
        sagemaker_session=sagemaker_session,
    )
    records = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    # Simulate a completed training run so the model config can be derived.
    airflow.training_config(estimator, records, mini_batch_size=256)

    produced = airflow.model_config_from_estimator(
        instance_type='ml.c4.xlarge',
        estimator=estimator,
    )

    assert produced == {
        'ModelName': job_name,
        'PrimaryContainer': {
            'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
            'Environment': {},
            'ModelDataUrl': "s3://output/{}/output/model.tar.gz".format(job_name),
        },
        'ExecutionRoleArn': '{{ role }}',
    }
def test_model_config_from_framework_estimator(sagemaker_session):
    """Model config built from a framework (MXNet) estimator via a training task.

    Framework estimators additionally carry SAGEMAKER_* environment variables
    (entry point, submit directory, log level, region) in the container config;
    both the submit directory and model artifact paths are XCom templates.
    """
    estimator = mxnet.MXNet(
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        py_version="py3",
        framework_version="1.3.0",
        role="{{ role }}",
        train_instance_count=1,
        train_instance_type="ml.m4.xlarge",
        sagemaker_session=sagemaker_session,
        base_job_name="{{ base_job_name }}",
        hyperparameters={"batch_size": 100},
    )
    train_data = "{{ training_data }}"

    # Populate the estimator's job state as if training had already run.
    airflow.training_config(estimator, train_data)

    actual = airflow.model_config_from_estimator(
        instance_type="ml.c4.xlarge",
        estimator=estimator,
        task_id="task_id",
        task_type="training",
    )

    assert actual == {
        "ModelName": "sagemaker-mxnet-%s" % TIME_STAMP,
        "PrimaryContainer": {
            "Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3",
            "Environment": {
                "SAGEMAKER_PROGRAM": "{{ entry_point }}",
                "SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']"
                "['TrainingJobName'] }}/source/sourcedir.tar.gz",
                "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
                "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
                "SAGEMAKER_REGION": "us-west-2",
            },
            "ModelDataUrl": "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']['TrainingJobName'] }}"
            "/output/model.tar.gz",
        },
        "ExecutionRoleArn": "{{ role }}",
    }
def test_model_config_from_framework_estimator(sagemaker_session):
    """Model config from an MXNet estimator tied to a training Airflow task.

    NOTE(review): this def name is repeated elsewhere in the file, so only
    the last definition is collected by pytest — confirm which copy is live.
    """
    mxnet_estimator = mxnet.MXNet(
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        py_version='py3',
        framework_version='1.3.0',
        role="{{ role }}",
        train_instance_count=1,
        train_instance_type='ml.m4.xlarge',
        sagemaker_session=sagemaker_session,
        base_job_name="{{ base_job_name }}",
        hyperparameters={'batch_size': 100},
    )
    data = "{{ training_data }}"

    # Simulate a completed training run so the model config can be derived.
    airflow.training_config(mxnet_estimator, data)

    expected = {
        'ModelName': "sagemaker-mxnet-%s" % TIME_STAMP,
        'PrimaryContainer': {
            'Image': '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3',
            'Environment': {
                'SAGEMAKER_PROGRAM': '{{ entry_point }}',
                'SAGEMAKER_SUBMIT_DIRECTORY': "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']"
                                              "['TrainingJobName'] }}/source/sourcedir.tar.gz",
                'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
                'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
                'SAGEMAKER_REGION': 'us-west-2',
            },
            'ModelDataUrl': "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']['TrainingJobName'] }}"
                            "/output/model.tar.gz",
        },
        'ExecutionRoleArn': '{{ role }}',
    }

    built = airflow.model_config_from_estimator(
        instance_type='ml.c4.xlarge',
        estimator=mxnet_estimator,
        task_id='task_id',
        task_type='training',
    )
    assert built == expected