def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Compare the model config built from a KNN estimator with a literal dict.

    ``airflow.training_config`` is called on the estimator first (the step the
    original comment labels "simulate training"); the model config is then
    requested with ``task_id="task_id"`` and ``task_type="tuning"``.
    """
    estimator = knn.KNN(
        sagemaker_session=sagemaker_session,
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        predictor_type="regressor",
        k=16,
        sample_size=128,
    )
    training_records = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    # simulate training
    airflow.training_config(estimator, training_records, mini_batch_size=256)

    actual = airflow.model_config_from_estimator(
        estimator=estimator,
        instance_type="ml.c4.xlarge",
        task_id="task_id",
        task_type="tuning",
    )

    # Templated XCom lookup embedded in the expected model data URL.
    best_job_template = (
        "{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']"
        "['TrainingJobName'] }}"
    )
    expected_container = {
        "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
        "Environment": {},
        "ModelDataUrl": "s3://output/" + best_job_template + "/output/model.tar.gz",
    }
    expected = {
        "ModelName": "knn-%s" % TIME_STAMP,
        "PrimaryContainer": expected_container,
        "ExecutionRoleArn": "{{ role }}",
    }

    assert actual == expected
def test_transform_config_from_amazon_alg_estimator(sagemaker_session):
    """Compare the transform config built from a KNN estimator with a literal dict.

    ``airflow.training_config`` is called on the estimator first (the step the
    original comment labels "simulate training"); the transform config is then
    requested with ``task_id="task_id"`` and ``task_type="training"``.
    """
    estimator = knn.KNN(
        sagemaker_session=sagemaker_session,
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        predictor_type="regressor",
        k=16,
        sample_size=128,
    )
    training_records = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")
    input_uri = "{{ transform_data }}"

    # simulate training
    airflow.training_config(estimator, training_records, mini_batch_size=256)

    actual = airflow.transform_config_from_estimator(
        estimator=estimator,
        task_id="task_id",
        task_type="training",
        data=input_uri,
        instance_type="ml.p2.xlarge",
        instance_count="{{ instance_count }}",
    )

    # The same timestamped name is expected for the model and the transform job.
    job_name = "knn-%s" % TIME_STAMP
    # Templated XCom lookup embedded in the expected model data URL.
    training_job_template = (
        "{{ ti.xcom_pull(task_ids='task_id')['Training']['TrainingJobName'] }}"
    )

    expected_model = {
        "ModelName": job_name,
        "PrimaryContainer": {
            "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
            "Environment": {},
            "ModelDataUrl": "s3://output/" + training_job_template + "/output/model.tar.gz",
        },
        "ExecutionRoleArn": "{{ role }}",
    }
    expected_transform = {
        "TransformJobName": job_name,
        "ModelName": job_name,
        "TransformInput": {
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": "{{ transform_data }}",
                }
            }
        },
        "TransformOutput": {"S3OutputPath": "s3://output/" + job_name},
        "TransformResources": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.p2.xlarge",
        },
    }

    assert actual == {"Model": expected_model, "Transform": expected_transform}
def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Assert the model config for a KNN estimator used with a tuning task.

    The estimator is passed through ``airflow.training_config`` before
    ``airflow.model_config_from_estimator`` is called, and the result is
    compared against a hand-written expected dict.
    """
    knn_kwargs = {
        'role': "{{ role }}",
        'train_instance_count': "{{ instance_count }}",
        'train_instance_type': 'ml.m4.xlarge',
        'k': 16,
        'sample_size': 128,
        'predictor_type': 'regressor',
        'sagemaker_session': sagemaker_session,
    }
    algo = knn.KNN(**knn_kwargs)
    records = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    # simulate training
    airflow.training_config(algo, records, mini_batch_size=256)

    result = airflow.model_config_from_estimator(
        instance_type='ml.c4.xlarge',
        estimator=algo,
        task_id='task_id',
        task_type='tuning',
    )

    # Expected URL: output bucket + templated XCom lookup + model artifact path.
    model_data_url = ''.join([
        "s3://output/",
        "{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']",
        "['TrainingJobName'] }}",
        "/output/model.tar.gz",
    ])

    wanted = {}
    wanted['ModelName'] = "knn-%s" % TIME_STAMP
    wanted['PrimaryContainer'] = {
        'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
        'Environment': {},
        'ModelDataUrl': model_data_url,
    }
    wanted['ExecutionRoleArn'] = '{{ role }}'

    assert result == wanted
def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Assert the model config for a KNN estimator against ``get_job_name('knn')``.

    The expected model name and model data URL are both derived from the value
    returned by ``get_job_name('knn')``. The estimator is passed through
    ``airflow.training_config`` before the model config is requested.
    """
    # Obtained before the estimator is built, as in the original ordering.
    expected_name = get_job_name('knn')

    algo = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type='ml.m4.xlarge',
        k=16,
        sample_size=128,
        predictor_type='regressor',
        sagemaker_session=sagemaker_session,
    )
    records = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    # simulate training
    airflow.training_config(algo, records, mini_batch_size=256)

    result = airflow.model_config_from_estimator(
        estimator=algo,
        instance_type='ml.c4.xlarge',
    )

    expected_container = {
        'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
        'Environment': {},
        'ModelDataUrl': "s3://output/{}/output/model.tar.gz".format(expected_name),
    }
    wanted = {
        'ModelName': expected_name,
        'PrimaryContainer': expected_container,
        'ExecutionRoleArn': '{{ role }}',
    }

    assert result == wanted
def test_transform_config_from_amazon_alg_estimator(sagemaker_session):
    """Assert the transform config for a KNN estimator with templated job names.

    The estimator is passed through ``airflow.training_config`` before
    ``airflow.transform_config_from_estimator`` is called. Every expected name
    uses the same ``execution_date``-templated string.
    """
    algo = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type='ml.m4.xlarge',
        k=16,
        sample_size=128,
        predictor_type='regressor',
        sagemaker_session=sagemaker_session,
    )
    records = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')
    input_uri = "{{ transform_data }}"

    # simulate training
    airflow.training_config(algo, records, mini_batch_size=256)

    result = airflow.transform_config_from_estimator(
        estimator=algo,
        data=input_uri,
        instance_type="ml.p2.xlarge",
        instance_count="{{ instance_count }}",
    )

    # Templated name expected for the model, the transform job and the output prefix.
    templated_name = "knn-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"
    output_prefix = "s3://output/" + templated_name

    model_section = {
        'ModelName': templated_name,
        'PrimaryContainer': {
            'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
            'Environment': {},
            'ModelDataUrl': output_prefix + "/output/model.tar.gz",
        },
        'ExecutionRoleArn': '{{ role }}',
    }
    transform_section = {
        'TransformJobName': templated_name,
        'ModelName': templated_name,
        'TransformInput': {
            'DataSource': {
                'S3DataSource': {
                    'S3DataType': 'S3Prefix',
                    'S3Uri': '{{ transform_data }}',
                }
            }
        },
        'TransformOutput': {'S3OutputPath': output_prefix},
        'TransformResources': {
            'InstanceCount': '{{ instance_count }}',
            'InstanceType': 'ml.p2.xlarge',
        },
    }

    assert result == {'Model': model_section, 'Transform': transform_section}
def test_deploy_config_from_amazon_alg_estimator(sagemaker_session):
    """Assert the deploy config for a KNN estimator with templated names.

    The estimator is passed through ``airflow.training_config`` before
    ``airflow.deploy_config_from_estimator`` is called. The expected dict has
    ``Model``, ``EndpointConfig`` and ``Endpoint`` sections that all use the
    same ``execution_date``-templated name.
    """
    algo = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type='ml.m4.xlarge',
        k=16,
        sample_size=128,
        predictor_type='regressor',
        sagemaker_session=sagemaker_session,
    )
    records = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    # simulate training
    airflow.training_config(algo, records, mini_batch_size=256)

    result = airflow.deploy_config_from_estimator(
        estimator=algo,
        instance_type="ml.p2.xlarge",
        initial_instance_count="{{ instance_count }}",
    )

    # Templated name expected for the model, endpoint config and endpoint.
    templated_name = "knn-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}"

    model_section = {
        'ModelName': templated_name,
        'PrimaryContainer': {
            'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
            'Environment': {},
            'ModelDataUrl': "s3://output/" + templated_name + "/output/model.tar.gz",
        },
        'ExecutionRoleArn': '{{ role }}',
    }
    variant = {
        'InstanceType': 'ml.p2.xlarge',
        'InitialInstanceCount': '{{ instance_count }}',
        'ModelName': templated_name,
        'VariantName': 'AllTraffic',
        'InitialVariantWeight': 1,
    }
    endpoint_config_section = {
        'EndpointConfigName': templated_name,
        'ProductionVariants': [variant],
    }
    endpoint_section = {
        'EndpointName': templated_name,
        'EndpointConfigName': templated_name,
    }

    wanted = {
        'Model': model_section,
        'EndpointConfig': endpoint_config_section,
        'Endpoint': endpoint_section,
    }
    assert result == wanted