Example #2
def test_pytorch_airflow_config_uploads_data_source_to_s3_when_inputs_not_provided(
        sagemaker_session, cpu_instance_type):
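    # Building the Airflow training config should tar the MNIST entry-point
    # script and upload it to S3 even though no training inputs are provided.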
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        estimator = PyTorch(
            entry_point=PYTORCH_MNIST_SCRIPT,
            role=ROLE,
            framework_version="1.1.0",
            train_instance_count=2,
            train_instance_type=cpu_instance_type,
            hyperparameters={
                "epochs": 6,
                "backend": "gloo"
            },
        )

        train_config = sm_airflow.training_config(estimator=estimator)

        uploaded_s3_data = train_config["HyperParameters"][
            "sagemaker_submit_directory"].strip('"')

        transform_config = sm_airflow.transform_config_from_estimator(
            estimator=estimator,
            task_id="transform_config",
            task_type="training",
            instance_count=SINGLE_INSTANCE_COUNT,
            instance_type=cpu_instance_type,
            data=uploaded_s3_data,
            content_type="text/csv",
        )

        default_args = {
            "owner": "airflow",
            "start_date": airflow.utils.dates.days_ago(2),
            "provide_context": True,
        }

        dag = DAG("tensorflow_example",
                  default_args=default_args,
                  schedule_interval="@once")

        train_op = SageMakerTrainingOperator(task_id="tf_training",
                                             config=train_config,
                                             wait_for_completion=True,
                                             dag=dag)

        transform_op = SageMakerTransformOperator(task_id="transform_operator",
                                                  config=transform_config,
                                                  wait_for_completion=True,
                                                  dag=dag)

        transform_op.set_upstream(train_op)

        _assert_that_s3_url_contains_data(sagemaker_session, uploaded_s3_data)
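
The helper _assert_that_s3_url_contains_data is not shown in this excerpt. A minimal sketch of what such a helper could look like, assuming the standard boto3 S3 client exposed through the SageMaker session (the exact assertion in the source project may differ):

from urllib.parse import urlparse

def _assert_that_s3_url_contains_data(sagemaker_session, s3_url):
    # List objects under the uploaded prefix and assert at least one exists.
    parsed_url = urlparse(s3_url)
    s3_client = sagemaker_session.boto_session.client("s3")
    response = s3_client.list_objects_v2(Bucket=parsed_url.netloc,
                                         Prefix=parsed_url.path.lstrip("/"))
    assert response["KeyCount"] > 0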
class TestSageMakerTransformOperator(unittest.TestCase):
    def setUp(self):
        configuration.load_test_config()
        self.sagemaker = SageMakerTransformOperator(
            task_id='test_sagemaker_operator',
            aws_conn_id='sagemaker_test_id',
            config=config,
            wait_for_completion=False,
            check_interval=5)

    def test_parse_config_integers(self):
        self.sagemaker.parse_config_integers()
        test_config = self.sagemaker.config['Transform']
        self.assertEqual(
            test_config['TransformResources']['InstanceCount'],
            int(test_config['TransformResources']['InstanceCount']))
        self.assertEqual(test_config['MaxConcurrentTransforms'],
                         int(test_config['MaxConcurrentTransforms']))
        self.assertEqual(test_config['MaxPayloadInMB'],
                         int(test_config['MaxPayloadInMB']))

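    # Note: stacked mock.patch decorators apply bottom-up, so the arguments
    # arrive in reverse order (mock_transform patches create_transform_job,
    # mock_model patches create_model, mock_client patches get_conn).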
    @mock.patch.object(SageMakerHook, 'get_conn')
    @mock.patch.object(SageMakerHook, 'create_model')
    @mock.patch.object(SageMakerHook, 'create_transform_job')
    def test_execute(self, mock_transform, mock_model, mock_client):
        mock_transform.return_value = {
            'TransformJobArn': 'testarn',
            'ResponseMetadata': {
                'HTTPStatusCode': 200
            }
        }
        self.sagemaker.execute(None)
        mock_model.assert_called_once_with(create_model_params)
        mock_transform.assert_called_once_with(create_transform_params,
                                               wait_for_completion=False,
                                               check_interval=5,
                                               max_ingestion_time=None)

    @mock.patch.object(SageMakerHook, 'get_conn')
    @mock.patch.object(SageMakerHook, 'create_model')
    @mock.patch.object(SageMakerHook, 'create_transform_job')
    def test_execute_with_failure(self, mock_transform, mock_model,
                                  mock_client):
        mock_transform.return_value = {
            'TransformJobArn': 'testarn',
            'ResponseMetadata': {
                'HTTPStatusCode': 404
            }
        }
        self.assertRaises(AirflowException, self.sagemaker.execute, None)
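
The module-level fixtures config, create_model_params, and create_transform_params are defined elsewhere in the test file. A minimal sketch of the shape the operator expects; every value below is assumed for illustration (note the string-valued counts, which give parse_config_integers something to cast):

create_model_params = {
    'ModelName': 'model_name',
    'PrimaryContainer': {
        'Image': 'image_name',
        'ModelDataUrl': 's3://some-bucket/model.tar.gz',
    },
    'ExecutionRoleArn': 'arn:aws:iam::123456789012:role/test-role',
}

create_transform_params = {
    'TransformJobName': 'test_transform_job',
    'ModelName': 'model_name',
    'MaxConcurrentTransforms': '12',
    'MaxPayloadInMB': '6',
    'BatchStrategy': 'MultiRecord',
    'TransformInput': {
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri': 's3://some-bucket/input',
            }
        }
    },
    'TransformOutput': {'S3OutputPath': 's3://some-bucket/output'},
    'TransformResources': {
        'InstanceType': 'ml.m4.xlarge',
        'InstanceCount': '3',
    },
}

# The operator's config nests both dicts under 'Model' and 'Transform'.
config = {
    'Model': create_model_params,
    'Transform': create_transform_params,
}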
def _build_airflow_workflow(estimator,
                            instance_type,
                            inputs=None,
                            mini_batch_size=None):
    training_config = sm_airflow.training_config(
        estimator=estimator, inputs=inputs, mini_batch_size=mini_batch_size)

    model = estimator.create_model()
    assert model is not None

    model_config = sm_airflow.model_config(instance_type, model)
    assert model_config is not None

    transform_config = sm_airflow.transform_config_from_estimator(
        estimator=estimator,
        task_id="transform_config",
        task_type="training",
        instance_count=SINGLE_INSTANCE_COUNT,
        instance_type=estimator.train_instance_type,
        data=inputs,
        content_type="text/csv",
        input_filter="$",
        output_filter="$",
    )

    default_args = {
        "owner": "airflow",
        "start_date": airflow.utils.dates.days_ago(2),
        "provide_context": True,
    }

    dag = DAG("tensorflow_example",
              default_args=default_args,
              schedule_interval="@once")

    train_op = SageMakerTrainingOperator(task_id="tf_training",
                                         config=training_config,
                                         wait_for_completion=True,
                                         dag=dag)

    transform_op = SageMakerTransformOperator(task_id="transform_operator",
                                              config=transform_config,
                                              wait_for_completion=True,
                                              dag=dag)

    transform_op.set_upstream(train_op)

    return training_config
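
A sketch of how the builder above might be invoked; the estimator, cpu_instance_type, and uploaded_s3_data names are assumed to come from the surrounding test, as in Example #2:

training_config = _build_airflow_workflow(
    estimator=estimator,                 # a framework estimator, e.g. PyTorch
    instance_type=cpu_instance_type,     # instance type for the hosted model
    inputs=uploaded_s3_data)             # S3 prefix holding CSV training data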

# launch sagemaker training job and wait until it completes
# (task_id and config name below are assumed; only the trailing keyword
#  arguments of this statement survive in the source snippet)
train_model_task = SageMakerTrainingOperator(task_id='model_training',
                                             dag=dag,
                                             config=train_config,
                                             aws_conn_id='airflow-sagemaker',
                                             wait_for_completion=True,
                                             check_interval=30)

# launch sagemaker hyperparameter tuning job and wait until it completes
tune_model_task = SageMakerTuningOperator(task_id='model_tuning',
                                          dag=dag,
                                          config=tuner_config,
                                          aws_conn_id='airflow-sagemaker',
                                          wait_for_completion=True,
                                          check_interval=30)
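
The tuner_config above is presumably produced by sagemaker.workflow.airflow.tuning_config. A hedged sketch of that step; the tuner definition, estimator, and S3 paths are assumptions, not part of the original example:

import sagemaker.workflow.airflow as sm_airflow
from sagemaker.tuner import ContinuousParameter, HyperparameterTuner

# Hypothetical tuner over a single hyperparameter range.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name='validation:rmse',
    hyperparameter_ranges={'eta': ContinuousParameter(0, 1)},
    max_jobs=4,
    max_parallel_jobs=2)

tuner_config = sm_airflow.tuning_config(
    tuner=tuner,
    inputs={'train': 's3://my-bucket/prepared/train',
            'validation': 's3://my-bucket/prepared/validation'})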

# launch sagemaker batch transform job and wait until it completes
batch_transform_task = SageMakerTransformOperator(
    task_id='predicting',
    dag=dag,
    config=transform_config,
    aws_conn_id='airflow-sagemaker',
    wait_for_completion=True,
    check_interval=30,
    trigger_rule=TriggerRule.ONE_SUCCESS)

basher_task = BashOperator(task_id='sleep', bash_command='sleep 5', dag=dag)

cleanup_task = DummyOperator(task_id='cleaning_up', dag=dag)

# set the dependencies between tasks

init >> preprocess_task
preprocess_task >> prepare_task
prepare_task >> branching
branching >> tune_model_task
branching >> train_model_task
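
The dependency list above stops short of the transform and cleanup tasks. Example #9 below wires the equivalent workflow further with set_downstream, and the ONE_SUCCESS trigger rule on the transform task suggests both training paths feed it; a plausible continuation in the same bitshift style (assumed, not from the source) is:

tune_model_task >> batch_transform_task
train_model_task >> batch_transform_task
batch_transform_task >> basher_task >> cleanup_task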
Example #8
        'role': role,
        'sess': sess,
        'spark_model_uri':
        config['inference_pipeline']['inputs']['spark_model'],
        'pipeline_model_name':
        config['inference_pipeline']['pipeline_model_name'],
        'endpoint_name': config['inference_pipeline']['pipeline_model_name'],
        'region': region,
        'bucket': bucket
    })

# launch sagemaker batch transform job and wait until it completes
batch_transform_task = SageMakerTransformOperator(
    task_id='batch_predicting',
    dag=dag,
    config=transform_config,
    aws_conn_id='airflow-sagemaker',
    wait_for_completion=True,
    check_interval=30)

# Cleanup task: deletes ALL SageMaker endpoints and model artifacts.
# Uncomment the clean_up_task below to enable it.

# clean_up_task = PythonOperator(
#    task_id='clean_up',
#    dag=dag,
#    python_callable=clean_up.clean_up,
#    op_kwargs={'region': region, "bucket": bucket}
# )

init.set_downstream(sm_proc_job_task)
Example #9
# launch sagemaker hyperparameter tuning job and wait until it completes
tune_model_task = SageMakerTuningOperator(
    task_id="model_tuning",
    dag=dag,
    config=tuner_config,
    aws_conn_id="airflow-sagemaker",
    wait_for_completion=True,
    check_interval=30,
)

# launch sagemaker batch transform job and wait until it completes
batch_transform_task = SageMakerTransformOperator(
    task_id="predicting",
    dag=dag,
    config=transform_config,
    aws_conn_id="airflow-sagemaker",
    wait_for_completion=True,
    check_interval=30,
    trigger_rule=TriggerRule.ONE_SUCCESS,
)

cleanup_task = DummyOperator(task_id="cleaning_up", dag=dag)

# set the dependencies between tasks

init.set_downstream(preprocess_task)
preprocess_task.set_downstream(prepare_task)
prepare_task.set_downstream(branching)
branching.set_downstream(tune_model_task)
branching.set_downstream(train_model_task)
tune_model_task.set_downstream(batch_transform_task)