# Imports reconstructed for this excerpt; `timeout`, PYTORCH_MNIST_SCRIPT, ROLE,
# AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS, and SINGLE_INSTANCE_COUNT are module-level
# test helpers/constants not shown here.
import airflow.utils.dates
from airflow import DAG
from airflow.contrib.operators.sagemaker_training_operator import SageMakerTrainingOperator
from airflow.contrib.operators.sagemaker_transform_operator import SageMakerTransformOperator

import sagemaker.workflow.airflow as sm_airflow
from sagemaker.pytorch import PyTorch


def test_pytorch_airflow_config_uploads_data_source_to_s3_when_inputs_not_provided(
    sagemaker_session, cpu_instance_type
):
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        estimator = PyTorch(
            entry_point=PYTORCH_MNIST_SCRIPT,
            role=ROLE,
            framework_version="1.1.0",
            train_instance_count=2,
            train_instance_type=cpu_instance_type,
            hyperparameters={"epochs": 6, "backend": "gloo"},
        )

        train_config = sm_airflow.training_config(estimator=estimator)

        uploaded_s3_data = train_config["HyperParameters"]["sagemaker_submit_directory"].strip('"')

        transform_config = sm_airflow.transform_config_from_estimator(
            estimator=estimator,
            task_id="transform_config",
            task_type="training",
            instance_count=SINGLE_INSTANCE_COUNT,
            instance_type=cpu_instance_type,
            data=uploaded_s3_data,
            content_type="text/csv",
        )

        default_args = {
            "owner": "airflow",
            "start_date": airflow.utils.dates.days_ago(2),
            "provide_context": True,
        }

        dag = DAG("tensorflow_example", default_args=default_args, schedule_interval="@once")

        train_op = SageMakerTrainingOperator(
            task_id="tf_training", config=train_config, wait_for_completion=True, dag=dag
        )

        transform_op = SageMakerTransformOperator(
            task_id="transform_operator", config=transform_config, wait_for_completion=True, dag=dag
        )
        transform_op.set_upstream(train_op)

        _assert_that_s3_url_contains_data(sagemaker_session, uploaded_s3_data)
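# A minimal sketch of the `_assert_that_s3_url_contains_data` helper used by the
# test above, assuming it only needs to verify that the uploaded S3 prefix is
# non-empty; the real helper may do more.
from urllib.parse import urlparse


def _assert_that_s3_url_contains_data(sagemaker_session, s3_url):
    parsed_url = urlparse(s3_url)
    # `boto_session` is the boto3 session wrapped by sagemaker.session.Session
    s3_objects = sagemaker_session.boto_session.client("s3").list_objects_v2(
        Bucket=parsed_url.netloc, Prefix=parsed_url.path.lstrip("/")
    )
    assert s3_objects["KeyCount"] > 0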
# Imports reconstructed for this excerpt (Airflow 1.10-era contrib paths); the
# `config`, `create_model_params`, and `create_transform_params` fixtures are
# sketched below.
import unittest

import mock

from airflow import configuration
from airflow.contrib.hooks.sagemaker_hook import SageMakerHook
from airflow.contrib.operators.sagemaker_transform_operator import SageMakerTransformOperator
from airflow.exceptions import AirflowException


class TestSageMakerTransformOperator(unittest.TestCase):

    def setUp(self):
        configuration.load_test_config()
        self.sagemaker = SageMakerTransformOperator(
            task_id='test_sagemaker_operator',
            aws_conn_id='sagemaker_test_id',
            config=config,
            wait_for_completion=False,
            check_interval=5
        )

    def test_parse_config_integers(self):
        self.sagemaker.parse_config_integers()
        test_config = self.sagemaker.config['Transform']
        self.assertEqual(test_config['TransformResources']['InstanceCount'],
                         int(test_config['TransformResources']['InstanceCount']))
        self.assertEqual(test_config['MaxConcurrentTransforms'],
                         int(test_config['MaxConcurrentTransforms']))
        self.assertEqual(test_config['MaxPayloadInMB'],
                         int(test_config['MaxPayloadInMB']))

    @mock.patch.object(SageMakerHook, 'get_conn')
    @mock.patch.object(SageMakerHook, 'create_model')
    @mock.patch.object(SageMakerHook, 'create_transform_job')
    def test_execute(self, mock_transform, mock_model, mock_client):
        mock_transform.return_value = {
            'TransformJobArn': 'testarn',
            'ResponseMetadata': {'HTTPStatusCode': 200}
        }
        self.sagemaker.execute(None)
        mock_model.assert_called_once_with(create_model_params)
        mock_transform.assert_called_once_with(create_transform_params,
                                               wait_for_completion=False,
                                               check_interval=5,
                                               max_ingestion_time=None)

    @mock.patch.object(SageMakerHook, 'get_conn')
    @mock.patch.object(SageMakerHook, 'create_model')
    @mock.patch.object(SageMakerHook, 'create_transform_job')
    def test_execute_with_failure(self, mock_transform, mock_model, mock_client):
        mock_transform.return_value = {
            'TransformJobArn': 'testarn',
            'ResponseMetadata': {'HTTPStatusCode': 404}
        }
        self.assertRaises(AirflowException, self.sagemaker.execute, None)
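# A minimal sketch of the module-level fixtures the test class above assumes
# (`config`, `create_model_params`, `create_transform_params`). Field names
# follow the SageMaker CreateModel / CreateTransformJob request shapes; every
# concrete value below is hypothetical.
create_model_params = {
    'ModelName': 'model_name',
    'PrimaryContainer': {
        'Image': 'image_name',
        'ModelDataUrl': 's3://output-bucket/model.tar.gz'
    },
    'ExecutionRoleArn': 'arn:aws:iam::123456789012:role/test-role'
}

create_transform_params = {
    'TransformJobName': 'job_name',
    'ModelName': 'model_name',
    # numeric fields are strings on purpose: parse_config_integers()
    # is expected to convert them to ints
    'MaxConcurrentTransforms': '12',
    'MaxPayloadInMB': '6',
    'BatchStrategy': 'MultiRecord',
    'TransformInput': {
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri': 's3://input-bucket/input.csv'
            }
        }
    },
    'TransformOutput': {'S3OutputPath': 's3://output-bucket/output/'},
    'TransformResources': {
        'InstanceType': 'ml.m4.xlarge',
        'InstanceCount': '3'
    }
}

config = {'Model': create_model_params, 'Transform': create_transform_params}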
def _build_airflow_workflow(estimator, instance_type, inputs=None, mini_batch_size=None):
    training_config = sm_airflow.training_config(
        estimator=estimator, inputs=inputs, mini_batch_size=mini_batch_size
    )

    model = estimator.create_model()
    assert model is not None

    model_config = sm_airflow.model_config(instance_type, model)
    assert model_config is not None

    transform_config = sm_airflow.transform_config_from_estimator(
        estimator=estimator,
        task_id="transform_config",
        task_type="training",
        instance_count=SINGLE_INSTANCE_COUNT,
        instance_type=estimator.train_instance_type,
        data=inputs,
        content_type="text/csv",
        input_filter="$",
        output_filter="$",
    )

    default_args = {
        "owner": "airflow",
        "start_date": airflow.utils.dates.days_ago(2),
        "provide_context": True,
    }

    dag = DAG("tensorflow_example", default_args=default_args, schedule_interval="@once")

    train_op = SageMakerTrainingOperator(
        task_id="tf_training", config=training_config, wait_for_completion=True, dag=dag
    )

    transform_op = SageMakerTransformOperator(
        task_id="transform_operator", config=transform_config, wait_for_completion=True, dag=dag
    )
    transform_op.set_upstream(train_op)

    return training_config
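# Hypothetical caller of `_build_airflow_workflow`, mirroring the pattern of the
# PyTorch test above; `MXNet`, MXNET_MNIST_SCRIPT, ROLE, and
# SINGLE_INSTANCE_COUNT stand in for whatever the surrounding test module
# actually defines.
def test_mxnet_airflow_config_uploads_data_source_to_s3(sagemaker_session, cpu_instance_type):
    estimator = MXNet(
        entry_point=MXNET_MNIST_SCRIPT,
        role=ROLE,
        framework_version="1.4.1",
        train_instance_count=SINGLE_INSTANCE_COUNT,
        train_instance_type=cpu_instance_type,
    )
    training_config = _build_airflow_workflow(
        estimator=estimator, instance_type=cpu_instance_type
    )
    _assert_that_s3_url_contains_data(
        sagemaker_session,
        training_config["HyperParameters"]["sagemaker_submit_directory"].strip('"'),
    )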
    # (tail of a preceding operator definition, truncated in this excerpt)
    wait_for_completion=True,
    check_interval=30)

# launch sagemaker hyperparameter job and wait until it completes
tune_model_task = SageMakerTuningOperator(
    task_id='model_tuning',
    dag=dag,
    config=tuner_config,
    aws_conn_id='airflow-sagemaker',
    wait_for_completion=True,
    check_interval=30)

# launch sagemaker batch transform job and wait until it completes
batch_transform_task = SageMakerTransformOperator(
    task_id='predicting',
    dag=dag,
    config=transform_config,
    aws_conn_id='airflow-sagemaker',
    wait_for_completion=True,
    check_interval=30,
    trigger_rule=TriggerRule.ONE_SUCCESS)

basher_task = BashOperator(task_id='sleep', bash_command='sleep 5', dag=dag)

cleanup_task = DummyOperator(task_id='cleaning_up', dag=dag)

# set the dependencies between tasks
init >> preprocess_task
preprocess_task >> prepare_task
prepare_task >> branching
branching >> tune_model_task
branching >> train_model_task
        # (tail of an op_kwargs dict for a preceding PythonOperator, truncated
        # in this excerpt)
        'role': role,
        'sess': sess,
        'spark_model_uri': config['inference_pipeline']['inputs']['spark_model'],
        'pipeline_model_name': config['inference_pipeline']['pipeline_model_name'],
        'endpoint_name': config['inference_pipeline']['pipeline_model_name'],
        'region': region,
        'bucket': bucket
    })

# launch sagemaker batch transform job and wait until it completes
batch_transform_task = SageMakerTransformOperator(
    task_id='batch_predicting',
    dag=dag,
    config=transform_config,
    aws_conn_id='airflow-sagemaker',
    wait_for_completion=True,
    check_interval=30)

# Cleanup task: deletes ALL SageMaker endpoints and model artifacts.
# Uncomment clean_up_task below to clean up SageMaker endpoint resources
# and model artifacts.
# clean_up_task = PythonOperator(
#     task_id='clean_up',
#     dag=dag,
#     python_callable=clean_up.clean_up,
#     op_kwargs={'region': region, 'bucket': bucket}
# )

init.set_downstream(sm_proc_job_task)
# launch sagemaker hyperparameter job and wait until it completes
tune_model_task = SageMakerTuningOperator(
    task_id="model_tuning",
    dag=dag,
    config=tuner_config,
    aws_conn_id="airflow-sagemaker",
    wait_for_completion=True,
    check_interval=30,
)

# launch sagemaker batch transform job and wait until it completes
batch_transform_task = SageMakerTransformOperator(
    task_id="predicting",
    dag=dag,
    config=transform_config,
    aws_conn_id="airflow-sagemaker",
    wait_for_completion=True,
    check_interval=30,
    trigger_rule=TriggerRule.ONE_SUCCESS,
)

cleanup_task = DummyOperator(task_id="cleaning_up", dag=dag)

# set the dependencies between tasks
init.set_downstream(preprocess_task)
preprocess_task.set_downstream(prepare_task)
prepare_task.set_downstream(branching)
branching.set_downstream(tune_model_task)
branching.set_downstream(train_model_task)
tune_model_task.set_downstream(batch_transform_task)
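# A minimal sketch of the `branching` task referenced in the dependency wiring
# above, assuming a BranchPythonOperator; the returned task ids and the
# `tune_model` config flag are hypothetical, and `dag` and `config` are assumed
# to exist in the surrounding DAG file.
from airflow.operators.python_operator import BranchPythonOperator


def which_path(**context):
    # hypothetical switch between hyperparameter tuning and plain training
    if config.get('tune_model', False):
        return 'model_tuning'    # matches tune_model_task above
    return 'model_training'      # assumed task_id of train_model_task


branching = BranchPythonOperator(
    task_id='branching',
    dag=dag,
    provide_context=True,
    python_callable=which_path,
)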