def test_execute_terminates_the_job_flow_and_does_not_error(self):
    """Run the terminate operator against the mocked boto3 session.

    The test has no explicit assertion: it passes when ``execute`` returns
    without raising.
    """
    operator_kwargs = dict(
        task_id='test_task',
        job_flow_id='j-8989898989',
        aws_conn_id='aws_default',
    )
    # Both construction and execution happen while boto3's Session is patched.
    with patch('boto3.session.Session', self.boto3_session_mock):
        EmrTerminateJobFlowOperator(**operator_kwargs).execute(None)
# [START howto_operator_emr_manual_steps_tasks]
# Template that pulls the `return_value` XCom pushed by the `create_job_flow` task.
created_job_flow_id = "{{ task_instance.xcom_pull(task_ids='create_job_flow', key='return_value') }}"

# Create the job flow.
cluster_creator = EmrCreateJobFlowOperator(
    task_id='create_job_flow',
    aws_conn_id='aws_default',
    emr_conn_id='emr_default',
    job_flow_overrides=JOB_FLOW_OVERRIDES,
)

# Submit SPARK_STEPS to the job flow created above.
step_adder = EmrAddStepsOperator(
    task_id='add_steps',
    aws_conn_id='aws_default',
    job_flow_id=created_job_flow_id,
    steps=SPARK_STEPS,
)

# Watch the first step id returned by `add_steps`.
step_checker = EmrStepSensor(
    task_id='watch_step',
    aws_conn_id='aws_default',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    step_id="{{ task_instance.xcom_pull(task_ids='add_steps', key='return_value')[0] }}",
)

# Terminate the job flow.
cluster_remover = EmrTerminateJobFlowOperator(
    task_id='remove_cluster',
    aws_conn_id='aws_default',
    job_flow_id=created_job_flow_id,
)

# Linear pipeline: create -> add steps -> watch step -> terminate.
cluster_creator >> step_adder
step_adder >> step_checker
step_checker >> cluster_remover
# [END howto_operator_emr_manual_steps_tasks]
# create non entity specific tasks start_dag = DummyOperator( task_id='start_dag', ) create_emr_cluster = EmrCreateJobFlowOperator( task_id='create_emr_cluster', aws_conn_id='aws_default', emr_conn_id='emr_default' ) job_flow_id="{{ task_instance.xcom_pull(task_ids='create_emr_cluster', key='return_value') }}" terminate_emr_cluster = EmrTerminateJobFlowOperator( task_id='terminate_emr_cluster', job_flow_id=job_flow_id, aws_conn_id='aws_default' ) # create_emr_cluster = DummyOperator( # task_id='create_emr_cluster', # ) # job_flow_id = 'j-2NFK2ZRYYVS71' # terminate_emr_cluster = DummyOperator( # task_id='terminate_emr_cluster', # ) # create DAG for each entity for key, file in SPARK_FILES.items(): # add initial steps
# [START howto_operator_emr_manual_steps_tasks]
# Create the job flow; downstream tasks read its id through `cluster_creator.output`.
cluster_creator = EmrCreateJobFlowOperator(
    job_flow_overrides=JOB_FLOW_OVERRIDES,
    task_id='create_job_flow',
)

# Submit SPARK_STEPS to the created job flow.
step_adder = EmrAddStepsOperator(
    steps=SPARK_STEPS,
    job_flow_id=cluster_creator.output,
    task_id='add_steps',
)

# Watch the first step id returned by `add_steps`.
step_checker = EmrStepSensor(
    step_id="{{ task_instance.xcom_pull(task_ids='add_steps', key='return_value')[0] }}",
    job_flow_id=cluster_creator.output,
    task_id='watch_step',
)

# Terminate the job flow.
cluster_remover = EmrTerminateJobFlowOperator(
    job_flow_id=cluster_creator.output,
    task_id='remove_cluster',
)

step_adder >> step_checker
step_checker >> cluster_remover
# [END howto_operator_emr_manual_steps_tasks]

# Task dependencies created via `XComArgs`:
#   cluster_creator >> step_adder
#   cluster_creator >> step_checker
#   cluster_creator >> cluster_remover
# Use the module docstring as the DAG documentation.
dag.doc_md = __doc__

# Keyword arguments passed unchanged to every task created below except
# `create_cluster`, which lists them explicitly.
shared_kwargs = {"aws_conn_id": aws_conn_id, "dag": dag}

create_cluster = EmrCreateJobFlowOperator(
    task_id="create_cluster",
    job_flow_overrides=job_flow_overrides,
    aws_conn_id=aws_conn_id,
    dag=dag,
)

# Sensor on the cluster, targeting the RUNNING / WAITING states.
wait_cluster_completion = EmrJobFlowSensor(
    task_id='wait_cluster_completion',
    job_flow_id=cluster_id,
    target_states=["RUNNING", "WAITING"],
    **shared_kwargs,
)

# Uses the "all_done" trigger rule instead of the default.
terminate_cluster = EmrTerminateJobFlowOperator(
    task_id="terminate_cluster",
    job_flow_id=cluster_id,
    trigger_rule="all_done",
    **shared_kwargs,
)

# One task group per script; each is built by `emr_step_task_group`.
with TaskGroup("run_immigration_mapping") as run_immigration_mapping:
    add_step, wait_step = emr_step_task_group(
        script_name='immigration_mapping',
        cluster_id=cluster_id,
        **shared_kwargs,
    )

with TaskGroup("run_country") as run_country:
    add_step, wait_step = emr_step_task_group(
        script_name='country',
        cluster_id=cluster_id,
        **shared_kwargs,
    )
emr_conn_id='emr_default',
)

# Submit SPARK_STEPS to the job flow whose id is the output of `cluster_creator`.
step_adder = EmrAddStepsOperator(
    task_id='add_steps',
    job_flow_id=cluster_creator.output,
    aws_conn_id='aws_default',
    steps=SPARK_STEPS,
)

# Watch the first step id returned by `add_steps`.
step_checker = EmrStepSensor(
    task_id='watch_step',
    job_flow_id=cluster_creator.output,
    step_id="{{ task_instance.xcom_pull(task_ids='add_steps', key='return_value')[0] }}",
    aws_conn_id='aws_default',
)

# Terminate the job flow.
cluster_remover = EmrTerminateJobFlowOperator(
    task_id='remove_cluster',
    job_flow_id=cluster_creator.output,
    aws_conn_id='aws_default',
)

# Explicit ordering between the three tasks; the dependencies on
# `cluster_creator` are listed in the trailing comment below.
step_adder >> step_checker >> cluster_remover
# [END howto_operator_emr_manual_steps_tasks]

# Task dependencies created via `XComArgs`:
#   cluster_creator >> step_adder
#   cluster_creator >> step_checker
#   cluster_creator >> cluster_remover
job_flow_overrides=cluster_conf) add_step_task = EmrAddStepsOperator( task_id='My_first_job', job_flow_id= "{{ task_instance.xcom_pull(task_ids='create_job_flow', key='return_value') }}", aws_conn_id='my_aws_conn', steps=my_first_job) watch_prev_step_task = EmrStepSensor( task_id='watch_prev_step', job_flow_id= "{{task_instance.xcom_pull(task_ids='create_job_flow', key='return_value')}}", step_id= "{{task_instance.xcom_pull(task_ids='My_first_job', key='return_value')}}", aws_conn_id='aws_default') terminate_job_flow_task = EmrTerminateJobFlowOperator( task_id='terminate_job_flow', job_flow_id= "{{task_instance.xcom_pull(task_ids='create_job_flow', key='return_value')}}", aws_conn_id='aws_default', trigger_rule="all_done") #dependencies check_data_exists_task >> create_job_flow_task create_job_flow_task >> add_step_task add_step_task >> watch_prev_step_task watch_prev_step_task >> terminate_job_flow_task
task_id="wait_for_step_load_raw_data",
job_flow_id=create_cluster.output,
step_id="{{ task_instance.xcom_pull(task_ids='add_step_load_raw_data', key='return_value')[0] }}",
aws_conn_id="aws_default",
)

# Submit the TRANSFORM steps to the job flow created by `create_cluster`.
add_step_transform = EmrAddStepsOperator(
    task_id="add_step_transform",
    job_flow_id=create_cluster.output,
    aws_conn_id="aws_default",
    steps=SparkSteps.TRANSFORM,
)

# Watch the first step id returned by `add_step_transform`.
wait_for_step_transform = EmrStepSensor(
    task_id="wait_for_step_transform",
    job_flow_id=create_cluster.output,
    step_id="{{ task_instance.xcom_pull(task_ids='add_step_transform', key='return_value')[0] }}",
    aws_conn_id="aws_default",
)

# Terminate the job flow. ALL_DONE runs this task once all upstream tasks have
# finished, whatever their final state.
terminate_cluster = EmrTerminateJobFlowOperator(
    task_id="terminate_cluster",
    job_flow_id=create_cluster.output,
    aws_conn_id="aws_default",
    trigger_rule=TriggerRule.ALL_DONE
)

# Linear pipeline: load raw data -> wait -> transform -> wait -> terminate.
add_step_load_raw_data >> wait_for_step_load_raw_data >> \
    add_step_transform >> wait_for_step_transform >> \
    terminate_cluster
dag=dag)

# Submit the `step2` steps to the job flow created by the `create_job_flow` task.
emr_step_2 = EmrAddStepsOperator(
    task_id='emr_step2',
    job_flow_id="{{ task_instance.xcom_pull(task_ids='create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    steps=step2,
    dag=dag)

# Watch the first step id returned by `emr_step2`.
emr_step_sensor = EmrStepSensor(
    task_id='watch_step',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    step_id="{{ task_instance.xcom_pull(task_ids='emr_step2', key='return_value')[0] }}",
    aws_conn_id='aws_default',
    dag=dag)

# Terminate the job flow.
stop_emr_cluster = EmrTerminateJobFlowOperator(
    task_id='stop_emr1',
    job_flow_id="{{ task_instance.xcom_pull(task_ids='create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    dag=dag)

# Linear pipeline: create cluster -> step 1 -> step 2 -> watch step 2 -> terminate.
create_emr_cluster >> emr_step_1
emr_step_1 >> emr_step_2
emr_step_2 >> emr_step_sensor
emr_step_sensor >> stop_emr_cluster