def add_step_to_emr(cluster_create_task, task_identifier, step_params, cluster_remover,
                    task_create_cluster, aws_connection, dag):
    """
    Insert one add-step task and its watcher between cluster creation and removal.

    Intended to be called once per step when several steps are added to the
    same cluster. Returns nothing; the new tasks are linked to the given ones
    through set_downstream.

    cluster_create_task: task object that creates the cluster (the add-step
        task is placed downstream of it)
    task_identifier: task_id of the add-step task; the watcher's task_id is
        this value with '_watch_step' appended
    step_params: step definitions, passed as ``steps`` to EmrAddStepsOperator
    cluster_remover: task that terminates the cluster (placed downstream of
        the watcher)
    task_create_cluster: task_id string of the cluster-creating task; its
        'return_value' XCom is pulled as the job flow id
    aws_connection: AWS connection id handed to both new tasks
    dag: DAG the new tasks are attached to
    """

    def xcom_return_value(source_task_id, subscript=''):
        # Template that pulls `source_task_id`'s 'return_value' XCom when rendered.
        return ("{{ task_instance.xcom_pull('" + source_task_id
                + "', key='return_value')" + subscript + " }}")

    # Settings shared by the operator and the sensor.
    shared_kwargs = {'aws_conn_id': aws_connection, 'dag': dag}

    add_task = EmrAddStepsOperator(
        task_id=task_identifier,
        job_flow_id=xcom_return_value(task_create_cluster),
        steps=step_params,
        **shared_kwargs)

    watch_task = EmrStepSensor(
        task_id=task_identifier + '_watch_step',
        job_flow_id=xcom_return_value(task_create_cluster),
        # '[0]' selects the first element of the add-step task's return value.
        step_id=xcom_return_value(task_identifier, '[0]'),
        **shared_kwargs)

    # create cluster -> add step -> watch step -> remove cluster
    pipeline = (cluster_create_task, add_task, watch_task, cluster_remover)
    for upstream, downstream in zip(pipeline, pipeline[1:]):
        upstream.set_downstream(downstream)
# NOTE(review): this excerpt opens with the closing arguments of a call whose
# beginning is not visible here -- presumably the operator assigned to
# `cluster_creator`, which is used further down; confirm against the full file.
aws_conn_id='aws_default', emr_conn_id='emr_default', dag=dag)

# Task 'add_steps': receives SPARK_TEST_STEPS (defined outside this excerpt) as
# `steps`. Its job_flow_id is a template string that pulls the 'return_value'
# XCom of the task named 'create_job_flow'.
step_adder = EmrAddStepsOperator(
    task_id='add_steps',
    job_flow_id=
    "{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    steps=SPARK_TEST_STEPS,
    dag=dag)

# Task 'watch_step': sensor pointed at the same job flow id template.
step_checker = EmrStepSensor(
    task_id='watch_step',
    job_flow_id=
    "{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    # Element [0] of the 'add_steps' task's 'return_value' XCom is used as the step id.
    step_id="{{ task_instance.xcom_pull('add_steps', key='return_value')[0] }}",
    aws_conn_id='aws_default',
    dag=dag)

# Task 'remove_cluster': terminate-job-flow operator for the same job flow id template.
cluster_remover = EmrTerminateJobFlowOperator(
    task_id='remove_cluster',
    job_flow_id=
    "{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    dag=dag)

# Dependency chain: cluster_creator -> step_adder -> step_checker -> cluster_remover.
cluster_creator.set_downstream(step_adder)
step_adder.set_downstream(step_checker)
step_checker.set_downstream(cluster_remover)
# NOTE(review): this excerpt opens with the closing arguments of a call whose
# beginning is not visible here -- presumably the operator assigned to
# `cluster_creator`, which is used further down; confirm against the full file.
emr_conn_id='emr_default', dag=dag )

# Task 'add_steps': receives SPARK_TEST_STEPS (defined outside this excerpt) as
# `steps`. Its job_flow_id is a template string that pulls the 'return_value'
# XCom of the task named 'create_job_flow'.
step_adder = EmrAddStepsOperator(
    task_id='add_steps',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    steps=SPARK_TEST_STEPS,
    dag=dag
)

# Task 'watch_step': sensor pointed at the same job flow id template.
step_checker = EmrStepSensor(
    task_id='watch_step',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    # Element [0] of the 'add_steps' task's 'return_value' XCom is used as the step id.
    step_id="{{ task_instance.xcom_pull('add_steps', key='return_value')[0] }}",
    aws_conn_id='aws_default',
    dag=dag
)

# Task 'remove_cluster': terminate-job-flow operator for the same job flow id template.
cluster_remover = EmrTerminateJobFlowOperator(
    task_id='remove_cluster',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    dag=dag
)

# Dependency chain: cluster_creator -> step_adder -> step_checker -> cluster_remover.
cluster_creator.set_downstream(step_adder)
step_adder.set_downstream(step_checker)
step_checker.set_downstream(cluster_remover)