Example #1
0
    def test_execute_terminates_the_job_flow_and_does_not_error(self):
        """Smoke-test ``EmrTerminateJobFlowOperator.execute``.

        ``boto3.session.Session`` is patched with ``self.boto3_session_mock``
        while the operator is built and executed. There is no explicit
        assertion: the test passes when ``execute`` returns without raising.
        """
        with patch('boto3.session.Session', self.boto3_session_mock):
            terminate_op = EmrTerminateJobFlowOperator(
                task_id='test_task',
                job_flow_id='j-8989898989',
                aws_conn_id='aws_default',
            )
            terminate_op.execute(None)
Example #2
0
    # [START howto_operator_emr_manual_steps_tasks]
    # Create the job flow from JOB_FLOW_OVERRIDES.
    cluster_creator = EmrCreateJobFlowOperator(
        task_id='create_job_flow',
        job_flow_overrides=JOB_FLOW_OVERRIDES,
        aws_conn_id='aws_default',
        emr_conn_id='emr_default',
    )

    # The job flow id is pulled from the 'create_job_flow' return-value XCom.
    step_adder = EmrAddStepsOperator(
        task_id='add_steps',
        job_flow_id="{{ task_instance.xcom_pull(task_ids='create_job_flow', key='return_value') }}",
        aws_conn_id='aws_default',
        steps=SPARK_STEPS,
    )

    # Watches the first ([0]) entry of the 'add_steps' return-value XCom.
    step_checker = EmrStepSensor(
        task_id='watch_step',
        job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
        step_id="{{ task_instance.xcom_pull(task_ids='add_steps', key='return_value')[0] }}",
        aws_conn_id='aws_default',
    )

    # Terminates the job flow created by 'create_job_flow'.
    cluster_remover = EmrTerminateJobFlowOperator(
        task_id='remove_cluster',
        job_flow_id="{{ task_instance.xcom_pull(task_ids='create_job_flow', key='return_value') }}",
        aws_conn_id='aws_default',
    )

    # Linear pipeline: create -> add steps -> watch step -> remove.
    cluster_creator >> step_adder >> step_checker >> cluster_remover
    # [END howto_operator_emr_manual_steps_tasks]
Example #3
0
    # Tasks that are not tied to a single entity: a start marker plus the
    # EMR cluster creation and termination tasks.
    start_dag = DummyOperator(task_id='start_dag')

    create_emr_cluster = EmrCreateJobFlowOperator(
        task_id='create_emr_cluster',
        aws_conn_id='aws_default',
        emr_conn_id='emr_default',
    )

    # Template string that pulls the 'create_emr_cluster' return-value XCom.
    job_flow_id = "{{ task_instance.xcom_pull(task_ids='create_emr_cluster', key='return_value') }}"

    terminate_emr_cluster = EmrTerminateJobFlowOperator(
        task_id='terminate_emr_cluster',
        job_flow_id=job_flow_id,
        aws_conn_id='aws_default',
    )

    # create_emr_cluster = DummyOperator(
    #     task_id='create_emr_cluster',
    # )
    # job_flow_id = 'j-2NFK2ZRYYVS71'

    # terminate_emr_cluster = DummyOperator(
    #     task_id='terminate_emr_cluster',
    # )

    # create DAG for each entity
    for key, file in SPARK_FILES.items():
        # add initial steps
Example #4
0
    # [START howto_operator_emr_manual_steps_tasks]
    # Create the job flow from JOB_FLOW_OVERRIDES.
    cluster_creator = EmrCreateJobFlowOperator(
        task_id='create_job_flow',
        job_flow_overrides=JOB_FLOW_OVERRIDES,
    )

    # The job flow id is taken from ``cluster_creator.output``.
    step_adder = EmrAddStepsOperator(
        task_id='add_steps',
        job_flow_id=cluster_creator.output,
        steps=SPARK_STEPS,
    )

    # Watches the first ([0]) entry of the 'add_steps' return-value XCom.
    step_checker = EmrStepSensor(
        task_id='watch_step',
        job_flow_id=cluster_creator.output,
        step_id="{{ task_instance.xcom_pull(task_ids='add_steps', key='return_value')[0] }}",
    )

    cluster_remover = EmrTerminateJobFlowOperator(
        task_id='remove_cluster',
        job_flow_id=cluster_creator.output,
    )

    # Only the step-level ordering is declared explicitly here.
    step_adder >> step_checker >> cluster_remover
    # [END howto_operator_emr_manual_steps_tasks]

    # Task dependencies created via `XComArgs`:
    #   cluster_creator >> step_adder
    #   cluster_creator >> step_checker
    #   cluster_creator >> cluster_remover
    # Use the module docstring as the DAG's documentation.
    dag.doc_md = __doc__

    # --- Cluster lifecycle: create, wait, terminate ---
    create_cluster = EmrCreateJobFlowOperator(
        dag=dag,
        task_id="create_cluster",
        job_flow_overrides=job_flow_overrides,
        aws_conn_id=aws_conn_id,
    )
    wait_cluster_completion = EmrJobFlowSensor(
        task_id='wait_cluster_completion',
        job_flow_id=cluster_id,
        aws_conn_id=aws_conn_id,
        target_states=["RUNNING", "WAITING"],
        dag=dag,
    )
    # trigger_rule="all_done" is set on the termination task.
    terminate_cluster = EmrTerminateJobFlowOperator(
        task_id="terminate_cluster",
        trigger_rule="all_done",
        job_flow_id=cluster_id,
        aws_conn_id=aws_conn_id,
        dag=dag,
    )

    # --- One task group per script; each call yields an (add_step, wait_step) pair ---
    with TaskGroup("run_immigration_mapping") as run_immigration_mapping:
        add_step, wait_step = emr_step_task_group(
            script_name='immigration_mapping',
            cluster_id=cluster_id,
            aws_conn_id=aws_conn_id,
            dag=dag,
        )

    with TaskGroup("run_country") as run_country:
        add_step, wait_step = emr_step_task_group(
            script_name='country',
            cluster_id=cluster_id,
            aws_conn_id=aws_conn_id,
            dag=dag,
        )
        emr_conn_id='emr_default',
    )

    # Submit SPARK_STEPS to the job flow referenced by ``cluster_creator.output``.
    step_adder = EmrAddStepsOperator(
        task_id='add_steps',
        job_flow_id=cluster_creator.output,
        aws_conn_id='aws_default',
        steps=SPARK_STEPS,
    )

    # Watches the first ([0]) entry of the 'add_steps' return-value XCom.
    step_checker = EmrStepSensor(
        task_id='watch_step',
        job_flow_id=cluster_creator.output,
        step_id="{{ task_instance.xcom_pull(task_ids='add_steps', key='return_value')[0] }}",
        aws_conn_id='aws_default',
    )

    # Terminates the job flow referenced by ``cluster_creator.output``.
    cluster_remover = EmrTerminateJobFlowOperator(
        task_id='remove_cluster',
        job_flow_id=cluster_creator.output,
        aws_conn_id='aws_default',
    )

    # Explicit ordering: add steps, then watch the step, then remove the cluster.
    step_adder >> step_checker >> cluster_remover
    # [END howto_operator_emr_manual_steps_tasks]

    # Task dependencies created via `XComArgs`:
    #   cluster_creator >> step_adder
    #   cluster_creator >> step_checker
    #   cluster_creator >> cluster_remover
Example #7
0
        job_flow_overrides=cluster_conf)

    # Submit the steps in ``my_first_job`` to the job flow created by the
    # 'create_job_flow' task (id pulled from its return-value XCom).
    # NOTE(review): this task uses aws_conn_id='my_aws_conn' while the sensor
    # and the terminate task below use 'aws_default' -- confirm which
    # connection is intended.
    add_step_task = EmrAddStepsOperator(
        task_id='My_first_job',
        job_flow_id="{{ task_instance.xcom_pull(task_ids='create_job_flow', key='return_value') }}",
        aws_conn_id='my_aws_conn',
        steps=my_first_job,
    )

    # EmrAddStepsOperator returns a *list* of step ids, while EmrStepSensor
    # expects a single step id, so the template must index the first element
    # ([0]); without it the sensor would be handed the rendered list.
    watch_prev_step_task = EmrStepSensor(
        task_id='watch_prev_step',
        job_flow_id="{{task_instance.xcom_pull(task_ids='create_job_flow', key='return_value')}}",
        step_id="{{task_instance.xcom_pull(task_ids='My_first_job', key='return_value')[0]}}",
        aws_conn_id='aws_default',
    )

    # trigger_rule="all_done" is set on the termination task.
    terminate_job_flow_task = EmrTerminateJobFlowOperator(
        task_id='terminate_job_flow',
        job_flow_id="{{task_instance.xcom_pull(task_ids='create_job_flow', key='return_value')}}",
        aws_conn_id='aws_default',
        trigger_rule="all_done",
    )

# Dependencies: one linear pipeline, declared as a single chain.
(
    check_data_exists_task
    >> create_job_flow_task
    >> add_step_task
    >> watch_prev_step_task
    >> terminate_job_flow_task
)
Example #8
0
        task_id="wait_for_step_load_raw_data",
        job_flow_id=create_cluster.output,
        step_id="{{ task_instance.xcom_pull(task_ids='add_step_load_raw_data', key='return_value')[0] }}",
        aws_conn_id="aws_default",
    )

    # Submit the TRANSFORM steps to the job flow from ``create_cluster.output``.
    add_step_transform = EmrAddStepsOperator(
        task_id="add_step_transform",
        job_flow_id=create_cluster.output,
        aws_conn_id="aws_default",
        steps=SparkSteps.TRANSFORM,
    )

    # Watches the first ([0]) entry of the 'add_step_transform' return-value XCom.
    wait_for_step_transform = EmrStepSensor(
        task_id="wait_for_step_transform",
        job_flow_id=create_cluster.output,
        step_id="{{ task_instance.xcom_pull(task_ids='add_step_transform', key='return_value')[0] }}",
        aws_conn_id="aws_default",
    )

    # The termination task uses TriggerRule.ALL_DONE.
    terminate_cluster = EmrTerminateJobFlowOperator(
        task_id="terminate_cluster",
        job_flow_id=create_cluster.output,
        aws_conn_id="aws_default",
        trigger_rule=TriggerRule.ALL_DONE,
    )

    # Linear pipeline, parenthesised instead of backslash-continued.
    (
        add_step_load_raw_data
        >> wait_for_step_load_raw_data
        >> add_step_transform
        >> wait_for_step_transform
        >> terminate_cluster
    )
    dag=dag)

# Submit ``step2`` to the job flow id pulled from the 'create_job_flow' XCom.
emr_step_2 = EmrAddStepsOperator(
    task_id='emr_step2',
    job_flow_id="{{ task_instance.xcom_pull(task_ids='create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    steps=step2,
    dag=dag,
)

# Watches the first ([0]) entry of the 'emr_step2' return-value XCom.
emr_step_sensor = EmrStepSensor(
    task_id='watch_step',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    step_id="{{ task_instance.xcom_pull(task_ids='emr_step2', key='return_value')[0] }}",
    aws_conn_id='aws_default',
    dag=dag,
)

# Terminates the job flow id pulled from the 'create_job_flow' XCom.
stop_emr_cluster = EmrTerminateJobFlowOperator(
    task_id='stop_emr1',
    job_flow_id="{{ task_instance.xcom_pull(task_ids='create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    dag=dag,
)

# Linear pipeline, declared as a single chain.
(
    create_emr_cluster
    >> emr_step_1
    >> emr_step_2
    >> emr_step_sensor
    >> stop_emr_cluster
)