Esempio n. 1
0
    cluster_type="redshift",
    time_zone=local_tz)

# Sensor on task "end_task" of the external DAG "03_data_quality_check_dag".
# It runs in "reschedule" mode with a poke interval of 120.
wait_on_tasks_completed = ExternalTaskSensor(
    dag=dag,
    task_id="wait_on_tasks_completed",
    external_dag_id="03_data_quality_check_dag",
    external_task_id="end_task",
    poke_interval=120,
    mode="reschedule",
)

# Tear-down operators, one per cluster type. Each one is given the id of the
# task that created its cluster through ``cluster_creation_task``.
terminate_emr_cluster = AWSTerminateClusterOperator(
    dag=dag,
    task_id="terminate_emr_cluster",
    conn_id=AWS_CONN_ID,
    cluster_type="emr",
    cluster_creation_task=DAG_NAME + ".create_emr_cluster",
)

terminate_redshift_cluster = AWSTerminateClusterOperator(
    dag=dag,
    task_id="terminate_redshift_cluster",
    conn_id=AWS_CONN_ID,
    cluster_type="redshift",
    cluster_creation_task=DAG_NAME + ".create_redshift_cluster",
)

# Dependencies: both cluster-creation tasks -> sensor -> both terminations.
[create_emr_cluster, create_redshift_cluster] >> wait_on_tasks_completed
wait_on_tasks_completed >> [terminate_emr_cluster, terminate_redshift_cluster]
Esempio n. 2
0
def set_dependencies(yaml_specs, tasks, latest_only=True, **kwargs):
    """Wire the upstream dependencies of ``tasks`` described by ``yaml_specs``.

    Two kinds of dependencies are handled:

    * External: for every entry returned by
      ``get_external_dependencies(yaml_specs)`` an ``ExternalTaskSensor`` is
      created (once per distinct external DAG / external task pair) and set
      upstream of the local task. An external task named ``"all"`` means
      "wait for the whole external DAG" (``external_task_id=None``).
    * Local: for every entry returned by ``get_dependencies(yaml_specs)`` the
      listed dependencies that exist in ``tasks`` (and are not the task
      itself) are set upstream of the task.

    When ``latest_only`` is true, a ``LatestOnlyOperator`` with task id
    ``'latest_only'`` is created and set upstream of every sensor and of
    every local task that ends up with neither a valid local dependency nor
    an external sensor.

    Args:
        yaml_specs: Specification object handed to ``get_dependencies`` and
            ``get_external_dependencies``; its structure is defined by those
            helpers.
        tasks: Mapping of task id to task object exposing ``set_upstream``.
        latest_only: Whether to gate the graph behind a ``LatestOnlyOperator``.
        **kwargs: Must contain ``"dag"`` whenever ``latest_only`` is true or
            at least one sensor has to be created.

    Returns:
        None. The tasks in ``tasks`` are modified in place.

    Raises:
        KeyError: If ``"dag"`` is needed but missing from ``kwargs``, or if an
            entry refers to a task id that is not in ``tasks``.
    """
    dependencies = get_dependencies(yaml_specs)
    external_dependencies = get_external_dependencies(yaml_specs)

    latest_only_operator = None
    if latest_only:
        latest_only_operator = LatestOnlyOperator(task_id='latest_only',
                                                  dag=kwargs["dag"])

    # external dependencies
    # Sensors are cached per (external DAG, external task) pair. Caching by
    # the generated task name alone would make two different DAGs that expose
    # the same task id share one sensor, i.e. wait on the wrong DAG.
    sensors = {}
    used_sensor_ids = set()
    tasks_with_external_dependencies = set()

    for task in external_dependencies:
        task_id = task["task_id"]

        for external_dependency in task["external_dependencies"]:
            # Each dependency is a single-item mapping {external_dag: task}.
            external_dag, external_task = list(external_dependency.items())[0]
            sensor_key = (external_dag, external_task)

            if sensor_key not in sensors:
                wait_for_whole_dag = external_task == "all"
                if wait_for_whole_dag:
                    task_name = "wait_for_DAG_" + external_dag
                else:
                    task_name = "wait_for_" + external_task

                if task_name in used_sensor_ids:
                    # The short id is already taken by a sensor on another
                    # DAG; qualify it with the DAG id. Ids of non-colliding
                    # sensors stay exactly as before.
                    task_name = ("wait_for_" + external_dag + "_" +
                                 external_task)

                wait_for_task = ExternalTaskSensor(
                    dag=kwargs["dag"],
                    task_id=task_name,
                    external_dag_id=external_dag,
                    external_task_id=(None if wait_for_whole_dag
                                      else external_task),
                    poke_interval=20 if wait_for_whole_dag else 60,
                    timeout=60,
                    retries=25)
                sensors[sensor_key] = wait_for_task
                used_sensor_ids.add(task_name)

                if latest_only:
                    wait_for_task.set_upstream(latest_only_operator)

            tasks[task_id].set_upstream(sensors[sensor_key])
            # Record the task only once a sensor is really attached, so a
            # task with an empty external list is still gated by latest_only.
            tasks_with_external_dependencies.add(task_id)

    # local dependencies
    for task in dependencies:
        task_id = task["task_id"]
        valid_dependencies = [
            spec_dependency for spec_dependency in task["dependencies"]
            if spec_dependency in tasks and spec_dependency != task_id
        ]

        if valid_dependencies:
            for spec_dependency in valid_dependencies:
                tasks[task_id].set_upstream(tasks[spec_dependency])
        elif latest_only and task_id not in tasks_with_external_dependencies:
            # Root task without any upstream: hang it under latest_only.
            tasks[task_id].set_upstream(latest_only_operator)