Esempio n. 1
0
    def test_dagrun_success_when_all_skipped(self):
        """
        A running DAG run must resolve to SUCCESS when its short-circuit task
        succeeded and every task downstream of it is in the SKIPPED state.
        """
        dag = DAG(
            dag_id='test_dagrun_success_when_all_skipped',
            start_date=timezone.datetime(2017, 1, 1),
        )
        short_circuit = ShortCircuitOperator(
            task_id='test_short_circuit_false',
            python_callable=lambda: False,
            dag=dag,
        )
        first_skipped = DummyOperator(dag=dag, task_id='test_state_skipped1')
        second_skipped = DummyOperator(dag=dag, task_id='test_state_skipped2')

        # Linear chain: short_circuit -> first_skipped -> second_skipped.
        short_circuit.set_downstream(first_skipped)
        first_skipped.set_downstream(second_skipped)

        # Seed the run with the task states, keyed by task_id.
        seeded_states = {
            'test_short_circuit_false': State.SUCCESS,
            'test_state_skipped1': State.SKIPPED,
            'test_state_skipped2': State.SKIPPED,
        }
        dag_run = self.create_dag_run(
            dag=dag,
            state=State.RUNNING,
            task_states=seeded_states,
        )

        # update_state() returns the run's recomputed state.
        self.assertEqual(State.SUCCESS, dag_run.update_state())
Esempio n. 2
0
    def test_dagrun_success_when_all_skipped(self):
        """
        A running DAG run must resolve to SUCCESS when its short-circuit task
        succeeded and every task downstream of it is in the SKIPPED state.
        """
        dag = DAG(dag_id='test_dagrun_success_when_all_skipped',
                  start_date=datetime.datetime(2017, 1, 1))

        short_circuit = ShortCircuitOperator(task_id='test_short_circuit_false',
                                             python_callable=lambda: False,
                                             dag=dag)
        first_skipped = DummyOperator(dag=dag, task_id='test_state_skipped1')
        second_skipped = DummyOperator(dag=dag, task_id='test_state_skipped2')

        # Linear chain: short_circuit -> first_skipped -> second_skipped.
        short_circuit.set_downstream(first_skipped)
        first_skipped.set_downstream(second_skipped)

        # Seed the run with the task states, keyed by task_id.
        seeded_states = {
            'test_short_circuit_false': State.SUCCESS,
            'test_state_skipped1': State.SKIPPED,
            'test_state_skipped2': State.SKIPPED,
        }
        dag_run = self.create_dag_run(
            dag=dag,
            state=State.RUNNING,
            task_states=seeded_states,
        )

        # update_state() returns the run's recomputed state.
        self.assertEqual(State.SUCCESS, dag_run.update_state())
Esempio n. 3
0
                <tr><td><b> Task ID: </b></td><td>{{ task_instance.task_id }}</td></tr>
                <tr><td><b> Execution Date: </b></td><td>{{ task_instance.execution_date }}</td></tr>
                <tr><td><b> Start Date: </b></td><td>{{ task_instance.start_date }}</td></tr>
                <tr><td><b> End Date: </b></td><td>{{ task_instance.end_date }}</td></tr>
                <tr><td><b> Host Name: </b></td><td>{{ task_instance.hostname }}</td></tr>
                <tr><td><b> Unix Name: </b></td><td>{{ task_instance.unixname }}</td></tr>
                <tr><td><b> Job ID: </b></td><td>{{ task_instance.job_id }}</td></tr>
                <tr><td><b> Queued Date Time: </b></td><td>{{ task_instance.queued_dttm }}</td></tr>
                <tr><td><b> Log URL: </b></td><td><a href="{{ task_instance.log_url }}">{{ task_instance.log_url }}</a></td></tr>
            </table>

            <h2>Processes Killed</h2>
            <ul>
            {% for process_killed in task_instance.xcom_pull(task_ids='kill_halted_tasks', key='kill_halted_tasks.processes_to_kill') %}
                <li>Process {{loop.index}}</li>
                <ul>
                {% for key, value in process_killed.iteritems() %}
                    <li>{{ key }}: {{ value }}</li>
                {% endfor %}
                </ul>
            {% endfor %}
            </ul>
        </body>
    </html>
    """,
    dag=dag)


# Task order: kill_halted_tasks -> email_or_not_branch -> send_processes_killed_email
# (same edges as before, declared from the downstream side).
email_or_not_branch.set_upstream(kill_halted_tasks)
send_processes_killed_email.set_upstream(email_or_not_branch)
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime

from airflow.models import DAG
from airflow.operators.python_operator import ShortCircuitOperator
from airflow.operators.dummy_operator import DummyOperator

# Test DAG: the short-circuit task's callable always returns False, so the
# two dummy tasks chained behind it are meant to end up skipped.
dag = DAG(
    dag_id='test_dagrun_short_circuit_false',
    start_date=datetime(2017, 1, 1),
)
dag_task1 = ShortCircuitOperator(
    task_id='test_short_circuit_false',
    python_callable=lambda: False,
    dag=dag,
)
dag_task2 = DummyOperator(dag=dag, task_id='test_state_skipped1')
dag_task3 = DummyOperator(dag=dag, task_id='test_state_skipped2')

# Linear chain: dag_task1 -> dag_task2 -> dag_task3.
dag_task2.set_upstream(dag_task1)
dag_task3.set_upstream(dag_task2)
Esempio n. 5
0
def docker_move_subdag(host_top_dir, input_path, output_path):
    """Build the ``docker_backup_db`` DAG that moves one file between directories.

    Task order: ``view_file`` -> ``check_if_data_available`` -> ``move_data``
    -> ``print``.

    Args:
        host_top_dir: Host directory under which both the input and the
            output directories are resolved.
        input_path: Input directory. It is appended to ``host_top_dir`` to get
            the host-side path and is also used as the mount point inside the
            ``move_data`` container.
        output_path: Output directory. It is appended to ``host_top_dir`` to
            get the host-side path and is also used as the mount point inside
            the ``move_data`` and ``print`` containers.

    Returns:
        The constructed DAG (previously the DAG was built and then discarded,
        so callers received ``None``).
    """
    host_input_dir = f"{host_top_dir}/{input_path}"
    host_output_dir = f"{host_top_dir}/{output_path}"

    with DAG("docker_backup_db",
             default_args=default_args,
             schedule_interval=timedelta(minutes=10)) as dag:

        # Prints the bare file name (find's %f) of the first regular file
        # found under the input directory; the output is pushed to XCom.
        locate_file_cmd = """
            sleep 10
            find {{params.source_location}} -type f  -printf "%f\n" | head -1
        """

        t_view = BashOperator(task_id="view_file",
                              bash_command=locate_file_cmd,
                              xcom_push=True,
                              params={"source_location": host_input_dir})

        def is_data_available(*args, **kwargs):
            """Return True when ``view_file`` pushed a value to XCom."""
            # NOTE(review): this relies on the task context being passed in as
            # keyword arguments; on Airflow 1.x that appears to require
            # provide_context=True on the operator -- confirm against the
            # deployed Airflow version.
            ti = kwargs["ti"]
            data = ti.xcom_pull(key=None, task_ids="view_file")
            return data is not None

        t_is_data_available = ShortCircuitOperator(
            task_id="check_if_data_available",
            python_callable=is_data_available)

        t_move = DockerOperator(
            api_version="auto",
            docker_url=
            "tcp://socat:2375",  # replace it with swarm/docker endpoint
            image="centos:latest",
            network_mode="bridge",
            volumes=[
                f"{host_input_dir}:{input_path}",
                # Mount the host *output* directory at the container's output
                # path. The original mounted the input directory here as well,
                # so the moved file never reached the directory that the
                # "print" task mounts below.
                f"{host_output_dir}:{output_path}",
            ],
            command=[
                "/bin/bash",
                "-c",
                "/bin/sleep 30; "
                "/bin/mv {{params.source_location}}/{{ ti.xcom_pull('view_file') }} {{params.target_location}};"
                "/bin/echo '{{params.target_location}}/{{ ti.xcom_pull('view_file') }}';",
            ],
            task_id="move_data",
            xcom_push=True,
            params={
                "source_location": f"{input_path}",
                "target_location": f"{output_path}"
            },
        )

        # Cats the path that "move_data" echoed and pushed to XCom.
        print_templated_cmd = """
            cat {{ ti.xcom_pull('move_data') }}
        """

        t_print = DockerOperator(
            api_version="auto",
            docker_url="tcp://socat:2375",
            image="centos:latest",
            volumes=[f"{host_output_dir}:{output_path}"],
            command=print_templated_cmd,
            task_id="print",
        )

        t_view.set_downstream(t_is_data_available)
        t_is_data_available.set_downstream(t_move)
        t_move.set_downstream(t_print)

    return dag
Esempio n. 6
0
	end.set_upstream(ssh_tasks)
	# if no hive scripts generated, short circuit step in the beginning of main dag
	return ssh_dag
	
		
# Main DAG: cron schedule '0 3 * * *', each run limited to one hour.
dag = DAG(
    's3_convert_json_to_parquet_emr_ssh',
    schedule_interval='0 3 * * *',
    dagrun_timeout=timedelta(hours=1),
    default_args=defautlt_args,
)

# First step: short-circuit operator driven by gen_hive_scripts.
step_entities_partitions = ShortCircuitOperator(
    dag=dag,
    task_id='step_entities_partitions',
    provide_context=True,
    python_callable=gen_hive_scripts,
)

# Second step: sub-DAG produced by get_sub_ssh_cmds_dag for this parent DAG.
step_ssh_subdag = SubDagOperator(
    dag=dag,
    task_id='step_jobs_submit',
    default_args=defautlt_args,
    subdag=get_sub_ssh_cmds_dag(dag, 'step_jobs_submit', defautlt_args),
)

# Final placeholder step.
step_end = DummyOperator(dag=dag, task_id='ssh_end')

# Task order: step_entities_partitions -> step_ssh_subdag -> step_end.
step_ssh_subdag.set_upstream(step_entities_partitions)
step_end.set_upstream(step_ssh_subdag)
            <table>
                <tr><td><b> Task ID: </b></td><td>{{ task_instance.task_id }}</td></tr>
                <tr><td><b> Execution Date: </b></td><td>{{ task_instance.execution_date }}</td></tr>
                <tr><td><b> Start Date: </b></td><td>{{ task_instance.start_date }}</td></tr>
                <tr><td><b> End Date: </b></td><td>{{ task_instance.end_date }}</td></tr>
                <tr><td><b> Host Name: </b></td><td>{{ task_instance.hostname }}</td></tr>
                <tr><td><b> Unix Name: </b></td><td>{{ task_instance.unixname }}</td></tr>
                <tr><td><b> Job ID: </b></td><td>{{ task_instance.job_id }}</td></tr>
                <tr><td><b> Queued Date Time: </b></td><td>{{ task_instance.queued_dttm }}</td></tr>
                <tr><td><b> Log URL: </b></td><td><a href="{{ task_instance.log_url }}">{{ task_instance.log_url }}</a></td></tr>
            </table>

            <h2>Processes Killed</h2>
            <ul>
            {% for process_killed in task_instance.xcom_pull(task_ids='kill_halted_tasks', key='kill_halted_tasks.processes_to_kill') %}
                <li>Process {{loop.index}}</li>
                <ul>
                {% for key, value in process_killed.iteritems() %}
                    <li>{{ key }}: {{ value }}</li>
                {% endfor %}
                </ul>
            {% endfor %}
            </ul>
        </body>
    </html>
    """,
    dag=DAG_OBJ)

# Task order: KILL_HALTED_TASKS_OPR -> EMAIL_OR_NOT_BRANCH_OPR
# -> SEND_PROCESSES_KILLED_EMAIL_OPR (same edges, declared from the downstream side).
EMAIL_OR_NOT_BRANCH_OPR.set_upstream(KILL_HALTED_TASKS_OPR)
SEND_PROCESSES_KILLED_EMAIL_OPR.set_upstream(EMAIL_OR_NOT_BRANCH_OPR)