Example #1
0
    def test_check_task_dependencies(self, trigger_rule, successes, skipped,
                                     failed, upstream_failed, done,
                                     flag_upstream_failed, expect_state,
                                     expect_completed):
        """Check ``evaluate_trigger_rule`` for one combination of inputs.

        Builds a DAG in which five upstream tasks feed a single
        ``downstream`` task configured with ``trigger_rule``, evaluates the
        rule on a task instance of ``downstream`` with the supplied counts,
        and asserts both the returned value (``expect_completed``) and the
        resulting task-instance state (``expect_state``).
        """
        dag = models.DAG(
            'test-dag',
            start_date=datetime.datetime(2016, 2, 1, 0, 0, 0),
        )
        downstream = DummyOperator(
            task_id='downstream',
            dag=dag,
            owner='airflow',
            trigger_rule=trigger_rule,
        )
        for idx in range(5):
            upstream = DummyOperator(
                task_id='runme_{}'.format(idx),
                dag=dag,
                owner='airflow',
            )
            upstream.set_downstream(downstream)
        # The run date is taken from the last upstream task created above.
        run_date = upstream.start_date + datetime.timedelta(days=5)

        ti = TI(downstream, run_date)
        rule_inputs = dict(
            successes=successes,
            skipped=skipped,
            failed=failed,
            upstream_failed=upstream_failed,
            done=done,
            flag_upstream_failed=flag_upstream_failed,
        )
        completed = ti.evaluate_trigger_rule(**rule_inputs)

        self.assertEqual(completed, expect_completed)
        self.assertEqual(ti.state, expect_state)
Example #2
0
    def test_infer_dag(self):
        """Check how operators pick up (or refuse) a DAG when wired together.

        Covers: wiring two operators that both lack a DAG, an operator
        without a DAG receiving the DAG of the operator it is attached to,
        and attempts to connect operators that belong to different DAGs.
        """
        dag = DAG('dag', start_date=DEFAULT_DATE)
        dag2 = DAG('dag2', start_date=DEFAULT_DATE)

        unbound_a = DummyOperator(task_id='test_op_1', owner='test')
        unbound_b = DummyOperator(task_id='test_op_2', owner='test')
        in_dag = DummyOperator(task_id='test_op_3', owner='test', dag=dag)
        in_dag2 = DummyOperator(task_id='test_op_4', owner='test', dag=dag2)

        # double check dags
        has_dag_flags = [
            op.has_dag() for op in [unbound_a, unbound_b, in_dag, in_dag2]
        ]
        self.assertEqual(has_dag_flags, [False, False, True, True])

        # can't combine operators with no dags
        with self.assertRaises(AirflowException):
            unbound_a.set_downstream(unbound_b)

        # the second operator should infer its dag from the first
        unbound_a.dag = dag
        unbound_a.set_downstream(unbound_b)
        self.assertIs(unbound_b.dag, dag)

        # can't assign across multiple DAGs
        with self.assertRaises(AirflowException):
            unbound_a.set_downstream(in_dag2)
        with self.assertRaises(AirflowException):
            unbound_a.set_downstream([in_dag, in_dag2])
    def test_check_task_dependencies(self, trigger_rule, successes, skipped,
                                     failed, upstream_failed, done,
                                     flag_upstream_failed,
                                     expect_state, expect_completed):
        """Evaluate a trigger rule and compare the outcome with expectations.

        Five ``runme_<i>`` tasks are placed upstream of one ``downstream``
        task that uses ``trigger_rule``. The rule is evaluated on a task
        instance of ``downstream`` using the given counts; the return value
        must equal ``expect_completed`` and the instance state must equal
        ``expect_state``.
        """
        dag_start = datetime.datetime(2016, 2, 1, 0, 0, 0)
        dag = models.DAG('test-dag', start_date=dag_start)
        downstream = DummyOperator(
            task_id='downstream', dag=dag, owner='airflow',
            trigger_rule=trigger_rule)
        for n in range(5):
            parent = DummyOperator(
                task_id='runme_{}'.format(n), dag=dag, owner='airflow')
            parent.set_downstream(downstream)
        # Uses the start_date of the last upstream task built in the loop.
        five_days = datetime.timedelta(days=5)
        run_date = parent.start_date + five_days

        ti = TI(downstream, run_date)
        completed = ti.evaluate_trigger_rule(
            successes=successes,
            skipped=skipped,
            failed=failed,
            upstream_failed=upstream_failed,
            done=done,
            flag_upstream_failed=flag_upstream_failed,
        )

        self.assertEqual(completed, expect_completed)
        self.assertEqual(ti.state, expect_state)
Example #4
0
    task_id='test_depends_on_past_2',
    depends_on_past=True,
    dag=dag6,
)
# dag6: the second task runs after the first one.
dag6_task2.set_upstream(dag6_task1)

# DAG tests that a deadlocked subdag is properly caught
dag7 = DAG(
    dag_id='test_subdag_deadlock',
    default_args=default_args,
)
subdag7 = DAG(
    dag_id='test_subdag_deadlock.subdag',
    default_args=default_args,
)
# First subdag task invokes the `fail` callable; two dummy tasks follow it.
subdag7_task1 = PythonOperator(
    task_id='test_subdag_fail',
    dag=subdag7,
    python_callable=fail,
)
subdag7_task2 = DummyOperator(task_id='test_subdag_dummy_1', dag=subdag7)
subdag7_task3 = DummyOperator(
    task_id='test_subdag_dummy_2',
    dag=subdag7,
)
# The subdag is attached to dag7 through a single SubDagOperator task.
dag7_subdag1 = SubDagOperator(
    task_id='subdag',
    dag=dag7,
    subdag=subdag7,
)
# Chain inside the subdag: task1 -> task2 -> task3.
subdag7_task1.set_downstream(subdag7_task2)
subdag7_task2.set_downstream(subdag7_task3)

# DAG tests that queued tasks are run
dag8 = DAG(
    dag_id='test_scheduled_queued_tasks',
    start_date=DEFAULT_DATE,
    end_date=DEFAULT_DATE,
    default_args=default_args,
)
dag8_task1 = PythonOperator(
    python_callable=fail,
    task_id='test_queued_task',
    dag=dag8,
    pool='test_queued_pool',
)
from airflow.operators import BranchPythonOperator, DummyOperator
from airflow.models import DAG
from datetime import datetime, timedelta
import random

# Midnight at the start of the day that lies seven days before today.
seven_days_ago = datetime.combine(
    datetime.today() - timedelta(days=7),
    datetime.min.time(),
)
args = {'owner': 'airflow', 'start_date': seven_days_ago}

dag = DAG(dag_id='example_branch_operator', default_args=args)

cmd = 'ls -l'
run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

# The branch callable returns one entry of `options`, chosen at random.
branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag,
)
branching.set_upstream(run_this_first)

# For every option: branching -> <option> -> follow_<option>.
for option in options:
    t = DummyOperator(task_id=option, dag=dag)
    t.set_upstream(branching)
    dummy_follow = DummyOperator(
        task_id='follow_{}'.format(option),
        dag=dag,
    )
    t.set_downstream(dummy_follow)
Example #6
0
# Midnight at the start of the day that lies seven days before today.
seven_days_ago = datetime.combine(
    datetime.today() - timedelta(days=7),
    datetime.min.time(),
)
args = {'owner': 'airflow', 'start_date': seven_days_ago}

dag = DAG(
    dag_id='example_branch_operator',
    default_args=args,
    schedule_interval="@daily",
)

cmd = 'ls -l'
run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

# The branch callable returns one entry of `options`, chosen at random.
branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag,
)
branching.set_upstream(run_this_first)

# Every follow_<option> task feeds this task, which uses the
# 'one_success' trigger rule.
join = DummyOperator(
    task_id='join',
    trigger_rule='one_success',
    dag=dag,
)

# For every option: branching -> <option> -> follow_<option> -> join.
for option in options:
    t = DummyOperator(task_id=option, dag=dag)
    t.set_upstream(branching)
    dummy_follow = DummyOperator(
        task_id='follow_{}'.format(option),
        dag=dag,
    )
    t.set_downstream(dummy_follow)
    dummy_follow.set_downstream(join)
}

dag = DAG(dag_id='example_branch_operator',
          default_args=args,
          schedule_interval="@daily")

cmd = 'ls -l'
run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

# The callable hands back one randomly chosen entry of `options`.
branching = BranchPythonOperator(task_id='branching',
                                 python_callable=lambda: random.choice(options),
                                 dag=dag)
branching.set_upstream(run_this_first)

# Each follow_<option> task is wired into this 'one_success' task below.
join = DummyOperator(task_id='join', trigger_rule='one_success', dag=dag)

# Per option: branching -> <option> -> follow_<option> -> join.
for option in options:
    t = DummyOperator(task_id=option, dag=dag)
    t.set_upstream(branching)
    dummy_follow = DummyOperator(task_id='follow_{}'.format(option), dag=dag)
    t.set_downstream(dummy_follow)
    dummy_follow.set_downstream(join)
Example #8
0
    t.set_upstream(dummy_op)
    t.set_downstream(create_temp_scores_table_op)

# Runs the archive shell script as a Bash task.
archive_trained_models_op = BashOperator(
    task_id='archive_trained_models',
    bash_command='scripts/bash/archive_trained_models.sh',
    dag=dag,
)

# Posts a Slack message; token and icon URL are read from Airflow Variables.
notify_processing_completion_op = SlackAPIPostOperator(
    task_id='notify_processing_completion',
    token=Variable.get('slack_token'),
    channel='#engineering-commits',
    username='******',
    icon_url=Variable.get('tia_slack_icon_url'),
    text='*user_work_experience_job_posting_similarity_scores* has been refreshed on {{ts}}',
    dag=dag,
)

# create temp table -> copy scores -> {remove, update} -> delete temp table
# -> notify.
create_temp_scores_table_op.set_downstream(copy_scores_to_temp_table_op)
copy_scores_to_temp_table_op.set_downstream(
    [remove_scores_op, update_scores_op])
delete_temp_scores_table_op.set_upstream(
    [remove_scores_op, update_scores_op])
delete_temp_scores_table_op.set_downstream(notify_processing_completion_op)

# dummy_op waits for the three feature-computation tasks, then triggers
# the archive task.
dummy_op.set_upstream([
    compute_title_feature_op,
    compute_skill_feature_op,
    compute_description_feature_op,
])
dummy_op.set_downstream(archive_trained_models_op)
Example #9
0
# First sub-DAG section; its DAG is produced by the `subdag` helper.
section_1 = SubDagOperator(
    task_id='section-1',
    subdag=subdag(DAG_NAME, 'section-1', args),
    default_args=args, dag=dag)

some_other_task = DummyOperator(
    task_id='some-other-task', default_args=args, dag=dag)

# Second sub-DAG section, built the same way as the first.
section_2 = SubDagOperator(
    task_id='section-2',
    subdag=subdag(DAG_NAME, 'section-2', args),
    default_args=args, dag=dag)

end = DummyOperator(task_id='end', default_args=args, dag=dag)

# Linear chain: start -> section-1 -> some-other-task -> section-2 -> end.
start.set_downstream(section_1)
section_1.set_downstream(some_other_task)
some_other_task.set_downstream(section_2)
section_2.set_downstream(end)
# Parent DAG and the DAG used as its subdag.
dag7 = DAG(dag_id='test_subdag_deadlock',
           default_args=default_args)
subdag7 = DAG(dag_id='test_subdag_deadlock.subdag',
              default_args=default_args)
# First subdag task invokes the `fail` callable; two dummy tasks follow it.
subdag7_task1 = PythonOperator(task_id='test_subdag_fail',
                               dag=subdag7,
                               python_callable=fail)
subdag7_task2 = DummyOperator(task_id='test_subdag_dummy_1', dag=subdag7)
subdag7_task3 = DummyOperator(task_id='test_subdag_dummy_2', dag=subdag7)
dag7_subdag1 = SubDagOperator(task_id='subdag', dag=dag7, subdag=subdag7)
# Chain inside the subdag: task1 -> task2 -> task3.
subdag7_task1.set_downstream(subdag7_task2)
subdag7_task2.set_downstream(subdag7_task3)

# DAG tests that queued tasks are run
dag8 = DAG(dag_id='test_scheduled_queued_tasks',
           start_date=DEFAULT_DATE,
           end_date=DEFAULT_DATE,
           default_args=default_args)
dag8_task1 = PythonOperator(python_callable=fail,
                            task_id='test_queued_task',
                            dag=dag8,
                            pool='test_queued_pool')
Example #11
0
    success_time_value="{{ macros.datetime.now() }}",
)
# Status-update task for the daily "aws_userplatform" report, built by the
# gen_update_job_status_task helper (defined outside this excerpt).
update_aws_userplatform_daily_status = gen_update_job_status_task(
    upstream_task_id_str=aws_userplatform_daily_taskid,
    report_name_value="aws_userplatform",
    report_time_type_value="daily",
    report_time_value="{{ ds }}",
    success_time_value="{{ macros.datetime.now() }}",
)


# Set dependencies
# start_task fans out to the four "hourly all ready" tasks.
start_task.set_downstream(
    [
        is_local_dsp_info_hourly_all_ready,
        is_local_dsp_simple_hourly_all_ready,
        is_aws_dsp_simple_hourly_all_ready,
        is_aws_dsp_info_hourly_all_ready,
    ]
)

# local dsp uv
local_dsp_uv_daily.set_upstream(is_local_dsp_simple_hourly_all_ready)
local_dsp_uv_daily.set_downstream(end_task)

# local dsp simple daily
local_dsp_simple_daily.set_upstream(is_local_dsp_simple_hourly_all_ready)
local_dsp_simple_daily.set_downstream(end_task)

# local dsp info daily
# NOTE(review): this waits on the *simple* hourly task, although
# is_local_dsp_info_hourly_all_ready is also fanned out from start_task
# above and has no downstream in this excerpt -- confirm this is intended
# and not a copy-paste slip.
local_dsp_info_daily.set_upstream(is_local_dsp_simple_hourly_all_ready)
local_dsp_info_daily.set_downstream(end_task)
Example #12
0
    'owner': 'airflow',
    'start_date': seven_days_ago,
}

dag = DAG(
    dag_id='dag2',
    default_args=args,
    # A crontab-style time expression can be used here.
    schedule_interval="30 17 * * *",
)

task0 = DummyOperator(task_id='task0', dag=dag)

cmd = 'ls -l'
task1 = BashOperator(task_id='task1', bash_command=cmd, dag=dag)

task0.set_downstream(task1)

# task2 and task3 both use the 'all_done' trigger rule together with
# depends_on_past=True.
task2 = DummyOperator(
    task_id='task2',
    dag=dag,
    trigger_rule='all_done',
    depends_on_past=True,
)

task2.set_upstream(task1)

task3 = DummyOperator(
    task_id='task3',
    dag=dag,
    trigger_rule='all_done',
    depends_on_past=True,
)

# Resulting chain: task0 -> task1 -> task2 -> task3.
task3.set_upstream(task2)
nothing_to_update_op = DummyOperator(task_id='nothing_to_update', dag=dag)

# Both "to be updated" checks feed the remove check and the update check.
check_job_posting_to_be_updated_op.set_downstream(
    [check_to_remove_op, check_to_update_op])

check_work_experience_to_be_updated_op.set_downstream(
    [check_to_remove_op, check_to_update_op])

# Tasks that run after each check.
update_scores_branch_op.set_upstream(check_to_update_op)
remove_scores_op.set_upstream(check_to_remove_op)
nothing_to_remove_op.set_upstream(check_to_remove_op)
nothing_to_update_op.set_upstream(check_to_update_op)

# The two "nothing to do" tasks lead straight to the notification.
notify_processing_completion_op.set_upstream(
    [nothing_to_remove_op, nothing_to_update_op])

# Update branch: three feature computations -> similarity -> update scores.
update_scores_branch_op.set_downstream([
    compute_title_feature_op,
    compute_skill_feature_op,
    compute_description_feature_op,
])

compute_similarity_op.set_upstream([
    compute_title_feature_op,
    compute_skill_feature_op,
    compute_description_feature_op,
])
compute_similarity_op.set_downstream(update_scores_op)
# The notification also follows the update and remove tasks.
notify_processing_completion_op.set_upstream(
    [update_scores_op, remove_scores_op])
# Sub-DAG section 1; the `subdag` helper supplies the DAG it wraps.
section_1 = SubDagOperator(task_id='section-1',
                           subdag=subdag(DAG_NAME, 'section-1', args),
                           default_args=args,
                           dag=dag)

some_other_task = DummyOperator(task_id='some-other-task',
                                default_args=args,
                                dag=dag)

# Sub-DAG section 2, constructed like section 1.
section_2 = SubDagOperator(task_id='section-2',
                           subdag=subdag(DAG_NAME, 'section-2', args),
                           default_args=args,
                           dag=dag)

end = DummyOperator(task_id='end',
                    default_args=args,
                    dag=dag)

# Execution order: start, section-1, some-other-task, section-2, end.
start.set_downstream(section_1)
section_1.set_downstream(some_other_task)
some_other_task.set_downstream(section_2)
section_2.set_downstream(end)