def test_check_task_dependencies(self, trigger_rule, successes, skipped,
                                 failed, upstream_failed, done,
                                 flag_upstream_failed, expect_state,
                                 expect_completed):
    """Check evaluate_trigger_rule for one trigger-rule/counter combination.

    Builds a DAG with five dummy upstream tasks all feeding a single
    downstream task, then verifies both the completion flag returned by
    ``evaluate_trigger_rule`` and the resulting task-instance state.
    """
    dag_start = datetime.datetime(2016, 2, 1, 0, 0, 0)
    test_dag = models.DAG('test-dag', start_date=dag_start)
    sink = DummyOperator(
        task_id='downstream',
        dag=test_dag,
        owner='airflow',
        trigger_rule=trigger_rule)
    feeders = [
        DummyOperator(
            task_id='runme_{}'.format(idx), dag=test_dag, owner='airflow')
        for idx in range(5)
    ]
    for feeder in feeders:
        feeder.set_downstream(sink)
    # Pick an execution date safely after the operators' start date.
    execution_date = feeders[-1].start_date + datetime.timedelta(days=5)
    instance = TI(sink, execution_date)
    is_complete = instance.evaluate_trigger_rule(
        successes=successes,
        skipped=skipped,
        failed=failed,
        upstream_failed=upstream_failed,
        done=done,
        flag_upstream_failed=flag_upstream_failed)
    self.assertEqual(is_complete, expect_completed)
    self.assertEqual(instance.state, expect_state)
def test_infer_dag(self):
    """set_downstream should propagate a DAG onto dag-less operators and
    refuse to link operators that belong to different DAGs."""
    dag = DAG('dag', start_date=DEFAULT_DATE)
    dag2 = DAG('dag2', start_date=DEFAULT_DATE)
    free_a = DummyOperator(task_id='test_op_1', owner='test')
    free_b = DummyOperator(task_id='test_op_2', owner='test')
    bound_first = DummyOperator(task_id='test_op_3', owner='test', dag=dag)
    bound_second = DummyOperator(task_id='test_op_4', owner='test', dag=dag2)

    # double check dags
    attachment = [
        op.has_dag() for op in (free_a, free_b, bound_first, bound_second)]
    self.assertEqual(attachment, [False, False, True, True])

    # can't combine operators with no dags
    self.assertRaises(AirflowException, free_a.set_downstream, free_b)

    # once free_a gets a DAG, wiring infers the same DAG onto free_b
    free_a.dag = dag
    free_a.set_downstream(free_b)
    self.assertIs(free_b.dag, dag)

    # can't assign across multiple DAGs
    self.assertRaises(AirflowException, free_a.set_downstream, bound_second)
    self.assertRaises(
        AirflowException,
        free_a.set_downstream,
        [bound_first, bound_second])
def test_check_task_dependencies(self, trigger_rule, successes, skipped,
                                 failed, upstream_failed, done,
                                 flag_upstream_failed, expect_state,
                                 expect_completed):
    """Exercise evaluate_trigger_rule against one parameterized scenario.

    Five upstream dummy tasks feed one downstream task; the supplied
    counters simulate their aggregate states and the resulting completion
    flag and task-instance state are asserted.
    """
    anchor = datetime.datetime(2016, 2, 1, 0, 0, 0)
    dag = models.DAG('test-dag', start_date=anchor)
    target = DummyOperator(
        task_id='downstream',
        dag=dag,
        owner='airflow',
        trigger_rule=trigger_rule)
    last_upstream = None
    for n in range(5):
        last_upstream = DummyOperator(
            task_id='runme_{}'.format(n), dag=dag, owner='airflow')
        last_upstream.set_downstream(target)
    # Evaluate well after the tasks' start date.
    when = last_upstream.start_date + datetime.timedelta(days=5)
    ti = TI(target, when)
    outcome = ti.evaluate_trigger_rule(
        successes=successes,
        skipped=skipped,
        failed=failed,
        upstream_failed=upstream_failed,
        done=done,
        flag_upstream_failed=flag_upstream_failed)
    self.assertEqual(outcome, expect_completed)
    self.assertEqual(ti.state, expect_state)
    task_id='test_depends_on_past_2',
    depends_on_past=True,
    dag=dag6,
)
dag6_task2.set_upstream(dag6_task1)

# DAG tests that a deadlocked subdag is properly caught
dag7 = DAG(dag_id='test_subdag_deadlock', default_args=default_args)
subdag7 = DAG(dag_id='test_subdag_deadlock.subdag', default_args=default_args)
# Head task always fails, blocking the two dummy tasks chained behind it.
subdag7_task1 = PythonOperator(
    task_id='test_subdag_fail', dag=subdag7, python_callable=fail)
subdag7_task2 = DummyOperator(
    task_id='test_subdag_dummy_1',
    dag=subdag7,
)
subdag7_task3 = DummyOperator(task_id='test_subdag_dummy_2', dag=subdag7)
dag7_subdag1 = SubDagOperator(task_id='subdag', dag=dag7, subdag=subdag7)
subdag7_task1.set_downstream(subdag7_task2)
subdag7_task2.set_downstream(subdag7_task3)

# DAG tests that queued tasks are run
dag8 = DAG(
    dag_id='test_scheduled_queued_tasks',
    start_date=DEFAULT_DATE,
    end_date=DEFAULT_DATE,
    default_args=default_args)
# Assigned to a pool so the task gets queued; presumably the pool is set up
# by the enclosing test — confirm in the test module.
dag8_task1 = PythonOperator(
    python_callable=fail,
    task_id='test_queued_task',
    dag=dag8,
    pool='test_queued_pool')
from airflow.operators import BranchPythonOperator, DummyOperator
from airflow.models import DAG
from datetime import datetime, timedelta
import random

# Anchor the start date a week back so the example DAG has runs to show.
seven_days_ago = datetime.combine(
    datetime.today() - timedelta(7), datetime.min.time())

args = {
    'owner': 'airflow',
    'start_date': seven_days_ago,
}

dag = DAG(dag_id='example_branch_operator', default_args=args)

cmd = 'ls -l'  # NOTE(review): not referenced below; kept as-is

run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

# Picks one of the branch task ids at random on every run.
branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag)
run_this_first.set_downstream(branching)

# One dummy task per branch, each with a trailing follower task.
for option in options:
    picked = DummyOperator(task_id=option, dag=dag)
    branching.set_downstream(picked)
    follower = DummyOperator(task_id='follow_' + option, dag=dag)
    picked.set_downstream(follower)
# Anchor the schedule a week back so the daily DAG has runs to show.
seven_days_ago = datetime.combine(
    datetime.today() - timedelta(7), datetime.min.time())

args = {
    'owner': 'airflow',
    'start_date': seven_days_ago,
}

# Daily example DAG demonstrating BranchPythonOperator fan-out and fan-in.
dag = DAG(
    dag_id='example_branch_operator',
    default_args=args,
    schedule_interval="@daily")

cmd = 'ls -l'  # NOTE(review): not referenced below; kept as-is

run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

# Picks one of the branch task ids at random on every run.
branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag)
run_this_first.set_downstream(branching)

# Fan back in: 'join' fires as soon as any surviving branch finishes.
join = DummyOperator(task_id='join', trigger_rule='one_success', dag=dag)

for option in options:
    chosen = DummyOperator(task_id=option, dag=dag)
    branching.set_downstream(chosen)
    tail = DummyOperator(task_id='follow_' + option, dag=dag)
    chosen.set_downstream(tail)
    tail.set_downstream(join)
}

dag = DAG(
    dag_id='example_branch_operator',
    default_args=args,
    schedule_interval="@daily")

cmd = 'ls -l'  # not referenced below

run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

# Picks one of the branch task ids at random on every run.
branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag)
branching.set_upstream(run_this_first)

# Fan back in: 'join' fires as soon as any surviving branch finishes.
join = DummyOperator(
    task_id='join',
    trigger_rule='one_success',
    dag=dag
)

# One dummy task per branch, each with a follower that feeds 'join'.
for option in options:
    t = DummyOperator(task_id=option, dag=dag)
    t.set_upstream(branching)
    dummy_follow = DummyOperator(task_id='follow_' + option, dag=dag)
    t.set_downstream(dummy_follow)
    dummy_follow.set_downstream(join)
# Wire the current task between the feature join point and the temp table.
dummy_op.set_downstream(t)
create_temp_scores_table_op.set_upstream(t)

# Snapshot the trained model artifacts once all features are computed.
archive_trained_models_op = BashOperator(
    task_id='archive_trained_models',
    bash_command='scripts/bash/archive_trained_models.sh',
    dag=dag
)

# Announce pipeline completion in Slack.
notify_processing_completion_op = SlackAPIPostOperator(
    task_id='notify_processing_completion',
    token=Variable.get('slack_token'),
    channel='#engineering-commits',
    username='******',
    icon_url=Variable.get('tia_slack_icon_url'),
    text='*user_work_experience_job_posting_similarity_scores* has been refreshed on {{ts}}',
    dag=dag
)

# temp table -> copy -> (remove | update) -> drop temp table -> notify
copy_scores_to_temp_table_op.set_upstream(create_temp_scores_table_op)
remove_scores_op.set_upstream(copy_scores_to_temp_table_op)
update_scores_op.set_upstream(copy_scores_to_temp_table_op)
remove_scores_op.set_downstream(delete_temp_scores_table_op)
update_scores_op.set_downstream(delete_temp_scores_table_op)
notify_processing_completion_op.set_upstream(delete_temp_scores_table_op)

# All three feature tasks gate the dummy join point, which in turn
# triggers the model archive step.
compute_title_feature_op.set_downstream(dummy_op)
compute_skill_feature_op.set_downstream(dummy_op)
compute_description_feature_op.set_downstream(dummy_op)
archive_trained_models_op.set_upstream(dummy_op)
# First subdag stage.
section_1 = SubDagOperator(
    task_id='section-1',
    subdag=subdag(DAG_NAME, 'section-1', args),
    default_args=args,
    dag=dag,
)

# Plain task sandwiched between the two subdag stages.
some_other_task = DummyOperator(
    task_id='some-other-task', default_args=args, dag=dag)

# Second subdag stage.
section_2 = SubDagOperator(
    task_id='section-2',
    subdag=subdag(DAG_NAME, 'section-2', args),
    default_args=args,
    dag=dag,
)

# Terminal marker task.
end = DummyOperator(task_id='end', default_args=args, dag=dag)

# Linear pipeline: start -> section-1 -> some-other-task -> section-2 -> end.
section_1.set_upstream(start)
some_other_task.set_upstream(section_1)
section_2.set_upstream(some_other_task)
end.set_upstream(section_2)
# DAG tests that a deadlocked subdag is properly caught
dag7 = DAG(dag_id='test_subdag_deadlock', default_args=default_args)
subdag7 = DAG(
    dag_id='test_subdag_deadlock.subdag', default_args=default_args)

# The failing head task blocks the two dummy tasks chained behind it.
subdag7_task1 = PythonOperator(
    task_id='test_subdag_fail', dag=subdag7, python_callable=fail)
subdag7_task2 = DummyOperator(task_id='test_subdag_dummy_1', dag=subdag7)
subdag7_task3 = DummyOperator(task_id='test_subdag_dummy_2', dag=subdag7)
subdag7_task2.set_upstream(subdag7_task1)
subdag7_task3.set_upstream(subdag7_task2)

dag7_subdag1 = SubDagOperator(task_id='subdag', dag=dag7, subdag=subdag7)

# DAG tests that queued tasks are run
dag8 = DAG(
    dag_id='test_scheduled_queued_tasks',
    start_date=DEFAULT_DATE,
    end_date=DEFAULT_DATE,
    default_args=default_args)
dag8_task1 = PythonOperator(
    task_id='test_queued_task',
    python_callable=fail,
    dag=dag8,
    pool='test_queued_pool')
    success_time_value="{{ macros.datetime.now() }}",
)
# Records daily job status for the AWS userplatform report once its
# upstream task finishes.
update_aws_userplatform_daily_status = gen_update_job_status_task(
    upstream_task_id_str=aws_userplatform_daily_taskid,
    report_name_value="aws_userplatform",
    report_time_type_value="daily",
    report_time_value="{{ ds }}",
    success_time_value="{{ macros.datetime.now() }}",
)

##set dependencies
# Fan out from the start task to all four readiness sensors.
start_task.set_downstream(
    [
        is_local_dsp_info_hourly_all_ready,
        is_local_dsp_simple_hourly_all_ready,
        is_aws_dsp_simple_hourly_all_ready,
        is_aws_dsp_info_hourly_all_ready,
    ]
)

# local dsp uv
local_dsp_uv_daily.set_upstream(is_local_dsp_simple_hourly_all_ready)
local_dsp_uv_daily.set_downstream(end_task)

# local dsp simple daily
local_dsp_simple_daily.set_upstream(is_local_dsp_simple_hourly_all_ready)
local_dsp_simple_daily.set_downstream(end_task)

# local dsp info daily
# NOTE(review): this waits on the *simple* readiness sensor even though an
# info sensor (is_local_dsp_info_hourly_all_ready) exists — confirm this
# is intentional.
local_dsp_info_daily.set_upstream(is_local_dsp_simple_hourly_all_ready)
local_dsp_info_daily.set_downstream(end_task)
    'owner': 'airflow',
    'start_date': seven_days_ago,
}

dag = DAG(
    dag_id='dag2',
    default_args=args,
    schedule_interval="30 17 * * *"  # a crontab-style schedule can be used here
)

task0 = DummyOperator(task_id='task0', dag=dag)
cmd = 'ls -l'
task1 = BashOperator(task_id='task1', bash_command=cmd, dag=dag)
task0.set_downstream(task1)

# Runs regardless of task1's final state ('all_done'), but waits for its
# own previous run to finish (depends_on_past).
task2 = DummyOperator(
    trigger_rule='all_done',
    task_id='task2',
    dag=dag,
    depends_on_past=True)
task2.set_upstream(task1)

task3 = DummyOperator(
    trigger_rule='all_done',
    depends_on_past=True,
    task_id='task3',
    dag=dag)
task3.set_upstream(task2)
# Branch taken when the update check finds no work to do.
nothing_to_update_op = DummyOperator(
    task_id='nothing_to_update',
    dag=dag
)

# Both "to be updated" checks gate the remove/update checks.
check_to_remove_op.set_upstream(check_job_posting_to_be_updated_op)
check_to_update_op.set_upstream(check_job_posting_to_be_updated_op)
check_to_remove_op.set_upstream(check_work_experience_to_be_updated_op)
check_to_update_op.set_upstream(check_work_experience_to_be_updated_op)

# Each check fans out to its action branch or its no-op branch.
check_to_update_op.set_downstream(update_scores_branch_op)
check_to_remove_op.set_downstream(remove_scores_op)
check_to_remove_op.set_downstream(nothing_to_remove_op)
check_to_update_op.set_downstream(nothing_to_update_op)

# The completion notification waits on every terminal branch.
nothing_to_remove_op.set_downstream(notify_processing_completion_op)
nothing_to_update_op.set_downstream(notify_processing_completion_op)

# Feature extraction fans out from the update branch...
compute_title_feature_op.set_upstream(update_scores_branch_op)
compute_skill_feature_op.set_upstream(update_scores_branch_op)
compute_description_feature_op.set_upstream(update_scores_branch_op)

# ...and fans back in at the similarity computation, which then updates
# the scores before notification.
compute_title_feature_op.set_downstream(compute_similarity_op)
compute_skill_feature_op.set_downstream(compute_similarity_op)
compute_description_feature_op.set_downstream(compute_similarity_op)
update_scores_op.set_upstream(compute_similarity_op)

update_scores_op.set_downstream(notify_processing_completion_op)
remove_scores_op.set_downstream(notify_processing_completion_op)
# First subdag stage of the example pipeline.
section_1 = SubDagOperator(
    task_id='section-1',
    subdag=subdag(DAG_NAME, 'section-1', args),
    default_args=args,
    dag=dag,
)

# Ordinary task between the two subdag stages.
some_other_task = DummyOperator(
    task_id='some-other-task',
    default_args=args,
    dag=dag,
)

# Second subdag stage.
section_2 = SubDagOperator(
    task_id='section-2',
    subdag=subdag(DAG_NAME, 'section-2', args),
    default_args=args,
    dag=dag,
)

# Terminal marker task.
end = DummyOperator(
    task_id='end',
    default_args=args,
    dag=dag,
)

# Chain the stages: start -> section-1 -> some-other-task -> section-2 -> end.
for upstream_node, downstream_node in (
        (start, section_1),
        (section_1, some_other_task),
        (some_other_task, section_2),
        (section_2, end)):
    upstream_node.set_downstream(downstream_node)