def test_skipping(self):
    """Check task states after running a LatestOnlyOperator and one downstream task.

    Both tasks are run from DEFAULT_DATE to END_DATE. Every 'latest' task
    instance is expected to be 'success'; the 'downstream' instances are
    expected to be 'skipped' for the two earlier execution dates and
    'success' for 2016-01-02.
    """
    gate = LatestOnlyOperator(
        task_id='latest',
        dag=self.dag)
    follower = DummyOperator(
        task_id='downstream',
        dag=self.dag)
    follower.set_upstream(gate)

    # Upstream task first, then the task that depends on it.
    for op in (gate, follower):
        op.run(start_date=DEFAULT_DATE, end_date=END_DATE)

    def states_by_date(task_id):
        # Map each fetched task instance's execution date to its state.
        return {
            instance.execution_date: instance.state
            for instance in get_task_instances(task_id)
        }

    latest_states = states_by_date('latest')
    assert latest_states == {
        datetime.datetime(2016, 1, 1): 'success',
        datetime.datetime(2016, 1, 1, 12): 'success',
        datetime.datetime(2016, 1, 2): 'success',
    }

    downstream_states = states_by_date('downstream')
    assert downstream_states == {
        datetime.datetime(2016, 1, 1): 'skipped',
        datetime.datetime(2016, 1, 1, 12): 'skipped',
        datetime.datetime(2016, 1, 2): 'success',
    }
def test_skipping(self):
    """Check task states after running a LatestOnlyOperator and one downstream task.

    Both tasks are run from DEFAULT_DATE to END_DATE. All 'latest' task
    instances must be 'success'; 'downstream' must be 'skipped' for the two
    earlier execution dates and 'success' for 2016-01-02.
    """
    gate = LatestOnlyOperator(
        task_id='latest',
        dag=self.dag)
    follower = DummyOperator(
        task_id='downstream',
        dag=self.dag)
    follower.set_upstream(gate)

    # Upstream task first, then the task that depends on it.
    for op in (gate, follower):
        op.run(start_date=DEFAULT_DATE, end_date=END_DATE)

    def states_by_date(task_id):
        # Map each fetched task instance's execution date to its state.
        return {
            instance.execution_date: instance.state
            for instance in get_task_instances(task_id)
        }

    latest_states = states_by_date('latest')
    expected_latest = {
        datetime.datetime(2016, 1, 1): 'success',
        datetime.datetime(2016, 1, 1, 12): 'success',
        datetime.datetime(2016, 1, 2): 'success',
    }
    self.assertEqual(expected_latest, latest_states)

    downstream_states = states_by_date('downstream')
    expected_downstream = {
        datetime.datetime(2016, 1, 1): 'skipped',
        datetime.datetime(2016, 1, 1, 12): 'skipped',
        datetime.datetime(2016, 1, 2): 'success',
    }
    self.assertEqual(expected_downstream, downstream_states)
def test_skipping_dagrun(self):
    """Check states for a latest -> downstream -> downstream_2 chain.

    All three tasks are run from DEFAULT_DATE to END_DATE. For each task the
    fetched task instances are passed through
    ``self.dag_file_processor._process_task_instances`` before their states
    are compared. 'latest' must be 'success' everywhere; both downstream
    tasks must be 'skipped' for the two earlier execution dates and
    'success' for 2016-01-02.
    """
    gate = LatestOnlyOperator(
        task_id='latest',
        dag=self.dag)
    first_follower = DummyOperator(
        task_id='downstream',
        dag=self.dag)
    second_follower = DummyOperator(
        task_id='downstream_2',
        dag=self.dag)

    first_follower.set_upstream(gate)
    second_follower.set_upstream(first_follower)

    # Run in dependency order: gate, then each follower in turn.
    for op in (gate, first_follower, second_follower):
        op.run(start_date=DEFAULT_DATE, end_date=END_DATE)

    def processed_states(task_id):
        # Fetch, hand the instances to the file processor, then map
        # execution date -> state.
        instances = get_task_instances(task_id)
        self.dag_file_processor._process_task_instances(
            self.dag, task_instances_list=instances)
        return {inst.execution_date: inst.state for inst in instances}

    def expected(first, second, third):
        # Expected state for each of the three execution dates, in order.
        return {
            timezone.datetime(2016, 1, 1): first,
            timezone.datetime(2016, 1, 1, 12): second,
            timezone.datetime(2016, 1, 2): third,
        }

    latest_states = processed_states('latest')
    self.assertEqual(expected('success', 'success', 'success'), latest_states)

    downstream_states = processed_states('downstream')
    self.assertEqual(expected('skipped', 'skipped', 'success'), downstream_states)

    downstream_2_states = processed_states('downstream_2')
    self.assertEqual(expected('skipped', 'skipped', 'success'), downstream_2_states)
def test_skipping_non_latest(self):
    """Check states across three RUNNING dag runs created via ``create_dagrun``.

    Task layout: latest -> downstream -> {downstream_2, downstream_3}, where
    downstream_3 is created with ``TriggerRule.NONE_FAILED``. Expected
    states per execution date (2016-01-01, 2016-01-01 12:00, 2016-01-02):

    * latest:        success, success, success
    * downstream:    skipped, skipped, success
    * downstream_2:  None,    None,    success
    * downstream_3:  success, success, success
    """
    gate = LatestOnlyOperator(
        task_id='latest',
        dag=self.dag)
    follower = DummyOperator(
        task_id='downstream',
        dag=self.dag)
    follower_2 = DummyOperator(
        task_id='downstream_2',
        dag=self.dag)
    follower_3 = DummyOperator(
        task_id='downstream_3',
        trigger_rule=TriggerRule.NONE_FAILED,
        dag=self.dag)

    follower.set_upstream(gate)
    follower_2.set_upstream(follower)
    follower_3.set_upstream(follower)

    # One RUNNING dag run per execution date under test.
    dag_runs = (
        ("scheduled__1", DEFAULT_DATE),
        ("scheduled__2", timezone.datetime(2016, 1, 1, 12)),
        ("scheduled__3", END_DATE),
    )
    for run_id, execution_date in dag_runs:
        self.dag.create_dagrun(
            run_id=run_id,
            start_date=timezone.utcnow(),
            execution_date=execution_date,
            state=State.RUNNING,
        )

    # Run in dependency order: the gate first, then its descendants.
    for op in (gate, follower, follower_2, follower_3):
        op.run(start_date=DEFAULT_DATE, end_date=END_DATE)

    def states_by_date(task_id):
        # Map each fetched task instance's execution date to its state.
        return {
            inst.execution_date: inst.state
            for inst in get_task_instances(task_id)
        }

    def expected(first, second, third):
        # Expected state for each of the three execution dates, in order.
        return {
            timezone.datetime(2016, 1, 1): first,
            timezone.datetime(2016, 1, 1, 12): second,
            timezone.datetime(2016, 1, 2): third,
        }

    latest_states = states_by_date('latest')
    self.assertEqual(expected('success', 'success', 'success'), latest_states)

    downstream_states = states_by_date('downstream')
    self.assertEqual(expected('skipped', 'skipped', 'success'), downstream_states)

    # downstream_2 is expected to have no state at all for the earlier dates.
    downstream_2_states = states_by_date('downstream_2')
    self.assertEqual(expected(None, None, 'success'), downstream_2_states)

    downstream_3_states = states_by_date('downstream_3')
    self.assertEqual(expected('success', 'success', 'success'), downstream_3_states)
def test_run(self):
    """Run a lone LatestOnlyOperator with start and end both at DEFAULT_DATE.

    There are no assertions; the test passes as long as ``run`` does not raise.
    """
    operator = LatestOnlyOperator(
        task_id='latest',
        dag=self.dag,
    )
    operator.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
    )
def test_not_skipping_external(self):
    """Check that nothing is skipped when dag runs have ``external_trigger=True``.

    Three RUNNING dag runs are created with ``external_trigger=True``, then
    the chain latest -> downstream -> downstream_2 is run from DEFAULT_DATE
    to END_DATE. Every task instance of every task is expected to be
    'success' for all three execution dates.
    """
    gate = LatestOnlyOperator(
        task_id='latest',
        dag=self.dag)
    first_follower = DummyOperator(
        task_id='downstream',
        dag=self.dag)
    second_follower = DummyOperator(
        task_id='downstream_2',
        dag=self.dag)

    first_follower.set_upstream(gate)
    second_follower.set_upstream(first_follower)

    # One externally-triggered RUNNING dag run per execution date under test.
    dag_runs = (
        ("manual__1", DEFAULT_DATE),
        ("manual__2", timezone.datetime(2016, 1, 1, 12)),
        ("manual__3", END_DATE),
    )
    for run_id, execution_date in dag_runs:
        self.dag.create_dagrun(
            run_id=run_id,
            start_date=timezone.utcnow(),
            execution_date=execution_date,
            state=State.RUNNING,
            external_trigger=True,
        )

    # Run in dependency order: gate, then each follower in turn.
    for op in (gate, first_follower, second_follower):
        op.run(start_date=DEFAULT_DATE, end_date=END_DATE)

    def states_by_date(task_id):
        # Map each fetched task instance's execution date to its state.
        return {
            inst.execution_date: inst.state
            for inst in get_task_instances(task_id)
        }

    def all_success():
        # Fresh expected mapping: 'success' on every execution date.
        return {
            timezone.datetime(2016, 1, 1): 'success',
            timezone.datetime(2016, 1, 1, 12): 'success',
            timezone.datetime(2016, 1, 2): 'success',
        }

    latest_states = states_by_date('latest')
    self.assertEqual(all_success(), latest_states)

    downstream_states = states_by_date('downstream')
    self.assertEqual(all_success(), downstream_states)

    downstream_2_states = states_by_date('downstream_2')
    self.assertEqual(all_success(), downstream_2_states)
def test_skipping_dagrun(self):
    """Check skipping across three RUNNING dag runs created via ``create_dagrun``.

    The dag runs are created without ``external_trigger``. After running the
    chain latest -> downstream -> downstream_2 from DEFAULT_DATE to
    END_DATE, 'latest' must be 'success' for every execution date, while
    both downstream tasks must be 'skipped' for the two earlier dates and
    'success' for 2016-01-02.
    """
    gate = LatestOnlyOperator(
        task_id='latest',
        dag=self.dag)
    first_follower = DummyOperator(
        task_id='downstream',
        dag=self.dag)
    second_follower = DummyOperator(
        task_id='downstream_2',
        dag=self.dag)

    first_follower.set_upstream(gate)
    second_follower.set_upstream(first_follower)

    # One RUNNING dag run per execution date under test.
    dag_runs = (
        ("manual__1", DEFAULT_DATE),
        ("manual__2", timezone.datetime(2016, 1, 1, 12)),
        ("manual__3", END_DATE),
    )
    for run_id, execution_date in dag_runs:
        self.dag.create_dagrun(
            run_id=run_id,
            start_date=timezone.utcnow(),
            execution_date=execution_date,
            state=State.RUNNING
        )

    # Run in dependency order: gate, then each follower in turn.
    for op in (gate, first_follower, second_follower):
        op.run(start_date=DEFAULT_DATE, end_date=END_DATE)

    def states_by_date(task_id):
        # Map each fetched task instance's execution date to its state.
        return {
            inst.execution_date: inst.state
            for inst in get_task_instances(task_id)
        }

    def expected(first, second, third):
        # Expected state for each of the three execution dates, in order.
        return {
            timezone.datetime(2016, 1, 1): first,
            timezone.datetime(2016, 1, 1, 12): second,
            timezone.datetime(2016, 1, 2): third,
        }

    latest_states = states_by_date('latest')
    self.assertEqual(expected('success', 'success', 'success'), latest_states)

    downstream_states = states_by_date('downstream')
    self.assertEqual(expected('skipped', 'skipped', 'success'), downstream_states)

    downstream_2_states = states_by_date('downstream_2')
    self.assertEqual(expected('skipped', 'skipped', 'success'), downstream_2_states)