def _collect_errors(self, ti_status: _DagRunTaskStatus, session=None):
    """
    Build a human-readable report of failed and deadlocked task instances.

    :param ti_status: run status holder. Its ``failed``, ``succeeded``,
        ``running`` and ``skipped`` collections are tabulated as task instance
        keys; ``deadlocked`` holds task instances whose ``key`` is tabulated.
    :param session: database session forwarded to ``are_dependencies_met``
    :return: the report text, or an empty string when nothing failed and
        nothing is deadlocked
    """

    def tabulate_ti_keys_set(ti_keys: Iterable[TaskInstanceKey]) -> str:
        """Render task instance keys as a table, ordered by run id first."""
        # Sort once and derive everything from the sorted list, so that a
        # one-shot iterator argument is never iterated a second time.
        sorted_ti_keys = sorted(
            ti_keys,
            key=lambda ti_key: (
                ti_key.run_id,
                ti_key.dag_id,
                ti_key.task_id,
                ti_key.map_index,
                ti_key.try_number,
            ),
        )

        # Rows are built field by field so each column is guaranteed to sit
        # under its own header, whatever the field order of the key tuple is.
        if all(key.map_index == -1 for key in sorted_ti_keys):
            headers = ["DAG ID", "Task ID", "Run ID", "Try number"]
            rows = [
                (key.dag_id, key.task_id, key.run_id, key.try_number)
                for key in sorted_ti_keys
            ]
        else:
            headers = ["DAG ID", "Task ID", "Run ID", "Map Index", "Try number"]
            rows = [
                (key.dag_id, key.task_id, key.run_id, key.map_index, key.try_number)
                for key in sorted_ti_keys
            ]
        return tabulate(rows, headers=headers)

    err = ''
    if ti_status.failed:
        err += "Some task instances failed:\n"
        err += tabulate_ti_keys_set(ti_status.failed)
    if ti_status.deadlocked:
        err += 'BackfillJob is deadlocked.'
        # A task is blocked by "depends_on_past" when honouring the flag and
        # ignoring it give different dependency verdicts.
        deadlocked_depends_on_past = any(
            t.are_dependencies_met(
                dep_context=DepContext(ignore_depends_on_past=False),
                session=session,
                verbose=self.verbose,
            )
            != t.are_dependencies_met(
                dep_context=DepContext(ignore_depends_on_past=True),
                session=session,
                verbose=self.verbose,
            )
            for t in ti_status.deadlocked
        )
        if deadlocked_depends_on_past:
            err += (
                'Some of the deadlocked tasks were unable to run because '
                'of "depends_on_past" relationships. Try running the '
                'backfill with the option '
                '"ignore_first_depends_on_past=True" or passing "-I" at '
                'the command line.'
            )
        err += '\nThese tasks have succeeded:\n'
        err += tabulate_ti_keys_set(ti_status.succeeded)
        err += '\n\nThese tasks are running:\n'
        err += tabulate_ti_keys_set(ti_status.running)
        err += '\n\nThese tasks have failed:\n'
        err += tabulate_ti_keys_set(ti_status.failed)
        err += '\n\nThese tasks are skipped:\n'
        err += tabulate_ti_keys_set(ti_status.skipped)
        err += '\n\nThese tasks are deadlocked:\n'
        err += tabulate_ti_keys_set([ti.key for ti in ti_status.deadlocked])

    return err
def _collect_errors(self, ti_status, session=None):
    """
    Assemble the error report for failed and deadlocked task instances.

    :param ti_status: object exposing ``failed``, ``deadlocked``,
        ``succeeded``, ``running`` and ``skipped`` collections
    :param session: database session forwarded to ``are_dependencies_met``
    :return: the report text; empty when nothing failed or deadlocked
    """
    separator = '---------------------------------------------------\n'
    chunks = []

    if ti_status.failed:
        chunks.append(separator)
        chunks.append("Some task instances failed:\n{}\n".format(ti_status.failed))

    if ti_status.deadlocked:
        chunks.append(separator)
        chunks.append('BackfillJob is deadlocked.')

        def blocked_by_past(task_instance):
            # Compare the verdict with and without the depends_on_past check.
            strict = task_instance.are_dependencies_met(
                dep_context=DepContext(ignore_depends_on_past=False),
                session=session,
                verbose=self.verbose,
            )
            relaxed = task_instance.are_dependencies_met(
                dep_context=DepContext(ignore_depends_on_past=True),
                session=session,
                verbose=self.verbose,
            )
            return strict != relaxed

        if any(blocked_by_past(t) for t in ti_status.deadlocked):
            chunks.append(
                'Some of the deadlocked tasks were unable to run because '
                'of "depends_on_past" relationships. Try running the '
                'backfill with the option '
                '"ignore_first_depends_on_past=True" or passing "-I" at '
                'the command line.'
            )

        chunks.append(' These tasks have succeeded:\n{}\n'.format(ti_status.succeeded))
        chunks.append(' These tasks are running:\n{}\n'.format(ti_status.running))
        chunks.append(' These tasks have failed:\n{}\n'.format(ti_status.failed))
        chunks.append(' These tasks are skipped:\n{}\n'.format(ti_status.skipped))
        chunks.append(' These tasks are deadlocked:\n{}\n'.format(ti_status.deadlocked))

    return ''.join(chunks)
def _collect_errors(self, ti_status, session=None):
    """
    Assemble the tabulated error report for a backfill run.

    :param ti_status: object exposing ``failed``, ``succeeded``, ``running``
        and ``skipped`` (task instance keys) and ``deadlocked`` (task
        instances)
    :param session: database session forwarded to ``are_dependencies_met``
    :return: the report text; empty when nothing failed or deadlocked
    """
    table_headers = ["DAG ID", "Task ID", "Run ID", "Try number"]

    def order_of(item):
        # Shared ordering for keys and task instances: run first, then DAG,
        # task and try number.
        return (item.run_id, item.dag_id, item.task_id, item.try_number)

    def tabulate_ti_keys_set(set_ti_keys: Set[TaskInstanceKey]) -> str:
        """Tabulate task instance keys in run order."""
        ordered_keys = sorted(set_ti_keys, key=order_of)
        return tabulate(ordered_keys, headers=table_headers)

    def tabulate_tis_set(set_tis: Set[TaskInstance]) -> str:
        """Tabulate task instances in run order, one row per instance."""
        ordered_tis = sorted(set_tis, key=order_of)
        rows = ((ti.dag_id, ti.task_id, ti.run_id, ti.try_number) for ti in ordered_tis)
        return tabulate(rows, headers=table_headers)

    err = ''
    if ti_status.failed:
        err += "Some task instances failed:\n"
        err += tabulate_ti_keys_set(ti_status.failed)

    if not ti_status.deadlocked:
        return err

    err += 'BackfillJob is deadlocked.'

    def blocked_by_past(task_instance):
        # Differing verdicts mean depends_on_past is what blocks this task.
        strict = task_instance.are_dependencies_met(
            dep_context=DepContext(ignore_depends_on_past=False),
            session=session,
            verbose=self.verbose,
        )
        relaxed = task_instance.are_dependencies_met(
            dep_context=DepContext(ignore_depends_on_past=True),
            session=session,
            verbose=self.verbose,
        )
        return strict != relaxed

    if any(blocked_by_past(t) for t in ti_status.deadlocked):
        err += (
            'Some of the deadlocked tasks were unable to run because '
            'of "depends_on_past" relationships. Try running the '
            'backfill with the option '
            '"ignore_first_depends_on_past=True" or passing "-I" at '
            'the command line.'
        )

    err += '\nThese tasks have succeeded:\n'
    err += tabulate_ti_keys_set(ti_status.succeeded)
    err += '\n\nThese tasks are running:\n'
    err += tabulate_ti_keys_set(ti_status.running)
    err += '\n\nThese tasks have failed:\n'
    err += tabulate_ti_keys_set(ti_status.failed)
    err += '\n\nThese tasks are skipped:\n'
    err += tabulate_ti_keys_set(ti_status.skipped)
    err += '\n\nThese tasks are deadlocked:\n'
    err += tabulate_tis_set(ti_status.deadlocked)
    return err
def _collect_errors(self, ti_status, session=None):
    """
    Assemble the error report for a DagRunJob.

    Failed entries are resolved to task names through the current databand
    run and split into tasks that failed themselves and tasks that did not run
    because an upstream task failed.

    :param ti_status: object exposing ``failed``, ``deadlocked``,
        ``succeeded``, ``running`` and ``skipped`` collections
    :param session: database session forwarded to ``are_dependencies_met``
    :return: the report text; empty when nothing failed or deadlocked
    """
    pieces = []

    if ti_status.failed:
        databand_run = get_databand_run()
        blocked_names = []
        failed_names = []
        for fail_info in ti_status.failed:
            # Index 1 of a failure entry is used as the airflow task id.
            task_run = databand_run.get_task_run(fail_info[1])
            task_name = task_run.task.task_name
            # Upstream-failed tasks are reported separately from real failures.
            if task_run.task_run_state == State.UPSTREAM_FAILED:
                blocked_names.append(task_name)
            else:
                failed_names.append(task_name)

        if blocked_names:
            pieces.append(
                "Task that didn't run because "
                "of failed dependency:\n\t{}\n".format("\n\t".join(blocked_names))
            )
        if failed_names:
            pieces.append("Failed tasks are:\n\t{}".format("\n\t".join(failed_names)))

    if ti_status.deadlocked:
        pieces.append(
            "---------------------------------------------------\n"
            "DagRunJob is deadlocked."
        )

        def blocked_by_past(task_instance):
            # Differing verdicts mean depends_on_past is what blocks this task.
            strict = task_instance.are_dependencies_met(
                dep_context=DepContext(ignore_depends_on_past=False),
                session=session,
                verbose=self.verbose,
            )
            relaxed = task_instance.are_dependencies_met(
                dep_context=DepContext(ignore_depends_on_past=True),
                session=session,
                verbose=self.verbose,
            )
            return strict != relaxed

        if any(blocked_by_past(t) for t in ti_status.deadlocked):
            pieces.append(
                "Some of the deadlocked tasks were unable to run because "
                'of "depends_on_past" relationships. Try running the '
                "backfill with the option "
                '"ignore_first_depends_on_past=True" or passing "-I" at '
                "the command line."
            )

        pieces.append(" These tasks have succeeded:\n{}\n".format(ti_status.succeeded))
        pieces.append(" These tasks are running:\n{}\n".format(ti_status.running))
        pieces.append(" These tasks have failed:\n{}\n".format(ti_status.failed))
        pieces.append(" These tasks are skipped:\n{}\n".format(ti_status.skipped))
        pieces.append(" These tasks are deadlocked:\n{}\n".format(ti_status.deadlocked))

    return "".join(pieces)
def _get_dep_statuses(self, ti, session, dep_context: DepContext):
    """
    Yield the trigger-rule dependency statuses for ``ti``.

    Passes immediately when the task has no upstream tasks or uses the
    ``ALWAYS`` trigger rule; otherwise the upstream state counts are computed
    and handed to ``_evaluate_trigger_rule``.

    :param ti: the task instance being evaluated
    :param session: database session
    :param dep_context: supplies the finished task instances and the
        ``flag_upstream_failed`` setting
    """
    task = ti.task

    # Nothing upstream means there is nothing to wait for.
    if not task.upstream_list:
        yield self._passing_status(reason="The task instance did not have any upstream tasks.")
        return

    if task.trigger_rule == TR.ALWAYS:
        yield self._passing_status(reason="The task had a always trigger rule set.")
        return

    # Count the states of the finished upstream task instances of this task.
    finished_tis = dep_context.ensure_finished_tis(ti.get_dagrun(session), session)
    state_counts = self._get_states_count_upstream_ti(ti=ti, finished_tis=finished_tis)
    successes, skipped, failed, upstream_failed, done = state_counts

    yield from self._evaluate_trigger_rule(
        ti=ti,
        successes=successes,
        skipped=skipped,
        failed=failed,
        upstream_failed=upstream_failed,
        done=done,
        flag_upstream_failed=dep_context.flag_upstream_failed,
        session=session,
    )
def get_dep_statuses(self, ti, session, dep_context=None):
    """
    Wrapper around the private _get_dep_statuses method that contains some
    global checks for all dependencies.

    Yields a single passing status when the context says this dependency
    should be ignored; otherwise yields whatever ``_get_dep_statuses`` yields.

    :param ti: the task instance to get the dependency status for
    :type ti: TaskInstance
    :param session: database session
    :type session: Session
    :param dep_context: the context for which this dependency should be
        evaluated for; a default ``DepContext`` is created when omitted
    :type dep_context: DepContext
    """
    if dep_context is None:
        # Imported here rather than at module level to avoid a circular
        # dependency; only needed when the default context must be built.
        from airflow.ti_deps.dep_context import DepContext

        dep_context = DepContext()

    if self.IGNOREABLE and dep_context.ignore_all_deps:
        yield self._passing_status(
            reason="Context specified all dependencies should be ignored.")
        # A plain return ends the generator. Raising StopIteration inside a
        # generator is turned into RuntimeError by PEP 479 (Python 3.7+).
        return

    if self.IS_TASK_DEP and dep_context.ignore_task_deps:
        yield self._passing_status(
            reason="Context specified all task dependencies should be ignored.")
        return

    yield from self._get_dep_statuses(ti, session, dep_context)
def _process_finished_ti(session, ti):
    """
    Apply a pending task action to a TaskInstance that has already finished.

    When the event-scheduling dependencies are met, the stored task state is
    looked up. A ``RESTART`` action puts the task instance back into the
    ``SCHEDULED`` state; the action is then cleared and the session committed.
    Nothing is changed when there is no action (``None`` or
    ``TaskAction.NONE``).

    :param session: database session used to query, merge and commit
    :param ti: the finished task instance to process
    :return: None
    """
    # Imported inside the function, as in the original.
    from airflow.ti_deps.deps.runnable_exec_date_dep import RunnableExecDateDep
    from airflow.ti_deps.deps.valid_state_dep import ValidStateDep
    from airflow.ti_deps.deps.events_dep import EventTIDep
    # Dependencies that must hold before a finished task reacts to an action.
    EVENT_SCHEDULED_DEPS = {
        RunnableExecDateDep(),
        ValidStateDep(FINISHED_STATES),
        EventTIDep(),
    }
    dep_context = DepContext(deps=EVENT_SCHEDULED_DEPS)
    if ti.are_dependencies_met(dep_context=dep_context, session=session):
        ts = TaskState.query_task_state(ti, session=session)
        # No pending action: leave everything untouched.
        if ts.action is None or TaskAction(ts.action) == TaskAction.NONE:
            return
        if TaskAction(ts.action) == TaskAction.RESTART:
            log.debug('Queuing Finished task: %s', ti)
            ti.state = State.SCHEDULED
            log.info("Creating / updating %s in ORM", ti)
            session.merge(ti)
        # NOTE(review): the source was whitespace-collapsed; the reset and
        # commit below are assumed to run for every non-empty action, not only
        # for RESTART. Confirm against the original file.
        ts.action = None
        session.merge(ts)
        session.commit()
def get_dep_statuses(self, ti, session, dep_context=None):
    """
    Apply the global ignore checks, then delegate to ``_get_dep_statuses``.

    A single passing status is yielded when the context asks for this
    dependency to be ignored; otherwise the statuses of the private
    implementation are yielded unchanged.

    :param ti: the task instance to get the dependency status for
    :type ti: airflow.models.TaskInstance
    :param session: database session
    :type session: sqlalchemy.orm.session.Session
    :param dep_context: the context for which this dependency should be
        evaluated for; a default ``DepContext`` is used when omitted
    :type dep_context: DepContext
    """
    context = DepContext() if dep_context is None else dep_context

    # Work out whether (and why) this dependency is skipped by the context.
    if self.IGNOREABLE and context.ignore_all_deps:
        skip_reason = "Context specified all dependencies should be ignored."
    elif self.IS_TASK_DEP and context.ignore_task_deps:
        skip_reason = "Context specified all task dependencies should be ignored."
    else:
        skip_reason = None

    if skip_reason is None:
        yield from self._get_dep_statuses(ti, session, context)
    else:
        yield self._passing_status(reason=skip_reason)
def test_not_reached_concurrency(self):
    """The dep is met when a ``task_concurrency=1`` task has no running instances."""
    limited_task = self._get_task(start_date=datetime(2016, 1, 1), task_concurrency=1)
    context = DepContext()
    task_instance = Mock(task=limited_task, execution_date=datetime(2016, 1, 1))
    # Report zero running instances regardless of the argument passed in.
    task_instance.get_num_running_task_instances = lambda x: 0

    dep = TaskConcurrencyDep()
    assert dep.is_met(ti=task_instance, dep_context=context)
def _get_ready_tis(
    self,
    scheduleable_tasks: List[TI],
    finished_tasks: List[TI],
    session: Session,
) -> Tuple[List[TI], bool]:
    """
    Split schedulable task instances into those whose dependencies are met.

    :param scheduleable_tasks: candidate task instances
    :param finished_tasks: finished task instances passed to the dep context
    :param session: database session
    :return: ``(ready_tis, changed_tis)`` where ``changed_tis`` tells whether
        any not-ready task instance has a different state in the database
        than it had before its dependency check
    """
    ready_tis: List[TI] = []
    if not scheduleable_tasks:
        return ready_tis, False

    states_before = {}
    for candidate in scheduleable_tasks:
        # Remember the state first: the dependency check may alter it.
        state_before = candidate.state
        context = DepContext(flag_upstream_failed=True, finished_tasks=finished_tasks)
        if candidate.are_dependencies_met(dep_context=context, session=session):
            ready_tis.append(candidate)
            continue
        states_before[candidate.key] = state_before

    # Re-read the not-ready task instances and compare with the saved states.
    changed_tis = False
    tis_filter = TI.filter_for_tis(states_before.keys())
    if tis_filter is not None:
        reloaded = session.query(TI).filter(tis_filter).all()
        changed_tis = any(ti.state != states_before[ti.key] for ti in reloaded)

    return ready_tis, changed_tis
def test_parent_skip_branch():
    """
    A simple DAG with a BranchPythonOperator that does not follow op2.

    op1 chooses "op3", so NotPreviouslySkippedDep is not met for op2: exactly
    one dep status is produced and the op2 task instance ends up SKIPPED.
    """
    with create_session() as session:
        # Start from empty DagRun / TaskInstance tables.
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        start_date = pendulum.datetime(2020, 1, 1)
        dag = DAG("test_parent_skip_branch_dag", schedule_interval=None, start_date=start_date)
        dag.create_dagrun(run_type=DagRunType.MANUAL, state=State.RUNNING, execution_date=start_date)
        op1 = BranchPythonOperator(task_id="op1", python_callable=lambda: "op3", dag=dag)
        op2 = DummyOperator(task_id="op2", dag=dag)
        op3 = DummyOperator(task_id="op3", dag=dag)
        op1 >> [op2, op3]
        # Run the branch operator so its decision exists before op2 is checked.
        TaskInstance(op1, start_date).run()
        ti2 = TaskInstance(op2, start_date)
        dep = NotPreviouslySkippedDep()

        assert len(list(dep.get_dep_statuses(ti2, session, DepContext()))) == 1
        session.commit()
        assert not dep.is_met(ti2, session)
        assert ti2.state == State.SKIPPED
def _get_ready_tis(
    self,
    schedulable_tis: List[TI],
    finished_tis: List[TI],
    session: Session,
) -> Tuple[List[TI], bool]:
    """
    Collect the task instances whose dependencies are met, expanding mapped
    tasks that have not been expanded yet.

    :param schedulable_tis: candidate task instances
    :param finished_tis: finished task instances; passed to the dep context
        and scanned for upstreams of unexpanded mapped tasks
    :param session: database session
    :return: ``(ready_tis, changed_tis)`` where ``changed_tis`` tells whether
        any not-ready task instance has a different state in the database
        than it had before its dependency check
    """
    old_states = {}
    ready_tis: List[TI] = []
    changed_tis = False

    if not schedulable_tis:
        return ready_tis, changed_tis

    # If we expand TIs, we need a new list so that we iterate over them too. (We can't alter
    # `schedulable_tis` in place and have the `for` loop pick them up.) The chain below reads this
    # list lazily, so items appended during the loop are still visited.
    expanded_tis: List[TI] = []
    dep_context = DepContext(
        flag_upstream_failed=True,
        ignore_unmapped_tasks=True,  # Ignore this Dep, as we will expand it if we can.
        finished_tis=finished_tis,
    )

    # Check dependencies
    for schedulable in itertools.chain(schedulable_tis, expanded_tis):
        # Capture the state first; the dependency check may change it.
        old_state = schedulable.state
        if schedulable.are_dependencies_met(session=session, dep_context=dep_context):
            ready_tis.append(schedulable)
        else:
            old_states[schedulable.key] = old_state
            continue

        # Expansion of last resort! This is ideally handled in the mini-scheduler in LocalTaskJob, but if
        # for any reason it wasn't, we need to expand it now
        if schedulable.map_index < 0 and schedulable.task.is_mapped:
            # HACK. This needs a better way, one that copes with multiple upstreams!
            for ti in finished_tis:
                if schedulable.task_id in ti.task.downstream_task_ids:
                    assert isinstance(schedulable.task, MappedOperator)
                    new_tis = schedulable.task.expand_mapped_task(self.run_id, session=session)
                    if schedulable.state == TaskInstanceState.SKIPPED:
                        # Task is now skipped (likely because upstream returned 0 tasks).
                        # NOTE(review): this `continue` advances the inner loop over
                        # `finished_tis`, and `schedulable` was already appended to
                        # `ready_tis` above — confirm both are intended.
                        continue
                    assert new_tis[0] is schedulable
                    expanded_tis.extend(new_tis[1:])
                    break

    # Check if any ti changed state
    tis_filter = TI.filter_for_tis(old_states.keys())
    if tis_filter is not None:
        fresh_tis = session.query(TI).filter(tis_filter).all()
        changed_tis = any(ti.state != old_states[ti.key] for ti in fresh_tis)

    return ready_tis, changed_tis
def test_get_states_count_upstream_ti(self):
    """
    Test the helper ``TriggerRuleDep._get_states_count_upstream_ti`` both as a
    unit and indirectly through ``DagRun.update_state``.
    """
    from airflow.ti_deps.dep_context import DepContext

    get_states_count_upstream_ti = TriggerRuleDep._get_states_count_upstream_ti
    session = settings.Session()
    now = timezone.utcnow()
    dag = DAG(
        'test_dagrun_with_pre_tis',
        start_date=DEFAULT_DATE,
        default_args={'owner': 'owner1'})

    with dag:
        op1 = DummyOperator(task_id='A')
        op2 = DummyOperator(task_id='B')
        op3 = DummyOperator(task_id='C')
        op4 = DummyOperator(task_id='D')
        op5 = DummyOperator(task_id='E', trigger_rule=TriggerRule.ONE_FAILED)

        op1.set_downstream([op2, op3])  # op1 >> op2, op3
        op4.set_upstream([op3, op2])  # op3, op2 >> op4
        op5.set_upstream([op2, op3, op4])  # (op2, op3, op4) >> op5

    # Start from a clean slate before creating the dag run.
    clear_db_runs()
    dag.clear()
    dr = dag.create_dagrun(run_id='test_dagrun_with_pre_tis',
                           state=State.RUNNING,
                           execution_date=now,
                           start_date=now)

    ti_op1 = TaskInstance(task=dag.get_task(op1.task_id), execution_date=dr.execution_date)
    ti_op2 = TaskInstance(task=dag.get_task(op2.task_id), execution_date=dr.execution_date)
    ti_op3 = TaskInstance(task=dag.get_task(op3.task_id), execution_date=dr.execution_date)
    ti_op4 = TaskInstance(task=dag.get_task(op4.task_id), execution_date=dr.execution_date)
    ti_op5 = TaskInstance(task=dag.get_task(op5.task_id), execution_date=dr.execution_date)

    # Only B fails; every other task instance is marked successful.
    ti_op1.set_state(state=State.SUCCESS, session=session)
    ti_op2.set_state(state=State.FAILED, session=session)
    ti_op3.set_state(state=State.SUCCESS, session=session)
    ti_op4.set_state(state=State.SUCCESS, session=session)
    ti_op5.set_state(state=State.SUCCESS, session=session)

    session.commit()

    # check handling with cases that tasks are triggered from backfill with no finished tasks
    # NOTE(review): the expected tuples presumably read
    # (successes, skipped, failed, upstream_failed, done) — confirm against the helper.
    finished_tasks = DepContext().ensure_finished_tasks(ti_op2.task.dag, ti_op2.execution_date, session)
    self.assertEqual(get_states_count_upstream_ti(finished_tasks=finished_tasks, ti=ti_op2),
                     (1, 0, 0, 0, 1))
    finished_tasks = dr.get_task_instances(state=State.finished() + [State.UPSTREAM_FAILED], session=session)
    self.assertEqual(get_states_count_upstream_ti(finished_tasks=finished_tasks, ti=ti_op4),
                     (1, 0, 1, 0, 2))
    self.assertEqual(get_states_count_upstream_ti(finished_tasks=finished_tasks, ti=ti_op5),
                     (2, 0, 1, 0, 3))

    # The run as a whole is expected to resolve to SUCCESS.
    dr.update_state()
    self.assertEqual(State.SUCCESS, dr.state)
def test_all_met(self):
    """
    The dep is met when the previous TI succeeded and all of its dependents
    are done.
    """
    past_dependent_task = self._get_task(
        depends_on_past=True,
        start_date=datetime(2016, 1, 1),
        wait_for_downstream=True,
    )
    previous = Mock(state=State.SUCCESS, are_dependents_done=Mock(return_value=True))
    current = Mock(task=past_dependent_task, execution_date=datetime(2016, 1, 2))
    # Same effect as passing 'get_previous_ti.return_value' to the constructor.
    current.get_previous_ti.return_value = previous
    context = DepContext(ignore_depends_on_past=False)

    dep = PrevDagrunDep()
    assert dep.is_met(ti=current, dep_context=context)
def _get_ready_tis(
    self,
    schedulable_tis: List[TI],
    finished_tis: List[TI],
    session: Session,
) -> Tuple[List[TI], bool, bool]:
    """
    Collect the task instances that are ready to be scheduled, expanding
    mapped tasks that have not been expanded yet.

    :param schedulable_tis: candidate task instances
    :param finished_tis: finished task instances passed to the dep context
    :param session: database session
    :return: ``(ready_tis, changed_tis, expansion_happened)``. ``changed_tis``
        tells whether any not-ready task instance has a different state in
        the database than it had before its dependency check;
        ``expansion_happened`` tells whether a mapped task was expanded here.
    """
    old_states = {}
    ready_tis: List[TI] = []
    changed_tis = False

    if not schedulable_tis:
        return ready_tis, changed_tis, False

    # If we expand TIs, we need a new list so that we iterate over them too. (We can't alter
    # `schedulable_tis` in place and have the `for` loop pick them up.) The chain below reads this
    # list lazily, so items appended during the loop are still visited.
    additional_tis: List[TI] = []
    dep_context = DepContext(
        flag_upstream_failed=True,
        ignore_unmapped_tasks=True,  # Ignore this Dep, as we will expand it if we can.
        finished_tis=finished_tis,
    )

    # Check dependencies.
    expansion_happened = False
    for schedulable in itertools.chain(schedulable_tis, additional_tis):
        # Capture the state first; the dependency check may change it.
        old_state = schedulable.state
        if not schedulable.are_dependencies_met(session=session, dep_context=dep_context):
            old_states[schedulable.key] = old_state
            continue
        # If schedulable is from a mapped task, but not yet expanded, do it
        # now. This is called in two places: First and ideally in the mini
        # scheduler at the end of LocalTaskJob, and then as an "expansion of
        # last resort" in the scheduler to ensure that the mapped task is
        # correctly expanded before executed.
        if schedulable.map_index < 0 and isinstance(schedulable.task, MappedOperator):
            expanded_tis, _ = schedulable.task.expand_mapped_task(self.run_id, session=session)
            if expanded_tis:
                assert expanded_tis[0] is schedulable
                additional_tis.extend(expanded_tis[1:])
            # NOTE(review): the source was whitespace-collapsed; this flag is
            # assumed to be set even when the expansion returned no task
            # instances. Confirm against the original file.
            expansion_happened = True
        # Only task instances still in a schedulable state are handed back.
        if schedulable.state in SCHEDULEABLE_STATES:
            ready_tis.append(schedulable)

    # Check if any ti changed state
    tis_filter = TI.filter_for_tis(old_states)
    if tis_filter is not None:
        fresh_tis = session.query(TI).filter(tis_filter).all()
        changed_tis = any(ti.state != old_states[ti.key] for ti in fresh_tis)

    return ready_tis, changed_tis, expansion_happened
def _are_premature_tis(self, unfinished_tasks, finished_tasks, session):
    """
    Tell whether any unfinished task instance would be runnable if retry and
    reschedule waiting periods were ignored.

    :param unfinished_tasks: task instances that have not finished yet
    :param finished_tasks: finished task instances passed to the dep context
    :param session: database session
    :return: ``True`` if at least one unfinished task instance has its
        dependencies met under the relaxed context, otherwise ``False``
        (previously the function fell through and returned ``None``)
    """
    # There might be runnable tasks that are up for retry and for some reason
    # (retry delay, etc.) are not ready yet, so the flags count them in.
    # The context is built per task instance, as before.
    return any(
        ut.are_dependencies_met(
            dep_context=DepContext(
                flag_upstream_failed=True,
                ignore_in_retry_period=True,
                ignore_in_reschedule_period=True,
                finished_tasks=finished_tasks,
            ),
            session=session,
        )
        for ut in unfinished_tasks
    )
def test_failed_wait_for_downstream(self):
    """
    With ``wait_for_downstream`` set, the dep must fail while the downstream
    TIs of the previous TI are not done.
    """
    waiting_task = self._get_task(
        depends_on_past=True,
        start_date=datetime(2016, 1, 1),
        wait_for_downstream=True,
    )
    # The previous TI succeeded, but its dependents have not finished.
    previous = Mock(state=State.SUCCESS, are_dependents_done=Mock(return_value=False))
    current = Mock(task=waiting_task, previous_ti=previous, execution_date=datetime(2016, 1, 2))
    context = DepContext(ignore_depends_on_past=False)

    dep = PrevDagrunDep()
    assert not dep.is_met(ti=current, dep_context=context)
def test_prev_ti_bad_state(self):
    """
    The dep must fail when the previous TI has not completed execution.
    """
    past_dependent_task = self._get_task(
        depends_on_past=True,
        start_date=datetime(2016, 1, 1),
        wait_for_downstream=False,
    )
    # State.NONE marks a previous TI that never completed.
    previous = Mock(state=State.NONE, are_dependents_done=Mock(return_value=True))
    current = Mock(task=past_dependent_task, previous_ti=previous, execution_date=datetime(2016, 1, 2))
    context = DepContext(ignore_depends_on_past=False)

    dep = PrevDagrunDep()
    assert not dep.is_met(ti=current, dep_context=context)
def test_first_task_run(self):
    """
    The very first run of a TI passes the dep because there is no previous
    dagrun to depend on.
    """
    first_day = datetime(2016, 1, 1)
    past_dependent_task = self._get_task(
        depends_on_past=True,
        start_date=first_day,
        wait_for_downstream=False,
    )
    # No previous TI exists for the first execution date.
    current = Mock(task=past_dependent_task, previous_ti=None, execution_date=datetime(2016, 1, 1))
    context = DepContext(ignore_depends_on_past=False)

    dep = PrevDagrunDep()
    assert dep.is_met(ti=current, dep_context=context)
def _get_ready_tis(self, scheduleable_tasks, finished_tasks, session):
    """
    Split schedulable task instances into those whose dependencies are met.

    :param scheduleable_tasks: candidate task instances
    :param finished_tasks: finished task instances passed to the dep context
    :param session: database session
    :return: ``(ready_tis, changed_tis)`` where ``changed_tis`` is ``True``
        when a not-ready task instance's current state differs from the state
        it had before its dependency check
    """
    runnable = []
    state_changed = False
    for candidate in scheduleable_tasks:
        # Remember the state first: the dependency check may alter it.
        state_before = candidate.state
        context = DepContext(flag_upstream_failed=True, finished_tasks=finished_tasks)
        if candidate.are_dependencies_met(dep_context=context, session=session):
            runnable.append(candidate)
            continue
        if state_before != candidate.current_state(session=session):
            state_changed = True
    return runnable, state_changed
def test_no_parent():
    """
    A DAG with a single task: NotPreviouslySkippedDep is met and the task
    instance is not skipped.
    """
    start_date = pendulum.datetime(2020, 1, 1)
    dag = DAG("test_test_no_parent_dag", schedule_interval=None, start_date=start_date)
    lone_op = DummyOperator(task_id="op1", dag=dag)
    ti1 = TaskInstance(lone_op, start_date)

    with create_session() as session:
        dep = NotPreviouslySkippedDep()
        statuses = list(dep.get_dep_statuses(ti1, session, DepContext()))
        # No status at all means the dep has nothing to object to.
        assert len(statuses) == 0
        assert dep.is_met(ti1, session)
        assert ti1.state != State.SKIPPED
def test_context_ignore_depends_on_past(self):
    """
    When the context sets ``ignore_depends_on_past`` the dep is met for a
    task that has ``depends_on_past`` enabled.
    """
    past_dependent_task = self._get_task(
        depends_on_past=True,
        start_date=datetime(2016, 1, 1),
        wait_for_downstream=False,
    )
    previous = Mock(
        task=past_dependent_task,
        state=State.SUCCESS,
        are_dependents_done=Mock(return_value=True),
        execution_date=datetime(2016, 1, 2),
    )
    current = Mock(task=past_dependent_task, previous_ti=previous, execution_date=datetime(2016, 1, 3))
    # The context override is what this test exercises.
    context = DepContext(ignore_depends_on_past=True)

    dep = PrevDagrunDep()
    self.assertTrue(dep.is_met(ti=current, dep_context=context))
def test_not_depends_on_past(self):
    """
    When the task does not set ``depends_on_past`` the previous dagrun is
    ignored and the dep is met.
    """
    independent_task = self._get_task(
        depends_on_past=False,
        start_date=datetime(2016, 1, 1),
        wait_for_downstream=False,
    )
    previous = Mock(
        task=independent_task,
        state=State.SUCCESS,
        are_dependents_done=Mock(return_value=True),
        execution_date=datetime(2016, 1, 2),
    )
    current = Mock(task=independent_task, previous_ti=previous, execution_date=datetime(2016, 1, 3))
    context = DepContext(ignore_depends_on_past=False)

    dep = PrevDagrunDep()
    assert dep.is_met(ti=current, dep_context=context)
def test_no_skipmixin_parent():
    """
    A DAG without branching, where op1 and op2 are both DummyOperators:
    NotPreviouslySkippedDep is met for op2.
    """
    start_date = pendulum.datetime(2020, 1, 1)
    dag = DAG("test_no_skipmixin_parent_dag", schedule_interval=None, start_date=start_date)
    parent = DummyOperator(task_id="op1", dag=dag)
    child = DummyOperator(task_id="op2", dag=dag)
    parent >> child
    ti2 = TaskInstance(child, start_date)

    with create_session() as session:
        dep = NotPreviouslySkippedDep()
        statuses = list(dep.get_dep_statuses(ti2, session, DepContext()))
        assert len(statuses) == 0
        assert dep.is_met(ti2, session)
        assert ti2.state != State.SKIPPED
def test_parent_follow_branch():
    """
    A DAG whose BranchPythonOperator follows op2: NotPreviouslySkippedDep is
    met for op2.
    """
    start_date = pendulum.datetime(2020, 1, 1)
    dag = DAG("test_parent_follow_branch_dag", schedule_interval=None, start_date=start_date)
    brancher = BranchPythonOperator(task_id="op1", python_callable=lambda: "op2", dag=dag)
    followed = DummyOperator(task_id="op2", dag=dag)
    brancher >> followed

    # Run the branch operator first so its decision is recorded.
    TaskInstance(brancher, start_date).run()
    ti2 = TaskInstance(followed, start_date)

    with create_session() as session:
        dep = NotPreviouslySkippedDep()
        statuses = list(dep.get_dep_statuses(ti2, session, DepContext()))
        assert len(statuses) == 0
        assert dep.is_met(ti2, session)
        assert ti2.state != State.SKIPPED
def _process_running_ti(session, ti):
    """
    Check a running TaskInstance for a pending stop or restart action.

    When the event-scheduling dependencies are met and
    ``action_is_stop_or_restart`` reports such an action, the fact is logged.

    :param session: database session
    :param ti: the running task instance to process
    :return: None
    """
    from airflow.ti_deps.deps.runnable_exec_date_dep import RunnableExecDateDep
    from airflow.ti_deps.deps.valid_state_dep import ValidStateDep
    from airflow.ti_deps.deps.events_dep import EventTIDep

    # Dependencies that must hold before a running task reacts to an action.
    running_event_deps = {
        RunnableExecDateDep(),
        ValidStateDep(RUNNING_STATES),
        EventTIDep(),
    }
    context = DepContext(deps=running_event_deps)

    # Short-circuits exactly like the nested checks: the action lookup only
    # happens once the dependencies are met.
    if ti.are_dependencies_met(dep_context=context, session=session) and action_is_stop_or_restart(
        ti, session
    ):
        log.info("stop or restart task %s ", ti)
def test_parent_not_executed():
    """
    A DAG whose BranchPythonOperator would not follow op2, but the parent has
    not been executed yet (no xcom data): NotPreviouslySkippedDep is met
    because no decision exists.
    """
    start_date = pendulum.datetime(2020, 1, 1)
    dag = DAG("test_parent_not_executed_dag", schedule_interval=None, start_date=start_date)
    brancher = BranchPythonOperator(task_id="op1", python_callable=lambda: "op3", dag=dag)
    not_followed = DummyOperator(task_id="op2", dag=dag)
    followed = DummyOperator(task_id="op3", dag=dag)
    brancher >> [not_followed, followed]

    # The branch operator is deliberately never run here.
    ti2 = TaskInstance(not_followed, start_date)

    with create_session() as session:
        dep = NotPreviouslySkippedDep()
        statuses = list(dep.get_dep_statuses(ti2, session, DepContext()))
        assert len(statuses) == 0
        assert dep.is_met(ti2, session)
        assert ti2.state == State.NONE
def task_failed_deps(args):
    """
    Print the unmet dependencies of a task instance from the perspective of
    the scheduler (i.e. why a task instance doesn't get scheduled and then
    queued by the scheduler, and then run by an executor).

    >>> airflow task_failed_deps tutorial sleep 2015-01-01
    Task instance dependencies not met:
    Dagrun Running: Task instance's dagrun did not exist: Unknown reason
    Trigger Rule: Task's trigger rule 'all_success' requires all upstream tasks
    to have succeeded, but found 1 non-success(es).

    :param args: parsed CLI arguments; ``task_id`` and ``execution_date`` are
        read here and the whole object is passed to ``get_dag``
    """
    task = get_dag(args).get_task(task_id=args.task_id)
    task_instance = TaskInstance(task, args.execution_date)

    scheduler_context = DepContext(deps=SCHEDULER_DEPS)
    unmet = list(task_instance.get_failed_dep_statuses(dep_context=scheduler_context))

    if not unmet:
        print("Task instance dependencies are all met.")
        return

    print("Task instance dependencies not met:")
    for status in unmet:
        print("{}: {}".format(status.dep_name, status.reason))
def task_failed_deps(args):
    """
    Print the unmet dependencies of a task instance from the perspective of
    the scheduler (i.e. why a task instance doesn't get scheduled and then
    queued by the scheduler, and then run by an executor).

    >>> airflow tasks failed-deps tutorial sleep 2015-01-01
    Task instance dependencies not met:
    Dagrun Running: Task instance's dagrun did not exist: Unknown reason
    Trigger Rule: Task's trigger rule 'all_success' requires all upstream tasks
    to have succeeded, but found 1 non-success(es).

    :param args: parsed CLI arguments; ``subdir``, ``dag_id``, ``task_id``,
        ``execution_date_or_run_id`` and ``map_index`` are read
    """
    task = get_dag(args.subdir, args.dag_id).get_task(task_id=args.task_id)
    # Only the task instance of the returned pair is needed here.
    task_instance, _ = _get_ti(task, args.execution_date_or_run_id, args.map_index)

    queued_context = DepContext(deps=SCHEDULER_QUEUED_DEPS)
    unmet = list(task_instance.get_failed_dep_statuses(dep_context=queued_context))

    # TODO, Do we want to print or log this
    if not unmet:
        print("Task instance dependencies are all met.")
        return

    print("Task instance dependencies not met:")
    for status in unmet:
        print(f"{status.dep_name}: {status.reason}")
def test_not_task_concurrency(self):
    """The dep is met for a task created without a ``task_concurrency`` setting."""
    unlimited_task = self._get_task(start_date=datetime(2016, 1, 1))
    context = DepContext()
    task_instance = Mock(task=unlimited_task, execution_date=datetime(2016, 1, 1))

    dep = TaskConcurrencyDep()
    self.assertTrue(dep.is_met(ti=task_instance, dep_context=context))