예제 #1
0
    def _collect_errors(self, ti_status: _DagRunTaskStatus, session=None):
        """
        Build a human-readable report of failed and deadlocked task instances.

        :param ti_status: status object whose ``failed``, ``succeeded``,
            ``running`` and ``skipped`` attributes hold task instance keys and
            whose ``deadlocked`` attribute holds task instances
        :param session: database session forwarded to the dependency checks
        :return: the report text; empty when nothing failed or deadlocked
        """

        def tabulate_ti_keys_set(ti_keys: Iterable[TaskInstanceKey]) -> str:
            """Render the keys as a table ordered by run, DAG, task, map index and try."""
            sorted_ti_keys = sorted(
                ti_keys,
                key=lambda ti_key: (
                    ti_key.run_id,
                    ti_key.dag_id,
                    ti_key.task_id,
                    ti_key.map_index,
                    ti_key.try_number,
                ),
            )

            # Inspect the sorted list rather than ``ti_keys``: a one-shot
            # iterable has already been exhausted by ``sorted`` above.
            #
            # Rows are built by attribute name instead of by tuple position so
            # that every value lands under the header that names it, whatever
            # the field order of the key type is.
            if all(key.map_index == -1 for key in sorted_ti_keys):
                headers = ["DAG ID", "Task ID", "Run ID", "Try number"]
                rows = [
                    (key.dag_id, key.task_id, key.run_id, key.try_number)
                    for key in sorted_ti_keys
                ]
            else:
                headers = [
                    "DAG ID", "Task ID", "Run ID", "Map Index", "Try number"
                ]
                rows = [
                    (key.dag_id, key.task_id, key.run_id, key.map_index,
                     key.try_number)
                    for key in sorted_ti_keys
                ]

            return tabulate(rows, headers=headers)

        err = ''
        if ti_status.failed:
            err += "Some task instances failed:\n"
            err += tabulate_ti_keys_set(ti_status.failed)
        if ti_status.deadlocked:
            err += 'BackfillJob is deadlocked.'
            # A task is blocked by "depends_on_past" when its dependencies are
            # evaluated differently with and without that rule being ignored.
            deadlocked_depends_on_past = any(
                t.are_dependencies_met(
                    dep_context=DepContext(ignore_depends_on_past=False),
                    session=session,
                    verbose=self.verbose,
                ) != t.are_dependencies_met(
                    dep_context=DepContext(ignore_depends_on_past=True),
                    session=session,
                    verbose=self.verbose,
                )
                for t in ti_status.deadlocked)
            if deadlocked_depends_on_past:
                err += (
                    'Some of the deadlocked tasks were unable to run because '
                    'of "depends_on_past" relationships. Try running the '
                    'backfill with the option '
                    '"ignore_first_depends_on_past=True" or passing "-I" at '
                    'the command line.')
            err += '\nThese tasks have succeeded:\n'
            err += tabulate_ti_keys_set(ti_status.succeeded)
            err += '\n\nThese tasks are running:\n'
            err += tabulate_ti_keys_set(ti_status.running)
            err += '\n\nThese tasks have failed:\n'
            err += tabulate_ti_keys_set(ti_status.failed)
            err += '\n\nThese tasks are skipped:\n'
            err += tabulate_ti_keys_set(ti_status.skipped)
            err += '\n\nThese tasks are deadlocked:\n'
            # ``deadlocked`` holds task instances, so reduce them to their keys.
            err += tabulate_ti_keys_set(
                [ti.key for ti in ti_status.deadlocked])

        return err
예제 #2
0
    def _collect_errors(self, ti_status, session=None):
        err = ''
        if ti_status.failed:
            err += ("---------------------------------------------------\n"
                    "Some task instances failed:\n{}\n".format(
                        ti_status.failed))
        if ti_status.deadlocked:
            err += ('---------------------------------------------------\n'
                    'BackfillJob is deadlocked.')
            deadlocked_depends_on_past = any(
                t.are_dependencies_met(dep_context=DepContext(
                    ignore_depends_on_past=False),
                                       session=session,
                                       verbose=self.verbose) !=
                t.are_dependencies_met(dep_context=DepContext(
                    ignore_depends_on_past=True),
                                       session=session,
                                       verbose=self.verbose)
                for t in ti_status.deadlocked)
            if deadlocked_depends_on_past:
                err += (
                    'Some of the deadlocked tasks were unable to run because '
                    'of "depends_on_past" relationships. Try running the '
                    'backfill with the option '
                    '"ignore_first_depends_on_past=True" or passing "-I" at '
                    'the command line.')
            err += ' These tasks have succeeded:\n{}\n'.format(
                ti_status.succeeded)
            err += ' These tasks are running:\n{}\n'.format(ti_status.running)
            err += ' These tasks have failed:\n{}\n'.format(ti_status.failed)
            err += ' These tasks are skipped:\n{}\n'.format(ti_status.skipped)
            err += ' These tasks are deadlocked:\n{}\n'.format(
                ti_status.deadlocked)

        return err
예제 #3
0
    def _collect_errors(self, ti_status, session=None):
        """
        Assemble the error report for failed and deadlocked task instances.

        :param ti_status: status object whose ``failed``, ``succeeded``,
            ``running`` and ``skipped`` attributes hold task instance keys and
            whose ``deadlocked`` attribute holds task instances
        :param session: database session forwarded to the dependency checks
        :return: the report text; empty when nothing failed or deadlocked
        """
        column_titles = ("DAG ID", "Task ID", "Run ID", "Try number")

        def run_first_order(item):
            # Keys and task instances expose the same four attributes, so one
            # ordering function serves both tables.
            return (item.run_id, item.dag_id, item.task_id, item.try_number)

        def tabulate_ti_keys_set(set_ti_keys: Set[TaskInstanceKey]) -> str:
            """Tabulate task instance keys; the keys themselves are the rows."""
            ordered_keys = sorted(set_ti_keys, key=run_first_order)
            return tabulate(ordered_keys, headers=list(column_titles))

        def tabulate_tis_set(set_tis: Set[TaskInstance]) -> str:
            """Tabulate task instances, one four-column row per instance."""
            rows = [
                (ti.dag_id, ti.task_id, ti.run_id, ti.try_number)
                for ti in sorted(set_tis, key=run_first_order)
            ]
            return tabulate(rows, headers=list(column_titles))

        pieces = []
        if ti_status.failed:
            pieces.append("Some task instances failed:\n")
            pieces.append(tabulate_ti_keys_set(ti_status.failed))

        if ti_status.deadlocked:
            pieces.append('BackfillJob is deadlocked.')

            # Look for the first task whose dependencies are evaluated
            # differently once "depends_on_past" is ignored.
            blocked_by_past = False
            for deadlocked_ti in ti_status.deadlocked:
                strict_result = deadlocked_ti.are_dependencies_met(
                    dep_context=DepContext(ignore_depends_on_past=False),
                    session=session,
                    verbose=self.verbose,
                )
                relaxed_result = deadlocked_ti.are_dependencies_met(
                    dep_context=DepContext(ignore_depends_on_past=True),
                    session=session,
                    verbose=self.verbose,
                )
                if strict_result != relaxed_result:
                    blocked_by_past = True
                    break

            if blocked_by_past:
                pieces.append(
                    'Some of the deadlocked tasks were unable to run because '
                    'of "depends_on_past" relationships. Try running the '
                    'backfill with the option '
                    '"ignore_first_depends_on_past=True" or passing "-I" at '
                    'the command line.')

            key_sections = (
                ('\nThese tasks have succeeded:\n', ti_status.succeeded),
                ('\n\nThese tasks are running:\n', ti_status.running),
                ('\n\nThese tasks have failed:\n', ti_status.failed),
                ('\n\nThese tasks are skipped:\n', ti_status.skipped),
            )
            for heading, keys in key_sections:
                pieces.append(heading)
                pieces.append(tabulate_ti_keys_set(keys))

            pieces.append('\n\nThese tasks are deadlocked:\n')
            pieces.append(tabulate_tis_set(ti_status.deadlocked))

        return ''.join(pieces)
예제 #4
0
    def _collect_errors(self, ti_status, session=None):
        err = ""
        if ti_status.failed:
            dr = get_databand_run()
            upstream_failed = []
            failed = []
            for fail_info in ti_status.failed:
                airflow_task_id = fail_info[1]
                task_run = dr.get_task_run(airflow_task_id)
                task_name = task_run.task.task_name
                if task_run.task_run_state == State.UPSTREAM_FAILED:
                    # we don't want to show upstream failed in the list
                    upstream_failed.append(task_name)
                else:
                    failed.append(task_name)
            if upstream_failed:
                err += (
                    "Task that didn't run because "
                    "of failed dependency:\n\t{}\n".format("\n\t".join(upstream_failed))
                )
            if failed:
                err += "Failed tasks are:\n\t{}".format("\n\t".join(failed))
        if ti_status.deadlocked:
            err += (
                "---------------------------------------------------\n"
                "DagRunJob is deadlocked."
            )
            deadlocked_depends_on_past = any(
                t.are_dependencies_met(
                    dep_context=DepContext(ignore_depends_on_past=False),
                    session=session,
                    verbose=self.verbose,
                )
                != t.are_dependencies_met(
                    dep_context=DepContext(ignore_depends_on_past=True),
                    session=session,
                    verbose=self.verbose,
                )
                for t in ti_status.deadlocked
            )
            if deadlocked_depends_on_past:
                err += (
                    "Some of the deadlocked tasks were unable to run because "
                    'of "depends_on_past" relationships. Try running the '
                    "backfill with the option "
                    '"ignore_first_depends_on_past=True" or passing "-I" at '
                    "the command line."
                )
            err += " These tasks have succeeded:\n{}\n".format(ti_status.succeeded)
            err += " These tasks are running:\n{}\n".format(ti_status.running)
            err += " These tasks have failed:\n{}\n".format(ti_status.failed)
            err += " These tasks are skipped:\n{}\n".format(ti_status.skipped)
            err += " These tasks are deadlocked:\n{}\n".format(ti_status.deadlocked)

        return err
예제 #5
0
    def _get_dep_statuses(self, ti, session, dep_context: DepContext):
        """
        Yield the dependency statuses produced by the task's trigger rule.

        A single passing status is yielded when the task has no upstream
        tasks or uses the ALWAYS trigger rule; otherwise the upstream state
        counts are handed to ``_evaluate_trigger_rule``.

        :param ti: the task instance being evaluated
        :param session: database session
        :param dep_context: context supplying the finished task instances and
            the ``flag_upstream_failed`` setting
        """
        task = ti.task

        # Decide first whether the trigger rule needs evaluating at all.
        if not task.upstream_list:
            shortcut_reason = "The task instance did not have any upstream tasks."
        elif task.trigger_rule == TR.ALWAYS:
            shortcut_reason = "The task had a always trigger rule set."
        else:
            shortcut_reason = None

        if shortcut_reason is not None:
            yield self._passing_status(reason=shortcut_reason)
            return

        # Count the states of the upstream task instances among the finished ones.
        finished = dep_context.ensure_finished_tis(ti.get_dagrun(session), session)
        state_counts = self._get_states_count_upstream_ti(ti=ti, finished_tis=finished)
        successes, skipped, failed, upstream_failed, done = state_counts

        yield from self._evaluate_trigger_rule(
            ti=ti,
            successes=successes,
            skipped=skipped,
            failed=failed,
            upstream_failed=upstream_failed,
            done=done,
            flag_upstream_failed=dep_context.flag_upstream_failed,
            session=session,
        )
예제 #6
0
    def get_dep_statuses(self, ti, session, dep_context=None):
        """
        Wrapper around the private _get_dep_statuses method that contains some global
        checks for all dependencies.

        :param ti: the task instance to get the dependency status for
        :type ti: TaskInstance
        :param session: database session
        :type session: Session
        :param dep_context: the context for which this dependency should be evaluated for;
            a default DepContext is created when omitted
        :type dep_context: DepContext
        :return: generator of dependency statuses
        """
        if dep_context is None:
            # Imported here, and only when actually needed, to avoid a
            # circular dependency.
            from airflow.ti_deps.dep_context import DepContext

            dep_context = DepContext()

        if self.IGNOREABLE and dep_context.ignore_all_deps:
            yield self._passing_status(
                reason="Context specified all dependencies should be ignored.")
            # End the generator with ``return``: raising StopIteration inside a
            # generator body is converted to RuntimeError (PEP 479).
            return

        if self.IS_TASK_DEP and dep_context.ignore_task_deps:
            yield self._passing_status(
                reason="Context specified all task dependencies should be ignored.")
            return

        for dep_status in self._get_dep_statuses(ti, session, dep_context):
            yield dep_status
    def _process_finished_ti(session, ti):
        """
        Apply the pending task action of a task instance that already finished.

        When the event-scheduling dependencies are met and the stored action
        is RESTART, the task instance is moved to the SCHEDULED state. Any
        action other than "none" is then cleared and the change committed.

        :param session: database session
        :param ti: the finished task instance
        :return: None
        """
        from airflow.ti_deps.deps.runnable_exec_date_dep import RunnableExecDateDep
        from airflow.ti_deps.deps.valid_state_dep import ValidStateDep
        from airflow.ti_deps.deps.events_dep import EventTIDep

        finished_event_deps = {
            RunnableExecDateDep(),
            ValidStateDep(FINISHED_STATES),
            EventTIDep(),
        }
        context = DepContext(deps=finished_event_deps)
        if not ti.are_dependencies_met(dep_context=context, session=session):
            return

        task_state = TaskState.query_task_state(ti, session=session)
        # Nothing to do when no action is recorded for this task.
        if task_state.action is None:
            return
        requested = TaskAction(task_state.action)
        if requested == TaskAction.NONE:
            return

        if requested == TaskAction.RESTART:
            log.debug('Queuing Finished task: %s', ti)
            ti.state = State.SCHEDULED
            log.info("Creating / updating %s in ORM", ti)
            session.merge(ti)

        # The action has been handled (or is not one we act on): clear it.
        task_state.action = None
        session.merge(task_state)
        session.commit()
    def get_dep_statuses(self, ti, session, dep_context=None):
        """
        Apply the checks shared by all dependencies, then defer to the private
        _get_dep_statuses method.

        :param ti: the task instance to get the dependency status for
        :type ti: airflow.models.TaskInstance
        :param session: database session
        :type session: sqlalchemy.orm.session.Session
        :param dep_context: the context for which this dependency should be evaluated for;
            a default DepContext is used when omitted
        :type dep_context: DepContext
        """
        context = DepContext() if dep_context is None else dep_context

        # Work out whether the context tells us to skip this dependency.
        if self.IGNOREABLE and context.ignore_all_deps:
            ignore_reason = "Context specified all dependencies should be ignored."
        elif self.IS_TASK_DEP and context.ignore_task_deps:
            ignore_reason = "Context specified all task dependencies should be ignored."
        else:
            ignore_reason = None

        if ignore_reason is None:
            yield from self._get_dep_statuses(ti, session, context)
        else:
            yield self._passing_status(reason=ignore_reason)
예제 #9
0
 def test_not_reached_concurrency(self):
     """The dep is met when no instance of a task with task_concurrency=1 is running."""
     start = datetime(2016, 1, 1)
     limited_task = self._get_task(start_date=start, task_concurrency=1)
     ti = Mock(task=limited_task, execution_date=start)
     # Report zero running task instances.
     ti.get_num_running_task_instances = lambda x: 0

     dep_is_met = TaskConcurrencyDep().is_met(ti=ti, dep_context=DepContext())
     assert dep_is_met
예제 #10
0
    def _get_ready_tis(
        self,
        scheduleable_tasks: List[TI],
        finished_tasks: List[TI],
        session: Session,
    ) -> Tuple[List[TI], bool]:
        """
        Split out the task instances whose dependencies are met.

        :param scheduleable_tasks: candidate task instances
        :param finished_tasks: finished task instances handed to the dep context
        :param session: database session
        :return: the ready task instances, and whether any candidate that was
            not ready has a different state in the database than it had before
            its dependencies were checked
        """
        ready: List[TI] = []
        if not scheduleable_tasks:
            return ready, False

        # State of each not-ready candidate, captured before the dependency
        # check ran (the check is asked to flag upstream failures).
        states_before = {}
        for candidate in scheduleable_tasks:
            state_before = candidate.state
            context = DepContext(flag_upstream_failed=True,
                                 finished_tasks=finished_tasks)
            if candidate.are_dependencies_met(dep_context=context,
                                              session=session):
                ready.append(candidate)
            else:
                states_before[candidate.key] = state_before

        # Re-read the not-ready candidates and compare with the captured states.
        state_changed = False
        tis_filter = TI.filter_for_tis(states_before.keys())
        if tis_filter is not None:
            refreshed = session.query(TI).filter(tis_filter).all()
            state_changed = any(ti.state != states_before[ti.key]
                                for ti in refreshed)

        return ready, state_changed
예제 #11
0
def test_parent_skip_branch():
    """
    A simple DAG with a BranchPythonOperator that does not follow op2. NotPreviouslySkippedDep is not met.
    """
    with create_session() as session:
        # Remove existing DagRun and TaskInstance rows before building the scenario.
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        start_date = pendulum.datetime(2020, 1, 1)
        dag = DAG("test_parent_skip_branch_dag",
                  schedule_interval=None,
                  start_date=start_date)
        dag.create_dagrun(run_type=DagRunType.MANUAL,
                          state=State.RUNNING,
                          execution_date=start_date)
        # The branch callable returns "op3", so op2 is the branch not followed.
        op1 = BranchPythonOperator(task_id="op1",
                                   python_callable=lambda: "op3",
                                   dag=dag)
        op2 = DummyOperator(task_id="op2", dag=dag)
        op3 = DummyOperator(task_id="op3", dag=dag)
        op1 >> [op2, op3]
        # Run the branching task before evaluating the dep for op2.
        TaskInstance(op1, start_date).run()
        ti2 = TaskInstance(op2, start_date)
        dep = NotPreviouslySkippedDep()

        # Exactly one status is expected; the dep must not be met and the
        # task instance for op2 must end up in the SKIPPED state.
        assert len(list(dep.get_dep_statuses(ti2, session, DepContext()))) == 1
        session.commit()
        assert not dep.is_met(ti2, session)
        assert ti2.state == State.SKIPPED
예제 #12
0
    def _get_ready_tis(
        self,
        schedulable_tis: List[TI],
        finished_tis: List[TI],
        session: Session,
    ) -> Tuple[List[TI], bool]:
        """
        Collect the task instances whose dependencies are met, expanding mapped tasks on the way.

        :param schedulable_tis: candidate task instances
        :param finished_tis: finished task instances; passed to the dep context and
            searched for an upstream of a mapped candidate
        :param session: database session
        :return: the ready task instances, and whether any candidate that was not
            ready has a different state in the database than it had before its
            dependencies were checked
        """
        # Maps the key of each not-ready candidate to the state it had before the check.
        old_states = {}
        ready_tis: List[TI] = []
        changed_tis = False

        if not schedulable_tis:
            return ready_tis, changed_tis

        # If we expand TIs, we need a new list so that we iterate over them too. (We can't alter
        # `schedulable_tis` in place and have the `for` loop pick them up.) The chained
        # iterator below reaches this list only after `schedulable_tis`, so items appended
        # during the loop are still visited.
        expanded_tis: List[TI] = []
        dep_context = DepContext(
            flag_upstream_failed=True,
            ignore_unmapped_tasks=
            True,  # Ignore this Dep, as we will expand it if we can.
            finished_tis=finished_tis,
        )

        # Check dependencies
        for schedulable in itertools.chain(schedulable_tis, expanded_tis):

            # Capture the state before the dependency check runs.
            old_state = schedulable.state
            if schedulable.are_dependencies_met(session=session,
                                                dep_context=dep_context):
                ready_tis.append(schedulable)
            else:
                old_states[schedulable.key] = old_state
                continue

            # Expansion of last resort! This is ideally handled in the mini-scheduler in LocalTaskJob, but if
            # for any reason it wasn't, we need to expand it now
            if schedulable.map_index < 0 and schedulable.task.is_mapped:
                # HACK. This needs a better way, one that copes with multiple upstreams!
                for ti in finished_tis:
                    if schedulable.task_id in ti.task.downstream_task_ids:

                        assert isinstance(schedulable.task, MappedOperator)
                        new_tis = schedulable.task.expand_mapped_task(
                            self.run_id, session=session)
                        if schedulable.state == TaskInstanceState.SKIPPED:
                            # Task is now skipped (likely because upstream returned 0 tasks).
                            # NOTE(review): this `continue` advances the inner
                            # `for ti in finished_tis` loop, not the outer one, and
                            # `schedulable` is already in `ready_tis` -- confirm intended.
                            continue
                        # The first returned TI is expected to be the one just expanded;
                        # only the remaining ones still need to be checked.
                        assert new_tis[0] is schedulable
                        expanded_tis.extend(new_tis[1:])
                        break

        # Check if any ti changed state
        tis_filter = TI.filter_for_tis(old_states.keys())
        if tis_filter is not None:
            fresh_tis = session.query(TI).filter(tis_filter).all()
            changed_tis = any(ti.state != old_states[ti.key]
                              for ti in fresh_tis)

        return ready_tis, changed_tis
예제 #13
0
    def test_get_states_count_upstream_ti(self):
        """
        Check the '_get_states_count_upstream_ti' helper on its own and then as part of update_state.
        """
        from airflow.ti_deps.dep_context import DepContext

        count_upstream_states = TriggerRuleDep._get_states_count_upstream_ti
        session = settings.Session()
        now = timezone.utcnow()
        dag = DAG(
            'test_dagrun_with_pre_tis',
            start_date=DEFAULT_DATE,
            default_args={'owner': 'owner1'},
        )

        with dag:
            op1 = DummyOperator(task_id='A')
            op2 = DummyOperator(task_id='B')
            op3 = DummyOperator(task_id='C')
            op4 = DummyOperator(task_id='D')
            op5 = DummyOperator(task_id='E', trigger_rule=TriggerRule.ONE_FAILED)

            # A feeds B and C; B and C feed D; B, C and D feed E.
            op1.set_downstream([op2, op3])
            op4.set_upstream([op3, op2])
            op5.set_upstream([op2, op3, op4])

        clear_db_runs()
        dag.clear()
        dr = dag.create_dagrun(
            run_id='test_dagrun_with_pre_tis',
            state=State.RUNNING,
            execution_date=now,
            start_date=now,
        )

        # Task id paired with the state its task instance is forced into.
        forced_states = [
            (op1.task_id, State.SUCCESS),
            (op2.task_id, State.FAILED),
            (op3.task_id, State.SUCCESS),
            (op4.task_id, State.SUCCESS),
            (op5.task_id, State.SUCCESS),
        ]
        # Create every task instance first, then set the states in the same order.
        tis = {
            task_id: TaskInstance(task=dag.get_task(task_id), execution_date=dr.execution_date)
            for task_id, _ in forced_states
        }
        for task_id, forced_state in forced_states:
            tis[task_id].set_state(state=forced_state, session=session)

        session.commit()

        ti_op2 = tis[op2.task_id]
        ti_op4 = tis[op4.task_id]
        ti_op5 = tis[op5.task_id]

        # check handling with cases that tasks are triggered from backfill with no finished tasks
        finished_tasks = DepContext().ensure_finished_tasks(ti_op2.task.dag, ti_op2.execution_date, session)
        op2_counts = count_upstream_states(finished_tasks=finished_tasks, ti=ti_op2)
        self.assertEqual(op2_counts, (1, 0, 0, 0, 1))

        finished_tasks = dr.get_task_instances(
            state=State.finished() + [State.UPSTREAM_FAILED],
            session=session,
        )
        op4_counts = count_upstream_states(finished_tasks=finished_tasks, ti=ti_op4)
        self.assertEqual(op4_counts, (1, 0, 1, 0, 2))
        op5_counts = count_upstream_states(finished_tasks=finished_tasks, ti=ti_op5)
        self.assertEqual(op5_counts, (2, 0, 1, 0, 3))

        dr.update_state()
        self.assertEqual(State.SUCCESS, dr.state)
예제 #14
0
    def test_all_met(self):
        """
        The dep is met when the previous TI succeeded and its dependents are done.
        """
        task = self._get_task(
            depends_on_past=True,
            start_date=datetime(2016, 1, 1),
            wait_for_downstream=True,
        )
        successful_prev_ti = Mock(
            state=State.SUCCESS,
            are_dependents_done=Mock(return_value=True),
        )
        # Dotted keyword configures the return value of ti.get_previous_ti().
        previous_ti_lookup = {'get_previous_ti.return_value': successful_prev_ti}
        ti = Mock(task=task, execution_date=datetime(2016, 1, 2), **previous_ti_lookup)
        context = DepContext(ignore_depends_on_past=False)

        dep_is_met = PrevDagrunDep().is_met(ti=ti, dep_context=context)
        assert dep_is_met
예제 #15
0
    def _get_ready_tis(
        self,
        schedulable_tis: List[TI],
        finished_tis: List[TI],
        session: Session,
    ) -> Tuple[List[TI], bool, bool]:
        """
        Collect the task instances that are ready to run, expanding mapped tasks on the way.

        :param schedulable_tis: candidate task instances
        :param finished_tis: finished task instances, passed to the dep context
        :param session: database session
        :return: a three-tuple of the ready task instances, whether any candidate
            whose dependencies were not met has a different state in the database
            than it had before the check, and whether the mapped-task expansion
            branch was entered for any candidate
        """
        # Maps the key of each candidate with unmet dependencies to its state before the check.
        old_states = {}
        ready_tis: List[TI] = []
        changed_tis = False

        if not schedulable_tis:
            return ready_tis, changed_tis, False

        # If we expand TIs, we need a new list so that we iterate over them too. (We can't alter
        # `schedulable_tis` in place and have the `for` loop pick them up.) The chained
        # iterator below reaches this list only after `schedulable_tis`, so items appended
        # during the loop are still visited.
        additional_tis: List[TI] = []
        dep_context = DepContext(
            flag_upstream_failed=True,
            ignore_unmapped_tasks=
            True,  # Ignore this Dep, as we will expand it if we can.
            finished_tis=finished_tis,
        )

        # Check dependencies.
        expansion_happened = False
        for schedulable in itertools.chain(schedulable_tis, additional_tis):
            # Capture the state before the dependency check runs.
            old_state = schedulable.state
            if not schedulable.are_dependencies_met(session=session,
                                                    dep_context=dep_context):
                old_states[schedulable.key] = old_state
                continue
            # If schedulable is from a mapped task, but not yet expanded, do it
            # now. This is called in two places: First and ideally in the mini
            # scheduler at the end of LocalTaskJob, and then as an "expansion of
            # last resort" in the scheduler to ensure that the mapped task is
            # correctly expanded before executed.
            if schedulable.map_index < 0 and isinstance(
                    schedulable.task, MappedOperator):
                expanded_tis, _ = schedulable.task.expand_mapped_task(
                    self.run_id, session=session)
                if expanded_tis:
                    # The first returned TI is expected to be the one just expanded;
                    # only the remaining ones still need to be checked.
                    assert expanded_tis[0] is schedulable
                    additional_tis.extend(expanded_tis[1:])
                # Set even when the expansion returned no task instances.
                expansion_happened = True
            # Checked after the expansion above has run for this candidate.
            if schedulable.state in SCHEDULEABLE_STATES:
                ready_tis.append(schedulable)

        # Check if any ti changed state
        tis_filter = TI.filter_for_tis(old_states)
        if tis_filter is not None:
            fresh_tis = session.query(TI).filter(tis_filter).all()
            changed_tis = any(ti.state != old_states[ti.key]
                              for ti in fresh_tis)

        return ready_tis, changed_tis, expansion_happened
예제 #16
0
 def _are_premature_tis(self, unfinished_tasks, finished_tasks, session):
     # there might be runnable tasks that are up for retry and from some reason(retry delay, etc) are
     # not ready yet so we set the flags to count them in
     for ut in unfinished_tasks:
         if ut.are_dependencies_met(dep_context=DepContext(
                 flag_upstream_failed=True,
                 ignore_in_retry_period=True,
                 ignore_in_reschedule_period=True,
                 finished_tasks=finished_tasks),
                                    session=session):
             return True
예제 #17
0
    def test_failed_wait_for_downstream(self):
        """
        With wait_for_downstream set, the dep must fail while the downstream
        TIs of the previous TI are not done, even though that TI succeeded.
        """
        task = self._get_task(
            depends_on_past=True,
            start_date=datetime(2016, 1, 1),
            wait_for_downstream=True,
        )
        # Previous TI succeeded, but reports that its dependents are not done.
        prev_with_pending_dependents = Mock(
            state=State.SUCCESS,
            are_dependents_done=Mock(return_value=False),
        )
        ti = Mock(
            task=task,
            previous_ti=prev_with_pending_dependents,
            execution_date=datetime(2016, 1, 2),
        )
        context = DepContext(ignore_depends_on_past=False)

        dep_is_met = PrevDagrunDep().is_met(ti=ti, dep_context=context)
        assert not dep_is_met
예제 #18
0
    def test_prev_ti_bad_state(self):
        """
        The dep must fail when the previous TI has not completed execution.
        """
        task = self._get_task(
            depends_on_past=True,
            start_date=datetime(2016, 1, 1),
            wait_for_downstream=False,
        )
        # Previous TI is still in the NONE state.
        unfinished_prev_ti = Mock(
            state=State.NONE,
            are_dependents_done=Mock(return_value=True),
        )
        ti = Mock(
            task=task,
            previous_ti=unfinished_prev_ti,
            execution_date=datetime(2016, 1, 2),
        )
        context = DepContext(ignore_depends_on_past=False)

        dep_is_met = PrevDagrunDep().is_met(ti=ti, dep_context=context)
        assert not dep_is_met
예제 #19
0
    def test_first_task_run(self):
        """
        A TI without a previous TI is a first run and must pass the dep.
        """
        first_day = datetime(2016, 1, 1)
        task = self._get_task(
            depends_on_past=True,
            start_date=first_day,
            wait_for_downstream=False,
        )
        # No previous TI exists for the first run.
        ti = Mock(task=task, previous_ti=None, execution_date=first_day)
        context = DepContext(ignore_depends_on_past=False)

        dep_is_met = PrevDagrunDep().is_met(ti=ti, dep_context=context)
        assert dep_is_met
예제 #20
0
 def _get_ready_tis(self, scheduleable_tasks, finished_tasks, session):
     ready_tis = []
     changed_tis = False
     for st in scheduleable_tasks:
         st_old_state = st.state
         if st.are_dependencies_met(
             dep_context=DepContext(
                 flag_upstream_failed=True,
                 finished_tasks=finished_tasks),
                 session=session):
             ready_tis.append(st)
         elif st_old_state != st.current_state(session=session):
             changed_tis = True
     return ready_tis, changed_tis
예제 #21
0
def test_no_parent():
    """
    A DAG with a single task and no parent: NotPreviouslySkippedDep is met.
    """
    start_date = pendulum.datetime(2020, 1, 1)
    dag = DAG(
        "test_test_no_parent_dag",
        schedule_interval=None,
        start_date=start_date,
    )
    only_op = DummyOperator(task_id="op1", dag=dag)
    ti1 = TaskInstance(only_op, start_date)

    with create_session() as session:
        dep = NotPreviouslySkippedDep()
        # No status is expected, the dep is met and the TI is not skipped.
        statuses = list(dep.get_dep_statuses(ti1, session, DepContext()))
        assert len(statuses) == 0
        assert dep.is_met(ti1, session)
        assert ti1.state != State.SKIPPED
예제 #22
0
    def test_context_ignore_depends_on_past(self):
        """
        When the context is created with ignore_depends_on_past=True the dep
        should be met for a task that has depends_on_past set.
        """
        task = self._get_task(
            depends_on_past=True,
            start_date=datetime(2016, 1, 1),
            wait_for_downstream=False,
        )
        earlier_ti = Mock(
            task=task,
            state=State.SUCCESS,
            are_dependents_done=Mock(return_value=True),
            execution_date=datetime(2016, 1, 2),
        )
        ti = Mock(
            task=task,
            previous_ti=earlier_ti,
            execution_date=datetime(2016, 1, 3),
        )
        # The context overrides the task's depends_on_past setting.
        context = DepContext(ignore_depends_on_past=True)

        dep_is_met = PrevDagrunDep().is_met(ti=ti, dep_context=context)
        self.assertTrue(dep_is_met)
예제 #23
0
    def test_not_depends_on_past(self):
        """
        When the task does not set depends_on_past, the dep should be met
        with a context that does not ignore depends_on_past.
        """
        # The task itself opts out of depends_on_past.
        task = self._get_task(
            depends_on_past=False,
            start_date=datetime(2016, 1, 1),
            wait_for_downstream=False,
        )
        earlier_ti = Mock(
            task=task,
            state=State.SUCCESS,
            are_dependents_done=Mock(return_value=True),
            execution_date=datetime(2016, 1, 2),
        )
        ti = Mock(
            task=task,
            previous_ti=earlier_ti,
            execution_date=datetime(2016, 1, 3),
        )
        context = DepContext(ignore_depends_on_past=False)

        dep_is_met = PrevDagrunDep().is_met(ti=ti, dep_context=context)
        assert dep_is_met
예제 #24
0
def test_no_skipmixin_parent():
    """
    A DAG without branching, where op1 and op2 are both DummyOperator: NotPreviouslySkippedDep is met.
    """
    start_date = pendulum.datetime(2020, 1, 1)
    dag = DAG(
        "test_no_skipmixin_parent_dag",
        schedule_interval=None,
        start_date=start_date,
    )
    parent_op = DummyOperator(task_id="op1", dag=dag)
    child_op = DummyOperator(task_id="op2", dag=dag)
    parent_op >> child_op

    ti2 = TaskInstance(child_op, start_date)

    with create_session() as session:
        dep = NotPreviouslySkippedDep()
        # No status is expected, the dep is met and the TI is not skipped.
        statuses = list(dep.get_dep_statuses(ti2, session, DepContext()))
        assert len(statuses) == 0
        assert dep.is_met(ti2, session)
        assert ti2.state != State.SKIPPED
예제 #25
0
def test_parent_follow_branch():
    """
    DAG whose BranchPythonOperator (op1) selects op2 as the branch to follow.
    NotPreviouslySkippedDep is expected to be met for op2.
    """
    run_date = pendulum.datetime(2020, 1, 1)
    dag = DAG(
        "test_parent_follow_branch_dag",
        schedule_interval=None,
        start_date=run_date,
    )
    branch = BranchPythonOperator(
        task_id="op1",
        python_callable=lambda: "op2",
        dag=dag,
    )
    followed = DummyOperator(task_id="op2", dag=dag)
    branch >> followed

    # Run the branching task first so its decision exists before the check.
    branch_ti = TaskInstance(branch, run_date)
    branch_ti.run()
    followed_ti = TaskInstance(followed, run_date)

    with create_session() as session:
        skipped_dep = NotPreviouslySkippedDep()
        statuses = list(
            skipped_dep.get_dep_statuses(followed_ti, session, DepContext())
        )
        assert not statuses
        assert skipped_dep.is_met(followed_ti, session)
        assert followed_ti.state != State.SKIPPED
    def _process_running_ti(session, ti):
        """
        Handle a TaskInstance that is currently running.

        Evaluates a fixed set of dependencies (runnable execution date, valid
        running state, event dep) for ``ti``. When they are met and
        ``action_is_stop_or_restart`` reports true for it, an info message is
        logged.

        :param session: session passed on to the dependency check and to
            ``action_is_stop_or_restart``
        :param ti: the TaskInstance to examine
        :return: None
        """
        from airflow.ti_deps.deps.runnable_exec_date_dep import RunnableExecDateDep
        from airflow.ti_deps.deps.valid_state_dep import ValidStateDep
        from airflow.ti_deps.deps.events_dep import EventTIDep

        event_scheduled_deps = {
            RunnableExecDateDep(),
            ValidStateDep(RUNNING_STATES),
            EventTIDep(),
        }
        context = DepContext(deps=event_scheduled_deps)
        deps_met = ti.are_dependencies_met(dep_context=context, session=session)
        # Short-circuit: the action lookup only happens when the deps are met.
        if deps_met and action_is_stop_or_restart(ti, session):
            log.info("stop or restart task %s ", ti)
예제 #27
0
def test_parent_not_executed():
    """
    DAG whose BranchPythonOperator (op1) would pick op3 rather than op2, but
    op1 is never run in this test, so no branching decision has been recorded.
    NotPreviouslySkippedDep is expected to be met for op2 and its state to
    remain unset.
    """
    run_date = pendulum.datetime(2020, 1, 1)
    dag = DAG(
        "test_parent_not_executed_dag",
        schedule_interval=None,
        start_date=run_date,
    )
    branch = BranchPythonOperator(
        task_id="op1",
        python_callable=lambda: "op3",
        dag=dag,
    )
    not_chosen = DummyOperator(task_id="op2", dag=dag)
    chosen = DummyOperator(task_id="op3", dag=dag)
    branch >> [not_chosen, chosen]

    not_chosen_ti = TaskInstance(not_chosen, run_date)

    with create_session() as session:
        skipped_dep = NotPreviouslySkippedDep()
        statuses = list(
            skipped_dep.get_dep_statuses(not_chosen_ti, session, DepContext())
        )
        assert not statuses
        assert skipped_dep.is_met(not_chosen_ti, session)
        assert not_chosen_ti.state == State.NONE
예제 #28
0
def task_failed_deps(args):
    """
    Print the unmet dependencies of a task instance, evaluated against the
    scheduler's dependency set (i.e. why a task instance doesn't get scheduled
    and then queued by the scheduler, and then run by an executor).

    >>> airflow task_failed_deps tutorial sleep 2015-01-01
    Task instance dependencies not met:
    Dagrun Running: Task instance's dagrun did not exist: Unknown reason
    Trigger Rule: Task's trigger rule 'all_success' requires all upstream tasks
    to have succeeded, but found 1 non-success(es).

    :param args: parsed CLI arguments; ``task_id`` and ``execution_date`` are
        read here and the whole object is handed to ``get_dag``
    """
    dag = get_dag(args)
    ti = TaskInstance(dag.get_task(task_id=args.task_id), args.execution_date)

    scheduler_context = DepContext(deps=SCHEDULER_DEPS)
    unmet = list(ti.get_failed_dep_statuses(dep_context=scheduler_context))
    if not unmet:
        print("Task instance dependencies are all met.")
        return

    print("Task instance dependencies not met:")
    for status in unmet:
        print("{}: {}".format(status.dep_name, status.reason))
예제 #29
0
def task_failed_deps(args):
    """
    Print the unmet dependencies of a task instance, evaluated against the
    scheduler's queued-dependency set (i.e. why a task instance doesn't get
    scheduled and then queued by the scheduler, and then run by an executor).

    >>> airflow tasks failed-deps tutorial sleep 2015-01-01
    Task instance dependencies not met:
    Dagrun Running: Task instance's dagrun did not exist: Unknown reason
    Trigger Rule: Task's trigger rule 'all_success' requires all upstream tasks
    to have succeeded, but found 1 non-success(es).

    :param args: parsed CLI arguments; ``subdir``, ``dag_id``, ``task_id``,
        ``execution_date_or_run_id`` and ``map_index`` are read here
    """
    dag = get_dag(args.subdir, args.dag_id)
    task = dag.get_task(task_id=args.task_id)
    ti, _ = _get_ti(task, args.execution_date_or_run_id, args.map_index)

    queued_context = DepContext(deps=SCHEDULER_QUEUED_DEPS)
    unmet = list(ti.get_failed_dep_statuses(dep_context=queued_context))
    # TODO, Do we want to print or log this
    if not unmet:
        print("Task instance dependencies are all met.")
        return

    print("Task instance dependencies not met:")
    for status in unmet:
        print(f"{status.dep_name}: {status.reason}")
 def test_not_task_concurrency(self):
     """
     TaskConcurrencyDep should be met for a task that is created without
     any concurrency-related arguments (only a start_date is supplied).
     """
     task = self._get_task(start_date=datetime(2016, 1, 1))
     ti = Mock(task=task, execution_date=datetime(2016, 1, 1))
     context = DepContext()
     dep_met = TaskConcurrencyDep().is_met(ti=ti, dep_context=context)
     self.assertTrue(dep_met)